From cd94eda70457bac2e13534d8bf58d42c855af649 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 14 Nov 2023 13:25:47 +0000 Subject: [PATCH 001/844] Simplify --- src/Functions/toStartOfInterval.cpp | 130 +++++++++++++++++----------- 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 48bf88cb14c..0a3a409ea9f 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -324,38 +324,61 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - bool first_argument_is_date = false; + bool value_is_date = false; auto check_first_argument = [&] { - if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) + const DataTypePtr & type_arg1 = arguments[0].type; + if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Should be a date or a date with time", arguments[0].type->getName(), getName()); - first_argument_is_date = isDate(arguments[0].type); + "Should be a date or a date with time", type_arg1->getName(), getName()); + value_is_date = isDate(type_arg1); }; const DataTypeInterval * interval_type = nullptr; - bool result_type_is_date = false; - bool result_type_is_datetime = false; - auto check_interval_argument = [&] + enum class ResultType { - interval_type = checkAndGetDataType(arguments[1].type.get()); + Date, + DateTime, + DateTime64 + }; + ResultType result_type; + auto check_second_argument = [&] + { + const DataTypePtr & type_arg2 = arguments[1].type; + interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Should be an interval of time", arguments[1].type->getName(), getName()); - result_type_is_date = (interval_type->getKind() == IntervalKind::Year) - || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) - || (interval_type->getKind() == IntervalKind::Week); - result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour) - || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second); + "Should be an interval of time", type_arg2->getName(), getName()); + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) + { + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: + result_type = ResultType::DateTime64; + break; + case IntervalKind::Second: + case IntervalKind::Minute: + case IntervalKind::Hour: + case IntervalKind::Day: + result_type = ResultType::DateTime; + break; + case IntervalKind::Week: + case IntervalKind::Month: + case IntervalKind::Quarter: + case IntervalKind::Year: + result_type = ResultType::Date; + break; + } }; - auto check_timezone_argument = [&] + auto check_third_argument = [&] { - if (!WhichDataType(arguments[2].type).isString()) + const DataTypePtr & type_arg3 = arguments[2].type; + if (!isString(type_arg3)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " "This argument is optional and must be a constant string with timezone name", - arguments[2].type->getName(), getName()); - if (first_argument_is_date && result_type_is_date) + type_arg3->getName(), getName()); + if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " "has the type DateTime or DateTime64", @@ -365,13 +388,13 @@ public: if (arguments.size() == 2) { check_first_argument(); - check_interval_argument(); + check_second_argument(); } else if (arguments.size() == 3) { check_first_argument(); - check_interval_argument(); - check_timezone_argument(); + check_second_argument(); + check_third_argument(); } else { @@ -380,24 +403,27 @@ public: getName(), arguments.size()); } - if (result_type_is_date) - return std::make_shared(); - else if (result_type_is_datetime) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); - else + switch (result_type) { - auto scale = 0; + case ResultType::Date: + return std::make_shared(); + case ResultType::DateTime: + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + case ResultType::DateTime64: + { + UInt32 scale = 0; + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; - if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + } } + std::unreachable(); } bool useDefaultImplementationForConstants() const override { return true; } @@ -426,34 +452,34 @@ private: ColumnPtr dispatchForColumns( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { - const auto & from_datatype = *time_column.type.get(); - const auto which_type = WhichDataType(from_datatype); + const auto & time_column_type = *time_column.type.get(); + const auto & time_column_col = *time_column.column.get(); - if (which_type.isDateTime64()) + if (isDateTime64(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); - auto scale = assert_cast(from_datatype).getScale(); + const auto * time_column_vec = checkAndGetColumn(time_column_col); + auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); } - if (which_type.isDateTime()) + else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } - if (which_type.isDate()) + else if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } - if (which_type.isDate32()) + else if (isDate32(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. " "Must contain dates or dates with time", getName()); @@ -502,7 +528,7 @@ private: return execute(from, time_column, num_units, result_type, time_zone, scale); } - UNREACHABLE(); + std::unreachable(); } template @@ -515,7 +541,7 @@ private: size_t size = time_data.size(); auto result_col = result_type->createColumn(); - auto *col_to = assert_cast(result_col.get()); + auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); result_data.resize(size); From aef1ea3df71d7d19c6267f6bb03ce994b7e14909 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 14 Nov 2023 17:49:08 +0000 Subject: [PATCH 002/844] Extend with origin argument, pt. I --- src/Functions/toStartOfInterval.cpp | 128 +++++++++++++++++++++------- 1 file changed, 95 insertions(+), 33 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 0a3a409ea9f..4d716637932 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -321,6 +321,11 @@ public: size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } + + bool hasInformationAboutMonotonicity() const override { return true; } + Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override { return { .is_monotonic = true, .is_always_monotonic = true }; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { @@ -329,7 +334,7 @@ public: { const DataTypePtr & type_arg1 = arguments[0].type; if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1st argument of function {}. " "Should be a date or a date with time", type_arg1->getName(), getName()); value_is_date = isDate(type_arg1); }; @@ -347,7 +352,7 @@ public: const DataTypePtr & type_arg2 = arguments[1].type; interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. " "Should be an interval of time", type_arg2->getName(), getName()); switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { @@ -371,18 +376,48 @@ public: } }; + enum class ThirdArgument + { + IsTimezone, + IsOrigin + }; + ThirdArgument third_argument; /// valid only if 3rd argument is given auto check_third_argument = [&] { const DataTypePtr & type_arg3 = arguments[2].type; - if (!isString(type_arg3)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "This argument is optional and must be a constant string with timezone name", + if (isString(type_arg3)) + { + third_argument = ThirdArgument::IsTimezone; + if (value_is_date && result_type == ResultType::Date) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + getName(), interval_type->getKind().toString()); + } + else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + third_argument = ThirdArgument::IsOrigin; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " + "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", type_arg3->getName(), getName()); + + }; + + auto check_fourth_argument = [&] + { + if (third_argument != ThirdArgument::IsOrigin) /// sanity check + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " + "The third argument must a Date/Date32/DateTime/DateTime64 with a constant origin", + arguments[2].type->getName(), getName()); + + const DataTypePtr & type_arg4 = arguments[3].type; + if (!isString(type_arg4)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 4th argument of function {}. " + "This argument is optional and must be a constant String with timezone name", + type_arg4->getName(), getName()); if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " - "has the type DateTime or DateTime64", - getName(), interval_type->getKind().toString()); + "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + getName(), interval_type->getKind().toString()); }; if (arguments.size() == 2) @@ -396,10 +431,17 @@ public: check_second_argument(); check_third_argument(); } + else if (arguments.size() == 4) + { + check_first_argument(); + check_second_argument(); + check_third_argument(); + check_fourth_argument(); + } else { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + "Number of arguments for function {} doesn't match: passed {}, should be 2, 3 or 4", getName(), arguments.size()); } @@ -408,7 +450,10 @@ public: case ResultType::Date: return std::make_shared(); case ResultType::DateTime: - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + { + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); + } case ResultType::DateTime64: { UInt32 scale = 0; @@ -419,7 +464,8 @@ public: else if (interval_type->getKind() == IntervalKind::Millisecond) scale = 3; - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } } @@ -433,8 +479,16 @@ public: { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; - const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); + + ColumnWithTypeAndName origin_column; + const bool has_origin_arg = (arguments.size() == 3 && isDateOrDate32OrDateTimeOrDateTime64(arguments[2].type)) || arguments.size() == 4; + if (has_origin_arg) + origin_column = arguments[2]; + + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); + + auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } @@ -449,8 +503,8 @@ public: } private: - ColumnPtr dispatchForColumns( - const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const + ColumnPtr dispatchForTimeColumn( + const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); @@ -461,25 +515,25 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } else if (isDateTime(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. " "Must contain dates or dates with time", getName()); @@ -487,7 +541,7 @@ private: template ColumnPtr dispatchForIntervalColumn( - const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, + const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); @@ -505,34 +559,34 @@ private: switch (interval_type->getKind()) { case IntervalKind::Nanosecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Microsecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Millisecond: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Second: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Day: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Week: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Month: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Year: - return execute(from, time_column, num_units, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); } - template - ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + template + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, [[maybe_unused]] const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; using ToFieldType = typename ToDataType::FieldType; @@ -547,6 +601,14 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + /// TODO: This part is missing. origin_column is either {} (<-- to check, you could do `origin_column.column == nullptr`) or not {} + /// In the former case, we can execute below existing code. + /// In the latter case, we need to read the actual origin value. As per `getArgumentsThatAreAlwaysConstant()` (see above), we + /// can be sure that origin_column is a `ColumnConst`. The second assumption we can reasonable make is that it has the same + /// type (Date/Date32/DateTime/DateTime64) as the time column (1st argument). Since the method we are in is already + /// templatized on the data type of the time column, we can use `checkAndGetColumnConst(...)` to cast + /// `origin_column.column` to a const column and then read the (const) value from it, and proceed with the calculations. + for (size_t i = 0; i != size; ++i) result_data[i] = static_cast( Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); From 188c469318e8a7a7e339bcf4048952181e4e324c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 Nov 2023 14:22:39 +0000 Subject: [PATCH 003/844] done suggestings about logic --- src/Functions/toStartOfInterval.cpp | 341 +++------------------------- 1 file changed, 27 insertions(+), 314 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 4d716637932..29f8faa810c 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -21,300 +23,18 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; - extern const int DECIMAL_OVERFLOW; } namespace { - constexpr auto function_name = "toStartOfInterval"; - - template - struct Transform; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(DayNum(d), years); - } - - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); - } - - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); - } - - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); - } - - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); - } - - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); - } - - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(DayNum(d), months); - } - - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); - } - - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); - } - - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); - } - }; - - template <> - struct Transform - { - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(DayNum(d), weeks); - } - - static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); - } - - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); - } - - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); - } - - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfHourInterval(t, hours); - } - - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMinuteInterval(t, minutes); - } - - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000) - { - Int64 t_milliseconds = 0; - if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_milliseconds / milliseconds * milliseconds; - else - return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; - } - else if (scale_multiplier > 1000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000); - if (likely(t >= 0)) - return t / milliseconds / scale_diff * milliseconds; - else - return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; - } - else - if (likely(t >= 0)) - return t / milliseconds * milliseconds; - else - return ((t + 1) / milliseconds - 1) * milliseconds; - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000) - { - Int64 t_microseconds = 0; - if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_microseconds / microseconds * microseconds; - else - return ((t_microseconds + 1) / microseconds - 1) * microseconds; - } - else if (scale_multiplier > 1000000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000000); - if (likely(t >= 0)) - return t / microseconds / scale_diff * microseconds; - else - return ((t + 1) / microseconds / scale_diff - 1) * microseconds; - } - else - if (likely(t >= 0)) - return t / microseconds * microseconds; - else - return ((t + 1) / microseconds - 1) * microseconds; - } - }; - - template <> - struct Transform - { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000000) - { - Int64 t_nanoseconds = 0; - if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_nanoseconds / nanoseconds * nanoseconds; - else - return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; - } - else - if (likely(t >= 0)) - return t / nanoseconds * nanoseconds; - else - return ((t + 1) / nanoseconds - 1) * nanoseconds; - } - }; class FunctionToStartOfInterval : public IFunction { public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static constexpr auto name = function_name; + static constexpr auto name = "toStartOfInterval"; String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -472,9 +192,6 @@ public: std::unreachable(); } - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; @@ -485,23 +202,13 @@ public: if (has_origin_arg) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const size_t time_zone_arg_num = (arguments.size() == 3 && isString(arguments[2].type)) ? 2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } - bool hasInformationAboutMonotonicity() const override - { - return true; - } - - Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override - { - return { .is_monotonic = true, .is_always_monotonic = true }; - } - private: ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const @@ -535,11 +242,10 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. " - "Must contain dates or dates with time", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. Must contain dates or dates with time", getName()); } - template + template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const @@ -547,16 +253,16 @@ private: const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); + const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be a const interval of time.", - getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be a const interval of time.", getName()); + Int64 num_units = interval_column_const_int64->getValue(); if (num_units <= 0) throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); - switch (interval_type->getKind()) + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); @@ -601,17 +307,24 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - /// TODO: This part is missing. origin_column is either {} (<-- to check, you could do `origin_column.column == nullptr`) or not {} - /// In the former case, we can execute below existing code. - /// In the latter case, we need to read the actual origin value. As per `getArgumentsThatAreAlwaysConstant()` (see above), we - /// can be sure that origin_column is a `ColumnConst`. The second assumption we can reasonable make is that it has the same - /// type (Date/Date32/DateTime/DateTime64) as the time column (1st argument). Since the method we are in is already - /// templatized on the data type of the time column, we can use `checkAndGetColumnConst(...)` to cast - /// `origin_column.column` to a const column and then read the (const) value from it, and proceed with the calculations. + if (origin_column.column == nullptr) + for (size_t i = 0; i != size; ++i) + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + else + { + UInt64 od = origin_column.column->get64(0); + + for (size_t i = 0; i != size; ++i) + { + auto td = time_data[i]; + if (od > size_t(td)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + td -= od; + result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); - for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast( - Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] += scale_multiplier == 10 ? od : od / scale_multiplier; + } + } return result_col; } From 6605a375b642d0bc8b6ffccbd0400eb4ccee223e Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 Nov 2023 17:35:04 +0000 Subject: [PATCH 004/844] style check, docs, tests and modified logic --- .../functions/date-time-functions.md | 7 +- src/Functions/toStartOfInterval.cpp | 306 +++++++++++++++++- ...tart_of_interval_origin_overload.reference | 15 + ...6_to_start_of_interval_origin_overload.sql | 12 + 4 files changed, 335 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 0364a610404..3f522eeb164 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -590,7 +590,7 @@ Rounds down a date with time to the start of the ten-minute interval. Rounds down the date with time to the start of the fifteen-minute interval. -## toStartOfInterval(time_or_data, INTERVAL x unit \[, time_zone\]) +## toStartOfInterval(time_or_data, INTERVAL x unit \[, origin_time_or_data\] \[, time_zone\]) This is a generalization of other functions named `toStartOf*`. For example, `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, @@ -598,6 +598,11 @@ This is a generalization of other functions named `toStartOf*`. For example, `toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, `toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc. +Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: +``` SQL +SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); +``` + ## toTime Converts a date with time to a certain fixed date, while preserving the time. diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 29f8faa810c..8529643ee70 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -23,11 +24,292 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int BAD_ARGUMENTS; } namespace { +constexpr auto function_name = "toStartOfInterval"; + +template +struct Transform; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(DayNum(d), years); + } + + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); + } + + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); + } + + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); + } + + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); + } + + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); + } + + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(DayNum(d), months); + } + + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); + } + + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); + } + + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + } +}; + +template <> +struct Transform +{ + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(DayNum(d), weeks); + } +static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); + } + + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); + } + + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); + } + + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); + } + + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) + { + return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); + } + + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfHourInterval(t, hours); + } + + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfMinuteInterval(t, minutes); + } + + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000) + { + Int64 t_milliseconds = 0; + if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_milliseconds / milliseconds * milliseconds; + else + return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; + } + else if (scale_multiplier > 1000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000); + if (likely(t >= 0)) + return t / milliseconds / scale_diff * milliseconds; + else + return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; + } + else + if (likely(t >= 0)) + return t / milliseconds * milliseconds; + else + return ((t + 1) / milliseconds - 1) * milliseconds; + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000) + { + Int64 t_microseconds = 0; + if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_microseconds / microseconds * microseconds; + else + return ((t_microseconds + 1) / microseconds - 1) * microseconds; + } + else if (scale_multiplier > 1000000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000000); + if (likely(t >= 0)) + return t / microseconds / scale_diff * microseconds; + else + return ((t + 1) / microseconds / scale_diff - 1) * microseconds; + } + else + if (likely(t >= 0)) + return t / microseconds * microseconds; + else + return ((t + 1) / microseconds - 1) * microseconds; + } +}; + +template <> +struct Transform +{ + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000000) + { + Int64 t_nanoseconds = 0; + if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); + if (likely(t >= 0)) + return t_nanoseconds / nanoseconds * nanoseconds; + else + return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; + } + else + if (likely(t >= 0)) + return t / nanoseconds * nanoseconds; + else + return ((t + 1) / nanoseconds - 1) * nanoseconds; + } +}; class FunctionToStartOfInterval : public IFunction { @@ -218,6 +500,10 @@ private: if (isDateTime64(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDateTime64(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); auto scale = assert_cast(time_column_type).getScale(); @@ -226,18 +512,30 @@ private: } else if (isDateTime(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDateTime(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDate(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { + if (origin_column.column != nullptr) + if (!isDate32(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); @@ -292,7 +590,7 @@ private: } template - ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, [[maybe_unused]] const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; using ToFieldType = typename ToDataType::FieldType; @@ -309,18 +607,18 @@ private: if (origin_column.column == nullptr) for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); else { UInt64 od = origin_column.column->get64(0); - + for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; if (od > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); td -= od; - result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(Transform::execute(td, num_units, time_zone, scale_multiplier)); result_data[i] += scale_multiplier == 10 ? od : od / scale_multiplier; } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference new file mode 100644 index 00000000000..7213925fb64 --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference @@ -0,0 +1,15 @@ +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-01-02 15:44:30 +2023-01-02 15:44:30 +2023-01-02 14:45:30 +2023-01-02 14:45:30 +2023-01-02 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql new file mode 100644 index 00000000000..ce4c8f87811 --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql @@ -0,0 +1,12 @@ +set session_timezone = 'UTC'; +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalMinute(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalWeek(1), toDate('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError 43 } +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError 43 } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError 36 } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError 44 } From f596ae7a2c02e7915917daca82dad717a7df2640 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 18 Nov 2023 16:24:56 +0000 Subject: [PATCH 005/844] Fix style --- src/Functions/toStartOfInterval.cpp | 7 ++++--- utils/check-style/aspell-ignore/en/aspell-dict.txt | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 8529643ee70..10792922afb 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -20,11 +20,12 @@ namespace DB { namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int DECIMAL_OVERFLOW; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 5d11185ff76..7eaafe8a777 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -261,6 +261,7 @@ FOSDEM FQDN Failover FarmHash +FileLog FilesystemCacheBytes FilesystemCacheElements FilesystemCacheFiles @@ -278,7 +279,6 @@ FilesystemMainPathTotalBytes FilesystemMainPathTotalINodes FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes -FileLog FixedString Flink ForEach @@ -571,13 +571,13 @@ NetworkSendPackets NodeJs NuRaft NumHexagons +NumPy NumToString NumToStringClassC NumberOfDatabases NumberOfDetachedByUserParts NumberOfDetachedParts NumberOfTables -NumPy OFNS OLAP OLTP @@ -588,10 +588,10 @@ OSGuestNiceTimeNormalized OSGuestTime OSGuestTimeCPU OSGuestTimeNormalized +OSIOWaitMicroseconds OSIOWaitTime OSIOWaitTimeCPU OSIOWaitTimeNormalized -OSIOWaitMicroseconds OSIdleTime OSIdleTimeCPU OSIdleTimeNormalized @@ -900,6 +900,7 @@ ThreadPoolRemoteFSReaderThreads ThreadPoolRemoteFSReaderThreadsActive ThreadsActive ThreadsInOvercommitTracker +TimescaleDB's Timeunit TinyLog Tkachenko @@ -1470,12 +1471,12 @@ fastops fcoverage fibonacci fifo +filelog filesystem filesystemAvailable filesystemCapacity filesystemFree filesystems -filelog finalizeAggregation fips firstLine From ea2ba82c1001febabdd8753246b64143e4bf1f6f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 18 Nov 2023 16:37:50 +0000 Subject: [PATCH 006/844] Update docs --- .../functions/date-time-functions.md | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1291b570da4..989b39e46c1 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1345,7 +1345,7 @@ toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00')): 2023-04-21 10:20:00 Rounds down the date with time to the start of the fifteen-minute interval. -## toStartOfInterval(date_or_date_with_time, INTERVAL x unit \[, origin_time_or_data\] \[, time_zone\]) +## toStartOfInterval This function generalizes other `toStartOf*()` functions. For example, - `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, @@ -1372,21 +1372,21 @@ The calculation is performed relative to specific points in time: (*) hour intervals are special: the calculation is always performed relative to 00:00:00 (midnight) of the current day. As a result, only hour values between 1 and 23 are useful. -Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: -``` SQL -SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); +**Syntax** + +```sql +toStartOfInterval(value, INTERVAL x unit[, time_zone]) +toStartOfInterval(value, INTERVAL x unit[, origin[, time_zone]]) ``` +The second overload emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function, e.g. + +``` SQL +SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), INTERVAL 1 MINUTE, toDateTime('2023-01-01 14:35:30')); +``` **See Also** - - - [date_trunc](#date_trunc) -Also it has an overload including `origin_time_or_data` column which emulates TimescaleDB's `time_bucket()` function, respectively PostgreSQL's `date_bin()` function: -``` SQL -SELECT toStartOfInterval(toDateTime('2023-01-01 14:45:00'), toIntervalMinute(1), toDateTime('2023-01-01 14:35:30')); -``` - ## toTime Converts a date with time to a certain fixed date, while preserving the time. @@ -2462,19 +2462,29 @@ SELECT └──────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ ``` -## timeSlots(StartTime, Duration,\[, Size\]) +## timeSlots For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. This is necessary, for example, when searching for pageviews in the corresponding session. Accepts DateTime and DateTime64 as ’StartTime’ argument. For DateTime, ’Duration’ and ’Size’ arguments must be `UInt32`. For ’DateTime64’ they must be `Decimal64`. Returns an array of DateTime/DateTime64 (return type matches the type of ’StartTime’). For DateTime64, the return value's scale can differ from the scale of ’StartTime’ --- the highest scale among all given arguments is taken. -Example: +**Syntax** + +```sql +timeSlots(StartTime, Duration,\[, Size\]) +``` + +**Example** + ```sql SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600)); SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299); SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0)); ``` + +Result: + ``` text ┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐ │ ['2012-01-01 12:00:00','2012-01-01 12:30:00'] │ From 74449872180542ebdcc26e7410100b6d897efc65 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 20 Nov 2023 18:23:18 +0000 Subject: [PATCH 007/844] need to deal with week+ time intervals --- src/Functions/toStartOfInterval.cpp | 311 +----------------- ...to_start_of_interva_with_origin.reference} | 0 ...6_to_start_of_interval_origin_overload.sql | 12 - ...02916_to_start_of_interval_with_origin.sql | 42 +++ 4 files changed, 58 insertions(+), 307 deletions(-) rename tests/queries/0_stateless/{02916_to_start_of_interval_origin_overload.reference => 02916_to_start_of_interva_with_origin.reference} (100%) delete mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql create mode 100644 tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 10792922afb..fd472d453b5 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -20,297 +20,16 @@ namespace DB { namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int BAD_ARGUMENTS; - extern const int DECIMAL_OVERFLOW; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int BAD_ARGUMENTS; } namespace { -constexpr auto function_name = "toStartOfInterval"; - -template -struct Transform; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(DayNum(d), years); - } - - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); - } - - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); - } - - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); - } - - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); - } - - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); - } - - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(DayNum(d), months); - } - - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); - } - - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); - } - - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); - } -}; - -template <> -struct Transform -{ - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(DayNum(d), weeks); - } -static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); - } - - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); - } - - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); - } - - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) - { - return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); - } - - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfHourInterval(t, hours); - } - - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfMinuteInterval(t, minutes); - } - - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) - { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) - { - return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000) - { - Int64 t_milliseconds = 0; - if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_milliseconds / milliseconds * milliseconds; - else - return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; - } - else if (scale_multiplier > 1000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000); - if (likely(t >= 0)) - return t / milliseconds / scale_diff * milliseconds; - else - return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; - } - else - if (likely(t >= 0)) - return t / milliseconds * milliseconds; - else - return ((t + 1) / milliseconds - 1) * milliseconds; - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000) - { - Int64 t_microseconds = 0; - if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_microseconds / microseconds * microseconds; - else - return ((t_microseconds + 1) / microseconds - 1) * microseconds; - } - else if (scale_multiplier > 1000000) - { - Int64 scale_diff = scale_multiplier / static_cast(1000000); - if (likely(t >= 0)) - return t / microseconds / scale_diff * microseconds; - else - return ((t + 1) / microseconds / scale_diff - 1) * microseconds; - } - else - if (likely(t >= 0)) - return t / microseconds * microseconds; - else - return ((t + 1) / microseconds - 1) * microseconds; - } -}; - -template <> -struct Transform -{ - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } - - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } - - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) - { - if (scale_multiplier < 1000000000) - { - Int64 t_nanoseconds = 0; - if (common::mulOverflow(t, (static_cast(1000000000) / scale_multiplier), t_nanoseconds)) - throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); - if (likely(t >= 0)) - return t_nanoseconds / nanoseconds * nanoseconds; - else - return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; - } - else - if (likely(t >= 0)) - return t / nanoseconds * nanoseconds; - else - return ((t + 1) / nanoseconds - 1) * nanoseconds; - } -}; class FunctionToStartOfInterval : public IFunction { @@ -485,7 +204,7 @@ public: if (has_origin_arg) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 3 && isString(arguments[2].type)) ? 2 : 3; + const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); @@ -501,9 +220,8 @@ private: if (isDateTime64(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDateTime64(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (!isDateTime64(origin_column.type.get()) && origin_column.column != nullptr) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); auto scale = assert_cast(time_column_type).getScale(); @@ -608,20 +326,23 @@ private: if (origin_column.column == nullptr) for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); else { - UInt64 od = origin_column.column->get64(0); + UInt64 origin = origin_column.column->get64(0); + std::cerr << "origin: " << origin << std::endl; + std::cerr << "scale_multiplier: " << scale_multiplier << std::endl; for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; - if (od > size_t(td)) + if (origin > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); - td -= od; - result_data[i] = static_cast(Transform::execute(td, num_units, time_zone, scale_multiplier)); - - result_data[i] += scale_multiplier == 10 ? od : od / scale_multiplier; + td -= origin; + result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) + origin = origin / scale_multiplier; + result_data[i] += origin; } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference similarity index 100% rename from tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.reference rename to tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql b/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql deleted file mode 100644 index ce4c8f87811..00000000000 --- a/tests/queries/0_stateless/02916_to_start_of_interval_origin_overload.sql +++ /dev/null @@ -1,12 +0,0 @@ -set session_timezone = 'UTC'; -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalMinute(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalWeek(1), toDate('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError 43 } -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError 43 } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError 36 } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql new file mode 100644 index 00000000000..5f5f941047a --- /dev/null +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -0,0 +1,42 @@ +set session_timezone = 'UTC'; + +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9)); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6)); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30')); + +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam'); + +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); + +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate32('2023-01-02 14:45:50'), toIntervalHour(5), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError BAD_ARGUMENTS } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError ILLEGAL_COLUMN } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), materialize(toDateTime('2023-01-02 14:44:30')), 'Europe/Amsterdam'); -- { serverError ILLEGAL_COLUMN } From 8cd4d35ead1dc7ea5fff3a794522ffb5dbc75726 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 29 Nov 2023 17:35:33 +0000 Subject: [PATCH 008/844] fixed review --- src/Functions/toStartOfInterval.cpp | 26 ++++++++------ ..._to_start_of_interva_with_origin.reference | 35 ++++++++++++++----- ...02916_to_start_of_interval_with_origin.sql | 18 +++++----- 3 files changed, 51 insertions(+), 28 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index fd472d453b5..c4f60d6d6db 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,6 +1,8 @@ #include +#include "Common/IntervalKind.h" #include #include +#include "base/types.h" #include #include #include @@ -220,7 +222,7 @@ private: if (isDateTime64(time_column_type)) { - if (!isDateTime64(origin_column.type.get()) && origin_column.column != nullptr) + if (origin_column.column != nullptr && !isDateTime64(origin_column.type.get())) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); @@ -231,9 +233,8 @@ private: } else if (isDateTime(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDateTime(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (origin_column.column != nullptr && !isDateTime(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) @@ -241,9 +242,8 @@ private: } else if (isDate(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDate(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + if (origin_column.column != nullptr && !isDate(origin_column.type.get())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) @@ -330,19 +330,23 @@ private: else { UInt64 origin = origin_column.column->get64(0); - std::cerr << "origin: " << origin << std::endl; - std::cerr << "scale_multiplier: " << scale_multiplier << std::endl; for (size_t i = 0; i != size; ++i) { auto td = time_data[i]; + result_data[i] = 0; if (origin > size_t(td)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + td -= origin; - result_data[i] = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + auto res = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) origin = origin / scale_multiplier; - result_data[i] += origin; + if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + result_data[i] = UInt16(origin/86400 + res); + else + result_data[i] += origin + res; } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference index 7213925fb64..78e3b117cb8 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference @@ -1,15 +1,34 @@ +2023-01-02 14:45:50.917120700 +2023-01-02 14:45:50.910600 +2023-01-02 14:45:50.901 +2023-01-02 14:45:50 +2023-01-02 14:45:30 +2023-01-02 14:44:30 +2023-01-02 14:44:30 +2023-01-06 +2023-03-02 +2023-07-02 +2022-01-02 +2023-01-02 15:45:50.917120700 +2023-01-02 15:45:50.910600 +2023-01-02 15:45:50.901 +2023-01-02 15:45:50 +2023-01-02 15:45:30 +2023-01-02 15:44:30 +2023-01-03 14:44:30 +2023-01-06 +2023-03-02 +2023-07-02 +2022-01-02 2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 +2023-02-01 16:54:33 +2023-03-01 16:54:38 +2023-02-01 16:54:52 +2023-03-01 16:54:16 2023-03-01 16:55:00 2023-02-01 16:55:00 2023-03-01 16:55:00 2023-02-01 16:55:00 2023-03-01 16:55:00 2023-01-02 15:44:30 -2023-01-02 15:44:30 -2023-01-02 14:45:30 -2023-01-02 14:45:30 -2023-01-02 +2023-02-01 16:44:30 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 5f5f941047a..390b36a4cf1 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -7,10 +7,10 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30')); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); @@ -19,15 +19,15 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30'), 'Europe/Amsterdam'); SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From b298bed1cfdb67e21da89e2b8a4bb4affc44f14b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:08:19 +0100 Subject: [PATCH 009/844] Rename 02916_to_start_of_interva_with_origin.reference to 02916_to_start_of_interval_with_origin.reference --- ...reference => 02916_to_start_of_interval_with_origin.reference} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02916_to_start_of_interva_with_origin.reference => 02916_to_start_of_interval_with_origin.reference} (100%) diff --git a/tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference similarity index 100% rename from tests/queries/0_stateless/02916_to_start_of_interva_with_origin.reference rename to tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference From 450f609227fe0cedefbf2473a1a2c4b4e7b65d5f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 2 Dec 2023 22:58:52 +0000 Subject: [PATCH 010/844] Various updates --- src/Functions/toStartOfInterval.cpp | 29 +++-- ...to_start_of_interval_with_origin.reference | 74 +++++++------ ...02916_to_start_of_interval_with_origin.sql | 103 ++++++++++++------ 3 files changed, 128 insertions(+), 78 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c4f60d6d6db..ec0deee8abd 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,21 +1,21 @@ -#include #include "Common/IntervalKind.h" -#include -#include -#include "base/types.h" -#include -#include #include #include +#include +#include #include #include #include #include #include +#include #include #include +#include #include #include +#include +#include namespace DB @@ -325,26 +325,31 @@ private: Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); if (origin_column.column == nullptr) + { for (size_t i = 0; i != size; ++i) result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + } else { UInt64 origin = origin_column.column->get64(0); for (size_t i = 0; i != size; ++i) { - auto td = time_data[i]; - result_data[i] = 0; - if (origin > size_t(td)) + auto t = time_data[i]; + if (origin > static_cast(t)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); - td -= origin; - auto res = static_cast(ToStartOfInterval::execute(td, num_units, time_zone, scale_multiplier)); + t -= origin; + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) origin = origin / scale_multiplier; + + static constexpr size_t SECONDS_PER_DAY = 86400; + + result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) - result_data[i] = UInt16(origin/86400 + res); + result_data[i] = static_cast(origin/SECONDS_PER_DAY + res); else result_data[i] += origin + res; } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 78e3b117cb8..3574da8f685 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,34 +1,40 @@ -2023-01-02 14:45:50.917120700 -2023-01-02 14:45:50.910600 -2023-01-02 14:45:50.901 -2023-01-02 14:45:50 -2023-01-02 14:45:30 -2023-01-02 14:44:30 -2023-01-02 14:44:30 -2023-01-06 -2023-03-02 -2023-07-02 -2022-01-02 -2023-01-02 15:45:50.917120700 -2023-01-02 15:45:50.910600 -2023-01-02 15:45:50.901 -2023-01-02 15:45:50 -2023-01-02 15:45:30 -2023-01-02 15:44:30 -2023-01-03 14:44:30 -2023-01-06 -2023-03-02 -2023-07-02 -2022-01-02 -2023-03-01 16:55:00 -2023-02-01 16:54:33 -2023-03-01 16:54:38 -2023-02-01 16:54:52 -2023-03-01 16:54:16 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-02-01 16:55:00 -2023-03-01 16:55:00 -2023-01-02 15:44:30 -2023-02-01 16:44:30 +-- Negative tests +Time and origin as Time +1971-01-01 +1971-07-01 +1970-02-01 +1970-01-05 +1970-01-02 05:27:18 +Time and origin as DateTime +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +Time and origin as DateTime64(9) +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +2299-12-31 23:57:37.653 +2299-12-31 23:42:04.320986 +2023-10-09 10:11:12.987654321 +Time and origin as DateTime64(3) +2023-02-01 +2023-08-01 +2023-10-09 +2023-10-05 +2023-10-09 09:08:07 +2023-10-09 10:10:07 +2023-10-09 10:11:11 +2023-10-09 10:11:12 +2023-10-09 10:11:12.987 +1970-01-20 15:20:47.136123 +1970-01-01 00:28:17.710272123 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 390b36a4cf1..864ef56e7ea 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -1,42 +1,81 @@ set session_timezone = 'UTC'; -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9)); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6)); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2)); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30')); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30')); - -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122341', 9), toIntervalNanosecond(10000), toDateTime64('2023-01-02 14:44:30.500600700', 9), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.917122', 6), toIntervalMicrosecond(10000), toDateTime64('2023-01-02 14:44:30.500600', 6), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50.91', 3), toIntervalMillisecond(100), toDateTime64('2023-01-02 14:44:30.501', 3), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalSecond(2), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalDay(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-08 14:45:50'), toIntervalWeek(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-03-03 14:45:50'), toIntervalMonth(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-08-02 14:45:50'), toIntervalQuarter(1), toDateTime('2022-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-03 14:44:30'), 'Europe/Amsterdam'); - -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); -SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); -SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT '-- Negative tests'; +-- time and origin arguments must have the same type SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalSecond(5), toDate32('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-01-02 14:45:50'), toIntervalMillisecond(12), toDateTime('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate32('2023-01-02 14:45:50'), toIntervalHour(5), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), toDateTime64('2023-01-02 14:44:30', 2)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime64('2023-01-02 14:45:50', 2), toIntervalMinute(1), toDate('2023-01-02 14:44:30')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- the origin must be before the time SELECT toStartOfInterval(toDateTime('2023-01-02 14:42:50'), toIntervalMinute(1), toDateTime('2023-01-02 14:44:30')); -- { serverError BAD_ARGUMENTS } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00'), 'Europe/Amsterdam') from numbers(1); -- { serverError ILLEGAL_COLUMN } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- the origin must be constant +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalMinute(1), number % 2 == 0 ? toDateTime('2023-02-01 15:55:00') : toDateTime('2023-01-01 15:55:00')) from numbers(1); -- { serverError ILLEGAL_COLUMN } SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalHour(1), materialize(toDateTime('2023-01-02 14:44:30')), 'Europe/Amsterdam'); -- { serverError ILLEGAL_COLUMN } + +-- with 4 arguments, the 3rd one must not be a string or an integer +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 'Europe/Amsterdam', 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5, 'Europe/Amsterdam'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- too many arguments +SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT 'Time and origin as Time'; +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalHour(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMinute(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalSecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMillisecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMicrosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalNanosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Time and origin as DateTime'; +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-02-01 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-08-01 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); -- broken, should that not return 2023-10-09 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); -- broken, should that not return 2023-10-05 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 10:11:10')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMillisecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT 'Time and origin as DateTime64(9)'; +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) + +SELECT 'Time and origin as DateTime64(3)'; +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) + +-- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +-- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +-- SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +-- SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); From 01036a21f59925b78f45415cacb1953481292e31 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:44:40 +0100 Subject: [PATCH 011/844] Fixed bugs regarding precision --- src/Functions/toStartOfInterval.cpp | 124 +++++++++++++++++++++------- 1 file changed, 95 insertions(+), 29 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ec0deee8abd..da4eba9a594 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,8 +1,10 @@ -#include "Common/IntervalKind.h" +#include +#include #include #include #include #include +#include "DataTypes/IDataType.h" #include #include #include @@ -14,7 +16,6 @@ #include #include #include -#include #include @@ -116,9 +117,22 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); + + if (arguments[0].type.get() != arguments[2].type.get()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); } else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + { third_argument = ThirdArgument::IsOrigin; + if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) + result_type = ResultType::DateTime64; + else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) + result_type = ResultType::DateTime; + else if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (isDate(arguments[2].type) || isDate32(arguments[2].type))) + result_type = ResultType::Date; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); + } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", @@ -180,13 +194,14 @@ public: } case ResultType::DateTime64: { - UInt32 scale = 0; + auto scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); + UInt32 scale = scale_date_time; if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; + scale = 9 > scale_date_time ? 9 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; + scale = 6 > scale_date_time ? 6 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; + scale = 3 > scale_date_time ? 3 : scale_date_time; const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); @@ -209,11 +224,19 @@ public: const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); - auto result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + ColumnPtr result_column = nullptr; + if (isDateTime64(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else if (isDateTime(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + return result_column; } private: + template ColumnPtr dispatchForTimeColumn( const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { @@ -229,7 +252,7 @@ private: auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } else if (isDateTime(time_column_type)) { @@ -238,7 +261,7 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate(time_column_type)) { @@ -247,7 +270,7 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDate32(time_column_type)) { @@ -257,12 +280,12 @@ private: const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. Must contain dates or dates with time", getName()); } - template + template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const @@ -282,32 +305,52 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Day: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); case IntervalKind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); } + template + Int64 decideScaleOnPrecision(const UInt16 scale) const + { + static constexpr Int64 MILLISECOND_SCALE = 1000; + static constexpr Int64 MICROSECOND_SCALE = 1000000; + static constexpr Int64 NANOSECOND_SCALE = 1000000000; + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + switch (unit) + { + case IntervalKind::Millisecond: + return MILLISECOND_SCALE; + case IntervalKind::Microsecond: + return MICROSECOND_SCALE; + case IntervalKind::Nanosecond: + return NANOSECOND_SCALE; + default: + return scale_multiplier; + } + } + template ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { @@ -323,6 +366,8 @@ private: result_data.resize(size); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + Int64 scale_on_precision = decideScaleOnPrecision(scale); + Int64 scale_diff = scale_on_precision > scale_multiplier ? scale_on_precision / scale_multiplier : scale_multiplier / scale_on_precision; if (origin_column.column == nullptr) { @@ -342,19 +387,40 @@ private: t -= origin; auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); - if (!(unit == IntervalKind::Millisecond || unit == IntervalKind::Microsecond || unit == IntervalKind::Nanosecond) && scale_multiplier != 10) - origin = origin / scale_multiplier; - static constexpr size_t SECONDS_PER_DAY = 86400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) - result_data[i] = static_cast(origin/SECONDS_PER_DAY + res); + { + if (isDate(result_type) || isDate32(result_type)) + { + result_data[i] += origin + res; + } + else if (isDateTime64(result_type)) + { + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); + } + else + { + result_data[i] += origin + res * SECONDS_PER_DAY; + } + } else - result_data[i] += origin + res; + { + if (isDate(result_type) || isDate32(result_type)) + res = res / SECONDS_PER_DAY; + + if (scale_on_precision > scale_multiplier) + { + result_data[i] += (origin + res / scale_diff) * scale_diff; + } + else + { + result_data[i] += origin + res * scale_diff; + } + } } } - return result_col; } }; From 65f34394cf84fa12f6852e9988c8a3d62c155701 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:45:29 +0100 Subject: [PATCH 012/844] Update 02916_to_start_of_interval_with_origin.reference --- ...to_start_of_interval_with_origin.reference | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 3574da8f685..806330743d7 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,40 +1,40 @@ -- Negative tests Time and origin as Time -1971-01-01 -1971-07-01 -1970-02-01 -1970-01-05 -1970-01-02 05:27:18 -Time and origin as DateTime 2023-02-01 2023-08-01 2023-10-09 2023-10-05 +2023-10-08 +Time and origin as DateTime +2023-02-01 09:08:07 +2023-08-01 09:08:07 +2023-10-09 09:08:07 +2023-10-05 09:08:07 2023-10-09 09:08:07 2023-10-09 10:10:07 2023-10-09 10:11:11 2023-10-09 10:11:12 Time and origin as DateTime64(9) -2023-02-01 -2023-08-01 -2023-10-09 -2023-10-05 -2023-10-09 09:08:07 -2023-10-09 10:10:07 -2023-10-09 10:11:11 -2023-10-09 10:11:12 -2299-12-31 23:57:37.653 -2299-12-31 23:42:04.320986 +2023-02-01 09:08:07.123456789 +2023-08-01 09:08:07.123456789 +2023-09-10 09:08:07.123456789 +2023-10-05 09:08:07.123456789 +2023-10-08 09:08:07.123543189 +2023-10-09 09:10:07.123460389 +2023-10-09 10:10:11.123456849 +2023-10-09 10:11:10.123456791 +2023-10-09 10:11:12.987456789 +2023-10-09 10:11:12.987653789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) -2023-02-01 -2023-08-01 -2023-10-09 -2023-10-05 -2023-10-09 09:08:07 -2023-10-09 10:10:07 -2023-10-09 10:11:11 -2023-10-09 10:11:12 +2023-02-01 09:08:07.123 +2023-08-01 09:08:07.123 +2023-10-09 09:08:07.123 +2023-10-05 09:08:07.123 +2023-10-08 09:09:33.523 +2023-10-09 09:10:10.723 +2023-10-09 10:10:11.183 +2023-10-09 10:11:10.125 2023-10-09 10:11:12.987 -1970-01-20 15:20:47.136123 -1970-01-01 00:28:17.710272123 +2023-10-09 10:11:12.987000 +2023-10-09 10:11:12.987000000 From bd105d51ce469d9ed0c741a7a45012d506e34ddd Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:46:00 +0100 Subject: [PATCH 013/844] fixed tests --- ...02916_to_start_of_interval_with_origin.sql | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 864ef56e7ea..853103ecc77 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -24,11 +24,11 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5 SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT 'Time and origin as Time'; -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); -- broken -SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); -- broken +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalWeek(1), toDate('2023-10-01')); +SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalDay(1), toDate('2023-10-08')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalHour(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMinute(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalSecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -37,10 +37,10 @@ SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMicrosecond(1), toDate( SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalNanosecond(1), toDate('2023-10-09')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 'Time and origin as DateTime'; -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-02-01 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); -- broken, should that not return 2023-08-01 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); -- broken, should that not return 2023-10-09 09:08:07? -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); -- broken, should that not return 2023-10-05 09:08:07? +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalYear(1), toDateTime('2022-02-01 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalQuarter(1), toDateTime('2022-02-01 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), toDateTime('2023-09-08 09:08:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); @@ -50,30 +50,30 @@ SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecon SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 'Time and origin as DateTime64(9)'; -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123456789', 9)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMonth(1), toDateTime64('2023-09-10 09:08:07.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); -- broken (2299) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); SELECT 'Time and origin as DateTime64(3)'; -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); -- broken, see above -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); -- broken, see above +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalYear(1), toDateTime64('2022-02-01 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalQuarter(1), toDateTime64('2022-02-01 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMonth(1), toDateTime64('2023-09-08 09:08:07.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- broken (1970) +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); -- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); -- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); From 885e44c50e1968d660866e53f25110cb662e27a2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 18:17:41 +0100 Subject: [PATCH 014/844] style fix --- src/Functions/toStartOfInterval.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index da4eba9a594..4fae8cb6bb6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -231,7 +231,6 @@ public: result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); - return result_column; } From e5fdad21609704baa3b90eb07161461120364fb2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:09:55 +0100 Subject: [PATCH 015/844] fix non-const arguments --- src/Functions/toStartOfInterval.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 4fae8cb6bb6..94514d1e1a0 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -392,17 +392,11 @@ private: if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { if (isDate(result_type) || isDate32(result_type)) - { result_data[i] += origin + res; - } else if (isDateTime64(result_type)) - { result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); - } else - { result_data[i] += origin + res * SECONDS_PER_DAY; - } } else { @@ -410,13 +404,11 @@ private: res = res / SECONDS_PER_DAY; if (scale_on_precision > scale_multiplier) - { result_data[i] += (origin + res / scale_diff) * scale_diff; - } + else if (scale_on_precision == scale_multiplier && scale_on_precision != 10) /// scale == 10 is default case + result_data[i] += origin + (res * scale_on_precision); else - { result_data[i] += origin + res * scale_diff; - } } } } From ac8d80da515144d2d4fe3ed0c47e2bab1d171927 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:06 +0100 Subject: [PATCH 016/844] fix --- src/Functions/toStartOfInterval.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 94514d1e1a0..e67d3378f6e 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -405,7 +405,8 @@ private: if (scale_on_precision > scale_multiplier) result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_precision == scale_multiplier && scale_on_precision != 10) /// scale == 10 is default case + else if (scale_on_precision == scale_multiplier && scale_on_precision % 1000 != 0 && scale_multiplier != 10) /// when it's not a default case with DateTime + /// and when precision is not sub-scale result_data[i] += origin + (res * scale_on_precision); else result_data[i] += origin + res * scale_diff; From 7be47eca5e3ee31ef958f5b9e9804ceb15f35f48 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:27 +0100 Subject: [PATCH 017/844] Update 02916_to_start_of_interval_with_origin.reference --- ...02916_to_start_of_interval_with_origin.reference | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 806330743d7..870853bc371 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -38,3 +38,16 @@ Time and origin as DateTime64(3) 2023-10-09 10:11:12.987 2023-10-09 10:11:12.987000 2023-10-09 10:11:12.987000000 +Non-const arguments +2023-03-01 16:55:00.00 +2023-02-01 16:55:00.00 +2023-03-01 16:55:00.00 +2023-02-01 16:55:00.00 +2023-03-01 16:55:00.00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-02-01 16:55:00 +2023-03-01 16:55:00 +2023-01-02 15:44:30 +2023-02-01 16:44:30.00 From 969c7f36a5db55ef53cbce2def91820b619c23ea Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:28:45 +0100 Subject: [PATCH 018/844] fix tests --- .../02916_to_start_of_interval_with_origin.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 853103ecc77..71f5fb7fb36 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -75,7 +75,8 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalM SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987', 3), toIntervalNanosecond(1), toDateTime64('2023-10-09 10:11:12.123', 3)); --- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); --- SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); --- SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); --- SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); +SELECT 'Non-const arguments'; +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime64('2023-03-01 15:55:00', 2) : toDateTime64('2023-02-01 15:55:00', 2), toIntervalMinute(1), toDateTime64('2023-01-01 13:55:00', 2), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(number % 2 == 0 ? toDateTime('2023-03-01 15:55:00') : toDateTime('2023-02-01 15:55:00'), toIntervalHour(1), toDateTime('2023-01-01 13:55:00'), 'Europe/Amsterdam') from numbers(5); +SELECT toStartOfInterval(materialize(toDateTime('2023-01-02 14:45:50')), toIntervalHour(1), toDateTime('2023-01-02 14:44:30'), 'Europe/Amsterdam'); +SELECT toStartOfInterval(materialize(toDateTime64('2023-02-01 15:45:50', 2)), toIntervalHour(1), toDateTime64('2023-01-02 14:44:30', 2), 'Europe/Amsterdam'); From 3027f3a04f99beafdc8ead3641aa84fdf4f6657a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Dec 2023 15:29:11 +0000 Subject: [PATCH 019/844] fix tests --- src/Functions/toStartOfInterval.cpp | 23 ++++++++++--------- .../02207_subseconds_intervals.reference | 16 ++++++------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e67d3378f6e..7c75d7aa8e9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -117,9 +117,6 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); - - if (arguments[0].type.get() != arguments[2].type.get()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); } else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) { @@ -137,7 +134,6 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " "This argument is optional and must be a constant String with timezone name or a Date/Date32/DateTime/DateTime64 with a constant origin", type_arg3->getName(), getName()); - }; auto check_fourth_argument = [&] @@ -365,13 +361,19 @@ private: result_data.resize(size); Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - Int64 scale_on_precision = decideScaleOnPrecision(scale); - Int64 scale_diff = scale_on_precision > scale_multiplier ? scale_on_precision / scale_multiplier : scale_multiplier / scale_on_precision; + Int64 scale_on_interval = decideScaleOnPrecision(scale); + Int64 scale_diff = scale_on_interval > scale_multiplier ? scale_on_interval / scale_multiplier : scale_multiplier / scale_on_interval; if (origin_column.column == nullptr) { for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + { + result_data[i] = 0; + if (scale_on_interval < scale_multiplier) + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)) * scale_diff; + else + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + } } else { @@ -403,11 +405,10 @@ private: if (isDate(result_type) || isDate32(result_type)) res = res / SECONDS_PER_DAY; - if (scale_on_precision > scale_multiplier) + if (scale_on_interval > scale_multiplier) result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_precision == scale_multiplier && scale_on_precision % 1000 != 0 && scale_multiplier != 10) /// when it's not a default case with DateTime - /// and when precision is not sub-scale - result_data[i] += origin + (res * scale_on_precision); + else if (scale_on_interval == scale_multiplier && scale_on_interval % 1000 != 0 && scale_multiplier != 10) + result_data[i] += origin + (res * scale_on_interval); else result_data[i] += origin + res * scale_diff; } diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..b0edbda5e76 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345600 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.12345600 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123 -1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123000 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123 +1930-12-12 12:12:12.123000 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123 +2220-12-12 12:12:12.123000 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 From d08fd931bec203f1b86d89227b766973ad3612c9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:42:00 +0000 Subject: [PATCH 020/844] fix error --- src/Functions/toStartOfInterval.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 7c75d7aa8e9..e0301f45ded 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -190,8 +190,10 @@ public: } case ResultType::DateTime64: { - auto scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); - UInt32 scale = scale_date_time; + UInt32 scale = 0; + auto scale_date_time = 0; + if (third_argument == ThirdArgument::IsOrigin) + scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale_date_time ? 9 : scale_date_time; else if (interval_type->getKind() == IntervalKind::Microsecond) From e13eec9c5a9cb49004aef8d635034e9ccfd697d4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:07:39 +0000 Subject: [PATCH 021/844] fix --- src/Functions/toStartOfInterval.cpp | 11 +++++------ .../02207_subseconds_intervals.reference | 16 ++++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e0301f45ded..e90f965a6f9 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,15 +191,14 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - auto scale_date_time = 0; - if (third_argument == ThirdArgument::IsOrigin) - scale_date_time = assert_cast(*arguments[0].type.get()).getScale(); + if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) + scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9 > scale_date_time ? 9 : scale_date_time; + scale = 9 > scale ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6 > scale_date_time ? 6 : scale_date_time; + scale = 6 > scale ? 6 : scale; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3 > scale_date_time ? 3 : scale_date_time; + scale = 3 > scale ? 3 : scale; const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index b0edbda5e76..91f0ecb8606 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345600 -1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123456 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.12345600 +1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345600 +2220-12-12 12:12:12.123456 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123000 -1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123000 +1930-12-12 12:12:12.123 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123000 +2220-12-12 12:12:12.123 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 From 4c83b7e46ffe50409aab35b889839fb6cb92a18e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 15:18:43 +0000 Subject: [PATCH 022/844] style fix --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e90f965a6f9..b55d92f809b 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,7 +191,7 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) + if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale ? 9 : scale; From 5793725bc7ae8cfd66bea9c9f82ef0e30c124314 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:45:45 +0000 Subject: [PATCH 023/844] tests --- .../02207_subseconds_intervals.reference | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..bedd5d4878b 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345600 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.12345600 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123 -1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123000 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123 +1930-12-12 12:12:12.123000 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123 +2220-12-12 12:12:12.123000 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 @@ -75,4 +75,4 @@ test subtract[...]seconds() 2022-12-31 23:59:59.999 2022-12-31 23:59:59.900 2023-01-01 00:00:00.001 -2023-01-01 00:00:00.100 +2023-01-01 00:00:00.100 \ No newline at end of file From 53ef9c0cb80c734caa449430ff4600f32b900134 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 18:38:25 +0100 Subject: [PATCH 024/844] Update 02207_subseconds_intervals.reference --- tests/queries/0_stateless/02207_subseconds_intervals.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index bedd5d4878b..b0edbda5e76 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -75,4 +75,4 @@ test subtract[...]seconds() 2022-12-31 23:59:59.999 2022-12-31 23:59:59.900 2023-01-01 00:00:00.001 -2023-01-01 00:00:00.100 \ No newline at end of file +2023-01-01 00:00:00.100 From 87bda03da17cdf0e3878a1df73175c92bae834c6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Dec 2023 21:36:24 +0100 Subject: [PATCH 025/844] logical error --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b55d92f809b..b6a3a9389d6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -191,7 +191,7 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime(arguments[0].type) || isDateTime64(arguments[0].type)) + if (isDate32(arguments[0].type) || isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) scale = 9 > scale ? 9 : scale; From d1c49cc9bcb869030361821e60349e8054a51c4b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:55:53 +0100 Subject: [PATCH 026/844] Added comments, simplified and fixed review --- src/Functions/toStartOfInterval.cpp | 138 ++++++++++++++++------------ 1 file changed, 81 insertions(+), 57 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b6a3a9389d6..6c71b357590 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,9 +1,7 @@ -#include -#include #include #include #include -#include +#include #include "DataTypes/IDataType.h" #include #include @@ -16,7 +14,7 @@ #include #include #include -#include +#include namespace DB @@ -31,21 +29,23 @@ namespace ErrorCodes } -namespace -{ - class FunctionToStartOfInterval : public IFunction { public: + enum class Overload + { + Default, /// toStartOfInterval(time, interval) or toStartOfInterval(time, interval, timezone) + Origin /// toStartOfInterval(time, interval, origin) or toStartOfInterval(time, interval, origin, timezone) + }; + mutable Overload overload; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } static constexpr auto name = "toStartOfInterval"; String getName() const override { return name; } - bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } @@ -59,8 +59,9 @@ public: { const DataTypePtr & type_arg1 = arguments[0].type; if (!isDate(type_arg1) && !isDateTime(type_arg1) && !isDateTime64(type_arg1)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1st argument of function {}. " - "Should be a date or a date with time", type_arg1->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of 1st argument of function {}, expected a Date, DateTime or DateTime64", + type_arg1->getName(), getName()); value_is_date = isDate(type_arg1); }; @@ -75,10 +76,14 @@ public: auto check_second_argument = [&] { const DataTypePtr & type_arg2 = arguments[1].type; + interval_type = checkAndGetDataType(type_arg2.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. " - "Should be an interval of time", type_arg2->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of 2nd argument of function {}, expected a time interval", + type_arg2->getName(), getName()); + + /// Result here is determined for default overload (without origin) switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: @@ -89,7 +94,7 @@ public: case IntervalKind::Second: case IntervalKind::Minute: case IntervalKind::Hour: - case IntervalKind::Day: + case IntervalKind::Day: /// weird why Day leads to DateTime but too afraid to change it result_type = ResultType::DateTime; break; case IntervalKind::Week: @@ -101,31 +106,26 @@ public: } }; - enum class ThirdArgument - { - IsTimezone, - IsOrigin - }; - ThirdArgument third_argument; /// valid only if 3rd argument is given auto check_third_argument = [&] { const DataTypePtr & type_arg3 = arguments[2].type; if (isString(type_arg3)) { - third_argument = ThirdArgument::IsTimezone; + overload = Overload::Default; + if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); } - else if (isDateOrDate32OrDateTimeOrDateTime64(type_arg3)) + else if (isDateTimeOrDateTime64(type_arg3) || isDate(type_arg3)) { - third_argument = ThirdArgument::IsOrigin; + overload = Overload::Origin; if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) result_type = ResultType::DateTime64; else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) result_type = ResultType::DateTime; - else if ((isDate(arguments[0].type) || isDate32(arguments[0].type)) && (isDate(arguments[2].type) || isDate32(arguments[2].type))) + else if (isDate(arguments[0].type) && isDate(arguments[2].type)) result_type = ResultType::Date; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); @@ -138,7 +138,7 @@ public: auto check_fourth_argument = [&] { - if (third_argument != ThirdArgument::IsOrigin) /// sanity check + if (overload != Overload::Origin) /// sanity check throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 3rd argument of function {}. " "The third argument must a Date/Date32/DateTime/DateTime64 with a constant origin", arguments[2].type->getName(), getName()); @@ -185,7 +185,7 @@ public: return std::make_shared(); case ResultType::DateTime: { - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } case ResultType::DateTime64: @@ -200,7 +200,7 @@ public: else if (interval_type->getKind() == IntervalKind::Millisecond) scale = 3 > scale ? 3 : scale; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && third_argument == ThirdArgument::IsTimezone)) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); } } @@ -278,25 +278,25 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. Must contain dates or dates with time", getName()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } template ColumnPtr dispatchForIntervalColumn( const TimeDataType & time_data_type, const TimeColumnType & time_column, const ColumnWithTypeAndName & interval_column, const ColumnWithTypeAndName & origin_column, - const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const + const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName()); const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be a const interval of time.", getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName()); - Int64 num_units = interval_column_const_int64->getValue(); + const Int64 num_units = interval_column_const_int64->getValue(); if (num_units <= 0) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for 2nd argument of function {} must be positive", getName()); switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { @@ -328,12 +328,11 @@ private: } template - Int64 decideScaleOnPrecision(const UInt16 scale) const + Int64 decideScaleOnPrecision() const { static constexpr Int64 MILLISECOND_SCALE = 1000; static constexpr Int64 MICROSECOND_SCALE = 1000000; static constexpr Int64 NANOSECOND_SCALE = 1000000000; - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); switch (unit) { case IntervalKind::Millisecond: @@ -343,37 +342,41 @@ private: case IntervalKind::Nanosecond: return NANOSECOND_SCALE; default: - return scale_multiplier; + return 1; } } - template + template ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { - using ToColumnType = typename ToDataType::ColumnType; - using ToFieldType = typename ToDataType::FieldType; + using ResultColumnType = typename ResultDataType::ColumnType; + using ResultFieldType = typename ResultDataType::FieldType; const auto & time_data = time_column_type.getData(); size_t size = time_data.size(); auto result_col = result_type->createColumn(); - auto * col_to = assert_cast(result_col.get()); + auto * col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); result_data.resize(size); - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); - Int64 scale_on_interval = decideScaleOnPrecision(scale); - Int64 scale_diff = scale_on_interval > scale_multiplier ? scale_on_interval / scale_multiplier : scale_multiplier / scale_on_interval; + Int64 scale_on_time = DecimalUtils::scaleMultiplier(scale); // scale that depends on type of arguments + Int64 scale_on_interval = decideScaleOnPrecision(); // scale that depends on the Interval + /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them + /// to get the right precision for the result. + Int64 scale_diff = scale_on_interval > scale_on_time ? scale_on_interval / scale_on_time : scale_on_time / scale_on_interval; if (origin_column.column == nullptr) { for (size_t i = 0; i != size; ++i) { result_data[i] = 0; - if (scale_on_interval < scale_multiplier) - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)) * scale_diff; + if (scale_on_interval < scale_on_time) + /// if we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_multiplier)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); } } else @@ -387,31 +390,54 @@ private: throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_multiplier)); + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_on_time)); static constexpr size_t SECONDS_PER_DAY = 86400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { + /// By default, when we use week, month, quarter or year interval, we get date return type. So, simply add values. if (isDate(result_type) || isDate32(result_type)) result_data[i] += origin + res; - else if (isDateTime64(result_type)) - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_multiplier); - else + /// When we use DateTime arguments, we should keep in mind that we also have hours, minutes and seconds there, + /// so we need to multiply result by amount of seconds per day. + else if (isDateTime(result_type)) result_data[i] += origin + res * SECONDS_PER_DAY; + /// When we use DateTime64 arguments, we also should multiply it on right scale. + else + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_on_time); } else { + /// In this case result will be calculated as datetime, so we need to get the amount of days if the arguments are Date. if (isDate(result_type) || isDate32(result_type)) res = res / SECONDS_PER_DAY; - if (scale_on_interval > scale_multiplier) - result_data[i] += (origin + res / scale_diff) * scale_diff; - else if (scale_on_interval == scale_multiplier && scale_on_interval % 1000 != 0 && scale_multiplier != 10) - result_data[i] += origin + (res * scale_on_interval); + /// Case when Interval has default scale + if (scale_on_interval == 1) + { + /// Case when the arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. + if (scale_on_time % 1000 != 0 && scale_on_time >= 1000) + result_data[i] += (origin + res / scale_on_time) * scale_on_time; + /// Special case when the arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't + else if (scale_on_time == 100) + result_data[i] += (origin + res * scale_on_time); + /// Cases when precision of DateTime64 is 1, 3, 6, 9 e.g. has right precision in res and origin. + else + result_data[i] += (origin + res); + } + /// Case when Interval has some specific scale (3,6,9) else - result_data[i] += origin + res * scale_diff; + { + /// If we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. + if (scale_on_interval < scale_on_time) + result_data[i] += origin + res * scale_diff; + /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't + else + result_data[i] += (origin + res / scale_diff) * scale_diff; + } } } } @@ -419,8 +445,6 @@ private: } }; -} - REGISTER_FUNCTION(ToStartOfInterval) { factory.registerFunction(); From 36fa954f5000c8130f2556abba6052392dd16fa7 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:19:11 +0100 Subject: [PATCH 027/844] Stly check --- src/Functions/toStartOfInterval.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index f4171748b92..fb071c952ab 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -371,7 +371,7 @@ private: { result_data[i] = 0; if (scale_on_interval < scale_on_time) - /// if we have a time argument that has bigger scale than the interval can contain, we need + /// if we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; else @@ -429,7 +429,7 @@ private: /// Case when Interval has some specific scale (3,6,9) else { - /// If we have a time argument that has bigger scale than the interval can contain, we need + /// If we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. if (scale_on_interval < scale_on_time) result_data[i] += origin + res * scale_diff; From 202ca21e3f1366479ec4f0d786adeab6e54cc3d4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 13 Dec 2023 15:16:51 +0100 Subject: [PATCH 028/844] fix tests --- src/Functions/toStartOfInterval.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index fb071c952ab..56e721b7601 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -370,12 +370,12 @@ private: for (size_t i = 0; i != size; ++i) { result_data[i] = 0; - if (scale_on_interval < scale_on_time) - /// if we have a time argument that has bigger scale than the interval can contain, we need + if (scale_on_interval < scale_on_time && scale_on_interval != 1) + /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_interval)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)) * scale_diff; else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); } } else From 174309821a9813dba1d2a090768332e9de9470e3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 19 Dec 2023 20:48:30 +0000 Subject: [PATCH 029/844] Small fixups --- src/Functions/toStartOfInterval.cpp | 181 +++++++++++++--------------- 1 file changed, 85 insertions(+), 96 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 56e721b7601..81a2fd0a75d 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes class FunctionToStartOfInterval : public IFunction { -public: +private: enum class Overload { Default, /// toStartOfInterval(time, interval) or toStartOfInterval(time, interval, timezone) @@ -39,6 +39,7 @@ public: }; mutable Overload overload; +public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } static constexpr auto name = "toStartOfInterval"; @@ -82,7 +83,9 @@ public: "Illegal type {} of 2nd argument of function {}, expected a time interval", type_arg2->getName(), getName()); - /// Result here is determined for default overload (without origin) + overload = Overload::Default; + + /// Determine result type for default overload (no origin) switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { case IntervalKind::Nanosecond: @@ -110,21 +113,20 @@ public: const DataTypePtr & type_arg3 = arguments[2].type; if (isString(type_arg3)) { - overload = Overload::Default; - if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + "A timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); } - else if (isDateTimeOrDateTime64(type_arg3) || isDate(type_arg3)) + else if (isDate(type_arg3) || isDateTime(type_arg3) || isDateTime64(type_arg3)) { overload = Overload::Origin; - if (isDateTime64(arguments[0].type) && isDateTime64(arguments[2].type)) + const DataTypePtr & type_arg1 = arguments[0].type; + if (isDateTime64(type_arg1) && isDateTime64(type_arg3)) result_type = ResultType::DateTime64; - else if (isDateTime(arguments[0].type) && isDateTime(arguments[2].type)) + else if (isDateTime(type_arg1) && isDateTime(type_arg3)) result_type = ResultType::DateTime; - else if (isDate(arguments[0].type) && isDate(arguments[2].type)) + else if (isDate(type_arg1) && isDate(type_arg3)) result_type = ResultType::Date; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); @@ -149,7 +151,7 @@ public: type_arg4->getName(), getName()); if (value_is_date && result_type == ResultType::Date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", + "A timezone argument of function {} with interval type {} is allowed only when the 1st argument has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); }; @@ -190,14 +192,14 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDate32(arguments[0].type) || isDateTime64(arguments[0].type)) + if (isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9 > scale ? 9 : scale; + scale = (9 > scale) ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6 > scale ? 6 : scale; + scale = (6 > scale) ? 6 : scale; else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3 > scale ? 3 : scale; + scale = (3 > scale) ? 3 : scale; const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, time_zone_arg_num, 0, false)); @@ -213,20 +215,19 @@ public: const auto & interval_column = arguments[1]; ColumnWithTypeAndName origin_column; - const bool has_origin_arg = (arguments.size() == 3 && isDateOrDate32OrDateTimeOrDateTime64(arguments[2].type)) || arguments.size() == 4; - if (has_origin_arg) + if (overload == Overload::Origin) origin_column = arguments[2]; - const size_t time_zone_arg_num = (arguments.size() == 2 || (arguments.size() == 3 && isString(arguments[2].type))) ? 2 : 3; + const size_t time_zone_arg_num = (overload == Overload::Origin) ? 3 : 2; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, time_zone_arg_num, 0); - ColumnPtr result_column = nullptr; - if (isDateTime64(result_type)) - result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + ColumnPtr result_column; + if (isDate(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); else if (isDateTime(result_type)) result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); - else - result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); + else if (isDateTime64(result_type)) + result_column = dispatchForTimeColumn(time_column, interval_column, origin_column, result_type, time_zone); return result_column; } @@ -238,44 +239,24 @@ private: const auto & time_column_type = *time_column.type.get(); const auto & time_column_col = *time_column.column.get(); - if (isDateTime64(time_column_type)) + if (isDate(time_column_type)) { - if (origin_column.column != nullptr && !isDateTime64(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); - auto scale = assert_cast(time_column_type).getScale(); - - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); - } - else if (isDateTime(time_column_type)) - { - if (origin_column.column != nullptr && !isDateTime(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); - } - else if (isDate(time_column_type)) - { - if (origin_column.column != nullptr && !isDate(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } - else if (isDate32(time_column_type)) + else if (isDateTime(time_column_type)) { - if (origin_column.column != nullptr) - if (!isDate32(origin_column.type.get())) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Datetime argument and origin argument for function {} must have the same type", getName()); - - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); + } + else if (isDateTime64(time_column_type)) + { + const auto * time_column_vec = checkAndGetColumn(time_column_col); + auto scale = assert_cast(time_column_type).getScale(); + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone, scale); } throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for 1st argument of function {}, expected a Date, DateTime or DateTime64", getName()); } @@ -327,26 +308,23 @@ private: } template - Int64 decideScaleOnPrecision() const + static Int64 scaleFromInterval() { - static constexpr Int64 MILLISECOND_SCALE = 1000; - static constexpr Int64 MICROSECOND_SCALE = 1000000; - static constexpr Int64 NANOSECOND_SCALE = 1000000000; switch (unit) { case IntervalKind::Millisecond: - return MILLISECOND_SCALE; + return 1'000; case IntervalKind::Microsecond: - return MICROSECOND_SCALE; + return 1'000'000; case IntervalKind::Nanosecond: - return NANOSECOND_SCALE; + return 1'000'000'000; default: return 1; } } template - ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + ColumnPtr execute(const TimeDataType &, const ColumnType & time_column_type, Int64 num_units, const ColumnWithTypeAndName & origin_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone, UInt16 scale) const { using ResultColumnType = typename ResultDataType::ColumnType; using ResultFieldType = typename ResultDataType::FieldType; @@ -359,23 +337,29 @@ private: auto & result_data = col_to->getData(); result_data.resize(size); - Int64 scale_on_time = DecimalUtils::scaleMultiplier(scale); // scale that depends on type of arguments - Int64 scale_on_interval = decideScaleOnPrecision(); // scale that depends on the Interval + const Int64 scale_time = DecimalUtils::scaleMultiplier(scale); + const Int64 scale_interval = scaleFromInterval(); + /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them /// to get the right precision for the result. - Int64 scale_diff = scale_on_interval > scale_on_time ? scale_on_interval / scale_on_time : scale_on_time / scale_on_interval; + const Int64 scale_diff = (scale_interval > scale_time) ? (scale_interval / scale_time) : (scale_time / scale_interval); if (origin_column.column == nullptr) { - for (size_t i = 0; i != size; ++i) + if (scale_time > scale_interval && scale_interval != 1) { - result_data[i] = 0; - if (scale_on_interval < scale_on_time && scale_on_interval != 1) + for (size_t i = 0; i != size; ++i) + { /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)) * scale_diff; - else - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_on_time)); + result_data[i] = 0; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)) * scale_diff; + } + } + else + { + for (size_t i = 0; i != size; ++i) + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)); } } else @@ -386,55 +370,60 @@ private: { auto t = time_data[i]; if (origin > static_cast(t)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date/datetime"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); + + /// The trick to calculate the interval starting from an offset is to + /// 1. subtract the offset, + /// 2. perform the calculation, and + /// 3. add the offset to the result. t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_on_time)); + auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_time)); - static constexpr size_t SECONDS_PER_DAY = 86400; + static constexpr size_t SECONDS_PER_DAY = 86'400; result_data[i] = 0; if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) { - /// By default, when we use week, month, quarter or year interval, we get date return type. So, simply add values. - if (isDate(result_type) || isDate32(result_type)) + /// For such intervals, ToStartOfInterval::execute() returns days + if (isDate(result_type)) result_data[i] += origin + res; - /// When we use DateTime arguments, we should keep in mind that we also have hours, minutes and seconds there, - /// so we need to multiply result by amount of seconds per day. else if (isDateTime(result_type)) result_data[i] += origin + res * SECONDS_PER_DAY; - /// When we use DateTime64 arguments, we also should multiply it on right scale. - else - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_on_time); + else if (isDateTime64(result_type)) + result_data[i] += origin + (res * SECONDS_PER_DAY * scale_time); } else { - /// In this case result will be calculated as datetime, so we need to get the amount of days if the arguments are Date. - if (isDate(result_type) || isDate32(result_type)) + /// ToStartOfInterval::execute() returns seconds + + if (isDate(result_type)) res = res / SECONDS_PER_DAY; - /// Case when Interval has default scale - if (scale_on_interval == 1) + if (scale_interval == 1) { - /// Case when the arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. - if (scale_on_time % 1000 != 0 && scale_on_time >= 1000) - result_data[i] += (origin + res / scale_on_time) * scale_on_time; - /// Special case when the arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't - else if (scale_on_time == 100) - result_data[i] += (origin + res * scale_on_time); - /// Cases when precision of DateTime64 is 1, 3, 6, 9 e.g. has right precision in res and origin. + /// Interval has default scale, i.e. Year - Second + + if (scale_time % 1000 != 0 && scale_time >= 1000) + /// The arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. + result_data[i] += (origin + res / scale_time) * scale_time; + else if (scale_time == 100) + /// The arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't + result_data[i] += (origin + res * scale_time); else + /// Precision of DateTime64 is 1, 3, 6, 9, e.g. has right precision in res and origin. result_data[i] += (origin + res); } - /// Case when Interval has some specific scale (3,6,9) else { - /// If we have a time argument that has bigger scale than the interval can contain, we need - /// to return a value with bigger precision and thus we should multiply result on the scale difference. - if (scale_on_interval < scale_on_time) + /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond + + if (scale_interval < scale_time) + /// If we have a time argument that has bigger scale than the interval can contain, we need + /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] += origin + res * scale_diff; - /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't else + /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't result_data[i] += (origin + res / scale_diff) * scale_diff; } } From 861421d27ac74fe11921c4ba901dcccb94df76e8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 16:03:22 +0000 Subject: [PATCH 030/844] fixes --- src/Functions/DateTimeTransforms.h | 57 ++++++--- src/Functions/toStartOfInterval.cpp | 117 ++++++++++++------ ...to_start_of_interval_with_origin.reference | 32 ++--- ...02916_to_start_of_interval_with_origin.sql | 8 +- 4 files changed, 139 insertions(+), 75 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 74b37e18907..dbe2b11d7b2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -487,7 +487,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000000000) { @@ -522,7 +522,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000000) { @@ -565,7 +565,7 @@ struct ToStartOfInterval { throwDateTimeIsNotSupported(TO_START_OF_INTERVAL_NAME); } - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier, Int64 /*origin*/ = 0) { if (scale_multiplier < 1000) { @@ -608,7 +608,7 @@ struct ToStartOfInterval { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -629,7 +629,7 @@ struct ToStartOfInterval { return time_zone.toStartOfMinuteInterval(t, minutes); } - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ -650,7 +650,7 @@ struct ToStartOfInterval { return time_zone.toStartOfHourInterval(t, hours); } - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -671,7 +671,7 @@ struct ToStartOfInterval { return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 /*origin*/ = 0) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -692,9 +692,12 @@ struct ToStartOfInterval { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + if (origin == 0) + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); + else + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); } }; @@ -713,9 +716,24 @@ struct ToStartOfInterval { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + if (origin == 0) + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); + else + { + Int64 days = time_zone.toDayOfMonth(t / scale_multiplier + origin) - time_zone.toDayOfMonth(origin); + Int64 months_to_add = time_zone.toMonth(t / scale_multiplier + origin) - time_zone.toMonth(origin); + Int64 years = time_zone.toYear(t / scale_multiplier + origin) - time_zone.toYear(origin); + months_to_add = days < 0 ? months_to_add - 1 : months_to_add; + months_to_add += years * 12; + Int64 month_multiplier = (months_to_add / months) * months; + Int64 a = 0; + + a = time_zone.addMonths(time_zone.toDate(origin), month_multiplier); + // a += time_zone.toTime(origin); + return a - time_zone.toDate(origin); + } } }; @@ -734,9 +752,12 @@ struct ToStartOfInterval { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + if (origin == 0) + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); + else + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); } }; @@ -755,9 +776,15 @@ struct ToStartOfInterval { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier, Int64 origin = 0) { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + if (origin == 0) + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); + else + { + auto a = ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); + return a; + } } }; diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 81a2fd0a75d..1ba7fed4bee 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,17 +1,17 @@ +#include +#include +#include #include #include #include #include -#include "DataTypes/IDataType.h" #include #include #include #include #include -#include #include #include -#include #include #include #include @@ -270,6 +270,27 @@ private: if (!interval_type) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a time interval", getName()); + if (isDate(time_data_type) || isDateTime(time_data_type)) + { + switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) + { + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: + if (isDate(time_data_type) || isDateTime(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? "Date" : "DateTime"); + break; + case IntervalKind::Second: + case IntervalKind::Minute: + case IntervalKind::Hour: + if (isDate(time_data_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); + break; + default: + break; + } + } + const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for 2nd argument of function {}, must be a const time interval", getName()); @@ -337,94 +358,110 @@ private: auto & result_data = col_to->getData(); result_data.resize(size); - const Int64 scale_time = DecimalUtils::scaleMultiplier(scale); + const Int64 scale_endtime = DecimalUtils::scaleMultiplier(scale); const Int64 scale_interval = scaleFromInterval(); /// In case if we have a difference between time arguments and Interval, we need to calculate the difference between them /// to get the right precision for the result. - const Int64 scale_diff = (scale_interval > scale_time) ? (scale_interval / scale_time) : (scale_time / scale_interval); + const Int64 scale_diff = (scale_interval > scale_endtime) ? (scale_interval / scale_endtime) : (scale_endtime / scale_interval); if (origin_column.column == nullptr) { - if (scale_time > scale_interval && scale_interval != 1) + if (scale_endtime > scale_interval && scale_interval != 1) { for (size_t i = 0; i != size; ++i) { /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] = 0; - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)) * scale_diff; } } else { for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_time)); + result_data[i] = static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); } } else { UInt64 origin = origin_column.column->get64(0); + Int64 origin_scale = 1; + if (isDateTime64(origin_column.type.get())) + origin_scale = assert_cast(*origin_column.type.get()).getScale(); for (size_t i = 0; i != size; ++i) { - auto t = time_data[i]; - if (origin > static_cast(t)) + UInt64 end_time = time_data[i]; + + if (origin > static_cast(end_time) && origin_scale == scale) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); + else if (origin_scale > scale) + origin /= static_cast(std::pow(10, origin_scale - scale)); /// If aguments have different scales, we make + else if (origin_scale < scale) /// origin argument to have the same scale as the first argument. + origin *= static_cast(std::pow(10, scale - origin_scale)); /// The trick to calculate the interval starting from an offset is to /// 1. subtract the offset, /// 2. perform the calculation, and /// 3. add the offset to the result. - t -= origin; - auto res = static_cast(ToStartOfInterval::execute(t, num_units, time_zone, scale_time)); - static constexpr size_t SECONDS_PER_DAY = 86'400; - result_data[i] = 0; - if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + + if (isDate(origin_column.type.get())) /// We need to perform calculations on dateTime (dateTime64) values only. { - /// For such intervals, ToStartOfInterval::execute() returns days - if (isDate(result_type)) - result_data[i] += origin + res; - else if (isDateTime(result_type)) - result_data[i] += origin + res * SECONDS_PER_DAY; - else if (isDateTime64(result_type)) - result_data[i] += origin + (res * SECONDS_PER_DAY * scale_time); + end_time *= SECONDS_PER_DAY; + origin *= SECONDS_PER_DAY; + } + + Int64 delta = (end_time - origin) * (isDateTime64(origin_column.type.get()) ? 1 : scale_endtime); /// No need to multiply on scale endtime if we have dateTime64 argument. + Int64 offset = 0; + + { + auto origin_data = isDateTime64(result_type) ? origin / scale_endtime : origin; + offset = static_cast(ToStartOfInterval::execute(delta, num_units, time_zone, scale_endtime, origin_data)); + } + + + if (isDate(result_type)) /// The result should be a date and the calculations were as datetime. + result_data[i] += (origin + offset) / SECONDS_PER_DAY; + else if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + { + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + offset *= scale_endtime; + + result_data[i] += origin + offset; } else { - /// ToStartOfInterval::execute() returns seconds - - if (isDate(result_type)) - res = res / SECONDS_PER_DAY; + /// ToStartOfInterval::execute() returns seconds. if (scale_interval == 1) { - /// Interval has default scale, i.e. Year - Second + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + offset *= scale_endtime; - if (scale_time % 1000 != 0 && scale_time >= 1000) - /// The arguments are DateTime64 with precision like 4,5,7,8. Here res has right precision and origin doesn't. - result_data[i] += (origin + res / scale_time) * scale_time; - else if (scale_time == 100) - /// The arguments are DateTime64 with precision 2. Here origin has right precision and res doesn't - result_data[i] += (origin + res * scale_time); + /// Interval has default scale, i.e. Year - Second. + + if (scale_endtime % 1000 != 0 && scale_endtime >= 1000) + /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has right precision and origin doesn't. + result_data[i] += (origin + offset / scale_endtime) * scale_endtime; else - /// Precision of DateTime64 is 1, 3, 6, 9, e.g. has right precision in res and origin. - result_data[i] += (origin + res); + /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has right precision in offset and origin. + result_data[i] += (origin + offset); } else { - /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond + /// Interval has some specific scale (3,6,9), i.e. Millisecond - Nanosecond. - if (scale_interval < scale_time) + if (scale_interval < scale_endtime) /// If we have a time argument that has bigger scale than the interval can contain, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. - result_data[i] += origin + res * scale_diff; + result_data[i] += origin + offset * scale_diff; else - /// The other case: interval has bigger scale than the interval or they have the same scale, so res has the right precision and origin doesn't - result_data[i] += (origin + res / scale_diff) * scale_diff; + /// The other case: interval has bigger scale than the interval or they have the same scale, so offset has the right precision and origin doesn't. + result_data[i] += (origin + offset / scale_diff) * scale_diff; } } } diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 870853bc371..969e2726902 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -1,40 +1,40 @@ -- Negative tests -Time and origin as Time +Time and origin as Date 2023-02-01 2023-08-01 -2023-10-09 -2023-10-05 2023-10-08 +2023-10-08 +2023-10-09 Time and origin as DateTime 2023-02-01 09:08:07 2023-08-01 09:08:07 -2023-10-09 09:08:07 -2023-10-05 09:08:07 +2023-10-08 09:08:07 +2023-10-08 09:08:07 2023-10-09 09:08:07 2023-10-09 10:10:07 -2023-10-09 10:11:11 +2023-10-09 10:11:07 2023-10-09 10:11:12 Time and origin as DateTime64(9) 2023-02-01 09:08:07.123456789 2023-08-01 09:08:07.123456789 2023-09-10 09:08:07.123456789 -2023-10-05 09:08:07.123456789 -2023-10-08 09:08:07.123543189 -2023-10-09 09:10:07.123460389 -2023-10-09 10:10:11.123456849 -2023-10-09 10:11:10.123456791 +2023-10-08 09:08:07.123456789 +2023-10-09 09:08:07.123456789 +2023-10-09 10:10:07.123456789 +2023-10-09 10:11:11.123456789 +2023-10-09 10:11:12.123456789 2023-10-09 10:11:12.987456789 2023-10-09 10:11:12.987653789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) 2023-02-01 09:08:07.123 2023-08-01 09:08:07.123 +2023-10-08 09:08:07.123 +2023-10-08 09:08:07.123 2023-10-09 09:08:07.123 -2023-10-05 09:08:07.123 -2023-10-08 09:09:33.523 -2023-10-09 09:10:10.723 -2023-10-09 10:10:11.183 -2023-10-09 10:11:10.125 +2023-10-09 10:10:07.123 +2023-10-09 10:11:11.123 +2023-10-09 10:11:12.123 2023-10-09 10:11:12.987 2023-10-09 10:11:12.987000 2023-10-09 10:11:12.987000000 diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql index 71f5fb7fb36..4f8a96b093d 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.sql @@ -23,7 +23,7 @@ SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), 5 -- too many arguments SELECT toStartOfInterval(toDateTime('2023-01-02 14:45:50'), toIntervalYear(1), toDateTime('2020-01-02 14:44:30'), 'Europe/Amsterdam', 5); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT 'Time and origin as Time'; +SELECT 'Time and origin as Date'; SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalYear(1), toDate('2022-02-01')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalQuarter(1), toDate('2022-02-01')); SELECT toStartOfInterval(toDate('2023-10-09'), toIntervalMonth(1), toDate('2023-09-08')); @@ -43,8 +43,8 @@ SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMonth(1), SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalWeek(1), toDateTime('2023-10-01 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalDay(1), toDateTime('2023-10-08 09:08:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalHour(1), toDateTime('2023-10-09 09:10:07')); -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 10:10:11')); -SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 10:11:10')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMinute(1), toDateTime('2023-10-09 09:10:07')); +SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalSecond(1), toDateTime('2023-10-09 09:10:07')); SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMillisecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalMicrosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toStartOfInterval(toDateTime('2023-10-09 10:11:12'), toIntervalNanosecond(1), toDateTime('2023-10-09 10:11:12')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -56,7 +56,7 @@ SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toInt SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalWeek(1), toDateTime64('2023-10-01 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalDay(1), toDateTime64('2023-10-08 09:08:07.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalHour(1), toDateTime64('2023-10-09 09:10:07.123456789', 9)); -SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 10:10:11.123456789', 9)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMinute(1), toDateTime64('2023-10-09 09:10:11.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalSecond(1), toDateTime64('2023-10-09 10:11:10.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMillisecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.987654321', 9), toIntervalMicrosecond(1), toDateTime64('2023-10-09 10:11:12.123456789', 9)); From cb645f82198250b119d734291456afe8dcdde27f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 17:18:56 +0100 Subject: [PATCH 031/844] fix style --- src/Functions/toStartOfInterval.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 1ba7fed4bee..ffabf38ef20 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -397,7 +397,7 @@ private: if (origin > static_cast(end_time) && origin_scale == scale) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The origin must be before the end date / date with time"); else if (origin_scale > scale) - origin /= static_cast(std::pow(10, origin_scale - scale)); /// If aguments have different scales, we make + origin /= static_cast(std::pow(10, origin_scale - scale)); /// If arguments have different scales, we make else if (origin_scale < scale) /// origin argument to have the same scale as the first argument. origin *= static_cast(std::pow(10, scale - origin_scale)); @@ -439,16 +439,16 @@ private: if (scale_interval == 1) { - if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. + if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the correct scale. offset *= scale_endtime; /// Interval has default scale, i.e. Year - Second. if (scale_endtime % 1000 != 0 && scale_endtime >= 1000) - /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has right precision and origin doesn't. + /// The arguments are DateTime64 with precision like 4,5,7,8. Here offset has correct precision and origin doesn't. result_data[i] += (origin + offset / scale_endtime) * scale_endtime; else - /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has right precision in offset and origin. + /// Precision of DateTime64 is 1, 2, 3, 6, 9, e.g. has correct precision in offset and origin. result_data[i] += (origin + offset); } else From 1117284be7f72a22ac841af8f1a91ec853adc900 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jan 2024 23:43:58 +0000 Subject: [PATCH 032/844] fix overflow --- src/Functions/DateTimeTransforms.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index dbe2b11d7b2..e9cee9616fb 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -697,7 +697,12 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); else - return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); + { + if (const auto weeks_to_days = weeks * 7; weeks_to_days / 7 == weeks) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, weeks_to_days, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); + } } }; @@ -728,11 +733,8 @@ struct ToStartOfInterval months_to_add = days < 0 ? months_to_add - 1 : months_to_add; months_to_add += years * 12; Int64 month_multiplier = (months_to_add / months) * months; - Int64 a = 0; - a = time_zone.addMonths(time_zone.toDate(origin), month_multiplier); - // a += time_zone.toTime(origin); - return a - time_zone.toDate(origin); + return time_zone.addMonths(time_zone.toDate(origin), month_multiplier) - time_zone.toDate(origin); } } }; @@ -757,7 +759,12 @@ struct ToStartOfInterval if (origin == 0) return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); else - return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); + { + if (const auto quarters_to_months = quarters * 3; quarters_to_months / 3 == quarters) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, quarters_to_months, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); + } } }; @@ -782,8 +789,10 @@ struct ToStartOfInterval return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else { - auto a = ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); - return a; + if (const auto years_to_months = years * 12; years_to_months / 12 == years) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, years_to_months, time_zone, scale_multiplier, origin); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } } }; From 07f031ec8a26d0ee7081d725fe59b312101bcae8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 8 Jan 2024 15:34:22 +0100 Subject: [PATCH 033/844] fix fuzzer --- src/Functions/DateTimeTransforms.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e9cee9616fb..dd843daed8c 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -20,6 +20,7 @@ namespace DB { +static Int64 Int64_max_value = std::numeric_limits::max(); static constexpr auto microsecond_multiplier = 1000000; static constexpr auto millisecond_multiplier = 1000; @@ -698,8 +699,8 @@ struct ToStartOfInterval return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); else { - if (const auto weeks_to_days = weeks * 7; weeks_to_days / 7 == weeks) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, weeks_to_days, time_zone, scale_multiplier, origin); + if (weeks < Int64_max_value / 7) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); } @@ -760,8 +761,8 @@ struct ToStartOfInterval return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); else { - if (const auto quarters_to_months = quarters * 3; quarters_to_months / 3 == quarters) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, quarters_to_months, time_zone, scale_multiplier, origin); + if (quarters < Int64_max_value / 3) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); } @@ -789,8 +790,8 @@ struct ToStartOfInterval return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); else { - if (const auto years_to_months = years * 12; years_to_months / 12 == years) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, years_to_months, time_zone, scale_multiplier, origin); + if (years < Int64_max_value / 12) // Check if multiplication doesn't overflow Int64 value + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } From 26561c6bdd22085d6fe8537fa37cbf12de573efd Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:47:17 +0100 Subject: [PATCH 034/844] fix due to #58557 --- .../02916_to_start_of_interval_with_origin.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference index 969e2726902..552323be1a5 100644 --- a/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference +++ b/tests/queries/0_stateless/02916_to_start_of_interval_with_origin.reference @@ -24,7 +24,7 @@ Time and origin as DateTime64(9) 2023-10-09 10:11:11.123456789 2023-10-09 10:11:12.123456789 2023-10-09 10:11:12.987456789 -2023-10-09 10:11:12.987653789 +2023-10-09 10:11:12.987654789 2023-10-09 10:11:12.987654321 Time and origin as DateTime64(3) 2023-02-01 09:08:07.123 From 85a35dce28d3a367cf306c2e95edb41a3484a9c8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:22:16 +0000 Subject: [PATCH 035/844] fix tests --- .../0_stateless/02207_subseconds_intervals.reference | 6 +++--- .../02956_fix_to_start_of_milli_microsecond.reference | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index b0edbda5e76..6cde773c3c4 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,14 +10,14 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345600 -1980-12-12 12:12:12.12345600 +1980-12-12 12:12:12.12345700 +1980-12-12 12:12:12.12345700 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 1930-12-12 12:12:12.12345600 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345600 +2220-12-12 12:12:12.12345700 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index d3a002c4fd4..dff0c2a9585 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -1,4 +1,4 @@ -2023-10-09 10:11:12.001 -2023-10-09 10:11:12.001 -2023-10-09 10:11:12.000 -2023-10-09 10:11:12.000 +2023-10-09 10:11:12.001000 +2023-10-09 10:11:12.001000 +2023-10-09 10:11:12.000000 +2023-10-09 10:11:12.000000 From 3e2f41f3010ca7b68762518738a35dee0f84f8e0 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Wed, 27 Mar 2024 18:35:44 +0300 Subject: [PATCH 036/844] support ATTACH PARTITION `ALL` FROM `TABLE` --- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- src/Storages/StorageMergeTree.cpp | 19 +- src/Storages/StorageReplicatedMergeTree.cpp | 451 +++++++++--------- ...626_replace_partition_from_table.reference | 11 +- .../00626_replace_partition_from_table.sql | 6 +- ...e_partition_from_table_zookeeper.reference | 1 + ..._replace_partition_from_table_zookeeper.sh | 9 + 7 files changed, 273 insertions(+), 228 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6cc8063d90a..7cdb18ce0b9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4876,7 +4876,7 @@ void MergeTreeData::checkAlterPartitionIsPossible( const auto * partition_ast = command.partition->as(); if (partition_ast && partition_ast->all) { - if (command.type != PartitionCommand::DROP_PARTITION && command.type != PartitionCommand::ATTACH_PARTITION) + if (command.type != PartitionCommand::DROP_PARTITION && command.type != PartitionCommand::ATTACH_PARTITION && (command.type == PartitionCommand::REPLACE_PARTITION && command.replace)) throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); } else @@ -5625,7 +5625,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc const auto & partition_ast = ast->as(); if (partition_ast.all) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only Support DETACH PARTITION ALL currently"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only Support DROP/DETACH/ATTACH PARTITION ALL currently"); if (!partition_ast.value) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 788f250e6a9..821b02b5b5f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2078,9 +2078,21 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, local_context); + DataPartsVector src_parts; + String partition_id; + bool is_all = partition->as()->all; + if (is_all) + { + if (replace) + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); + + src_parts = src_data.getVisibleDataPartsVector(local_context); + } else + { + partition_id = getPartitionIDFromQuery(partition, local_context); + src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); + } - DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); MutableDataPartsVector dst_parts; std::vector dst_parts_locks; @@ -2088,6 +2100,9 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con for (const DataPartPtr & src_part : src_parts) { + if (is_all) + partition_id = src_part->partition.getID(src_data); + if (!canReplacePartition(src_part)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ce6735d9176..7d7ab712163 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7961,232 +7961,247 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, query_context); - /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. - DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); - - LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); - - static const String TMP_PREFIX = "tmp_replace_from_"; + const String TMP_PREFIX = "tmp_replace_from_"; auto zookeeper = getZooKeeper(); - /// Retry if alter_partition_version changes - for (size_t retry = 0; retry < 1000; ++retry) + std::unordered_set partitions; + if (partition->as()->all) { - DataPartsVector src_parts; - MutableDataPartsVector dst_parts; - std::vector dst_parts_locks; - Strings block_id_paths; - Strings part_checksums; - std::vector ephemeral_locks; - String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; - Coordination::Stat alter_partition_version_stat; - zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - - /// Firstly, generate last block number and compute drop_range - /// NOTE: Even if we make ATTACH PARTITION instead of REPLACE PARTITION drop_range will not be empty, it will contain a block. - /// So, such case has special meaning, if drop_range contains only one block it means that nothing to drop. - /// TODO why not to add normal DROP_RANGE entry to replication queue if `replace` is true? - MergeTreePartInfo drop_range; - std::optional delimiting_block_lock; - bool partition_was_empty = !getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); - if (replace && partition_was_empty) - { - /// Nothing to drop, will just attach new parts - LOG_INFO(log, "Partition {} was empty, REPLACE PARTITION will work as ATTACH PARTITION FROM", drop_range.partition_id); - replace = false; - } - - if (!replace) - { - /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. We have to reset drop range - drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); - } - - assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); - - String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - - std::set replaced_parts; - for (const auto & src_part : src_all_parts) - { - /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION - /// Assume that merges in the partition are quite rare - /// Save deduplication block ids with special prefix replace_partition - - if (!canReplacePartition(src_part)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot replace partition '{}' because part '{}" - "' has inconsistent granularity with table", partition_id, src_part->name); - - String hash_hex = src_part->checksums.getTotalChecksumHex(); - const bool is_duplicated_part = replaced_parts.contains(hash_hex); - replaced_parts.insert(hash_hex); - - if (replace) - LOG_INFO(log, "Trying to replace {} with hash_hex {}", src_part->name, hash_hex); - else - LOG_INFO(log, "Trying to attach {} with hash_hex {}", src_part->name, hash_hex); - - String block_id_path = (replace || is_duplicated_part) ? "" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex)); - - auto lock = allocateBlockNumber(partition_id, zookeeper, block_id_path); - if (!lock) - { - LOG_INFO(log, "Part {} (hash {}) has been already attached", src_part->name, hash_hex); - continue; - } - - UInt64 index = lock->getNumber(); - MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication - || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; - - IDataPartStorage::ClonePartParams clone_params - { - .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), - .metadata_version_to_write = metadata_snapshot->getMetadataVersion() - }; - - auto [dst_part, part_lock] = cloneAndLoadDataPart( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); - - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); - src_parts.emplace_back(src_part); - ephemeral_locks.emplace_back(std::move(*lock)); - block_id_paths.emplace_back(block_id_path); - part_checksums.emplace_back(hash_hex); - } - - ReplicatedMergeTreeLogEntryData entry; - { - auto src_table_id = src_data.getStorageID(); - entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; - entry.source_replica = replica_name; - entry.create_time = time(nullptr); - entry.replace_range_entry = std::make_shared(); - - auto & entry_replace = *entry.replace_range_entry; - entry_replace.drop_range_part_name = drop_range_fake_part_name; - entry_replace.from_database = src_table_id.database_name; - entry_replace.from_table = src_table_id.table_name; - for (const auto & part : src_parts) - entry_replace.src_part_names.emplace_back(part->name); - for (const auto & part : dst_parts) - entry_replace.new_part_names.emplace_back(part->name); - for (const String & checksum : part_checksums) - entry_replace.part_names_checksums.emplace_back(checksum); - entry_replace.columns_version = -1; - } - if (replace) - { - /// Cancel concurrent inserts in range - clearLockedBlockNumbersInPartition(*zookeeper, drop_range.partition_id, drop_range.min_block, drop_range.max_block); - /// Remove deduplication block_ids of replacing parts - clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.min_block, drop_range.max_block); - } + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); - Coordination::Responses op_results; - DataPartsVector parts_holder; - - try - { - Coordination::Requests ops; - for (size_t i = 0; i < dst_parts.size(); ++i) - { - getCommitPartOps(ops, dst_parts[i], block_id_paths[i]); - ephemeral_locks[i].getUnlockOp(ops); - } - - if (auto txn = query_context->getZooKeeperMetadataTransaction()) - txn->moveOpsTo(ops); - - delimiting_block_lock->getUnlockOp(ops); - /// Check and update version to avoid race with DROP_RANGE - ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", alter_partition_version_stat.version)); - /// Just update version, because merges assignment relies on it - ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); - ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - - Transaction transaction(*this, NO_TRANSACTION_RAW); - { - auto data_parts_lock = lockParts(); - for (auto & part : dst_parts) - renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); - } - - for (const auto & dst_part : dst_parts) - lockSharedData(*dst_part, false, /*hardlinked_files*/ {}); - - Coordination::Error code = zookeeper->tryMulti(ops, op_results); - if (code == Coordination::Error::ZOK) - delimiting_block_lock->assumeUnlocked(); - else if (code == Coordination::Error::ZBADVERSION) - { - /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. - if (query_context->getZooKeeperMetadataTransaction()) - throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, - "Cannot execute alter, because alter partition version was suddenly changed due " - "to concurrent alter"); - continue; - } - else - zkutil::KeeperMultiException::check(code, ops, op_results); - - { - auto data_parts_lock = lockParts(); - transaction.commit(&data_parts_lock); - if (replace) - { - parts_holder = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, drop_range.partition_id, &data_parts_lock); - /// We ignore the list of parts returned from the function below. We will remove them from zk when executing REPLACE_RANGE - removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock); - } - } - - PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot())); - } - catch (...) - { - PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true)); - for (const auto & dst_part : dst_parts) - unlockSharedData(*dst_part); - - throw; - } - - String log_znode_path = dynamic_cast(*op_results.back()).path_created; - entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); - - for (auto & lock : ephemeral_locks) - lock.assumeUnlocked(); - - lock2.reset(); - lock1.reset(); - - /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) - queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); - parts_holder.clear(); - cleanup_thread.wakeup(); - - - waitForLogEntryToBeProcessedIfNecessary(entry, query_context); - - return; + partitions = src_data.getAllPartitionIds(); + LOG_INFO(log, "Will try to attach {} partitions", partitions.size()); + } else + { + partitions = std::unordered_set(); + partitions.emplace(getPartitionIDFromQuery(partition, query_context)); } - throw Exception( - ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot assign ALTER PARTITION, because another ALTER PARTITION query was concurrently executed"); + for (const auto & partition_id : partitions) { + auto src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); + LOG_DEBUG(log, "Cloning {} parts from partition '{}'", src_all_parts.size(), partition_id); + + auto ok = false; + /// Retry if alter_partition_version changes + for (size_t retry = 0; retry < 1000; ++retry) + { + DataPartsVector src_parts; + MutableDataPartsVector dst_parts; + std::vector dst_parts_locks; + Strings block_id_paths; + Strings part_checksums; + std::vector ephemeral_locks; + String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; + Coordination::Stat alter_partition_version_stat; + zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); + + /// Firstly, generate last block number and compute drop_range + /// NOTE: Even if we make ATTACH PARTITION instead of REPLACE PARTITION drop_range will not be empty, it will contain a block. + /// So, such case has special meaning, if drop_range contains only one block it means that nothing to drop. + /// TODO why not to add normal DROP_RANGE entry to replication queue if `replace` is true? + MergeTreePartInfo drop_range; + std::optional delimiting_block_lock; + bool partition_was_empty = !getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); + if (replace && partition_was_empty) + { + /// Nothing to drop, will just attach new parts + LOG_INFO(log, "Partition {} was empty, REPLACE PARTITION will work as ATTACH PARTITION FROM", drop_range.partition_id); + replace = false; + } + + if (!replace) + { + /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. We have to reset drop range + drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); + } + + assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); + + String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + + std::set replaced_parts; + for (const auto & src_part : src_all_parts) + { + /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION + /// Assume that merges in the partition are quite rare + /// Save deduplication block ids with special prefix replace_partition + + if (!canReplacePartition(src_part)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot replace partition '{}' because part '{}" + "' has inconsistent granularity with table", partition_id, src_part->name); + + String hash_hex = src_part->checksums.getTotalChecksumHex(); + const bool is_duplicated_part = replaced_parts.contains(hash_hex); + replaced_parts.insert(hash_hex); + + if (replace) + LOG_INFO(log, "Trying to replace '{}' with hash_hex '{}'", src_part->name, hash_hex); + else + LOG_INFO(log, "Trying to attach '{}' with hash_hex '{}'", src_part->name, hash_hex); + + String block_id_path = (replace || is_duplicated_part) ? "" : (fs::path(zookeeper_path) / "blocks" / (partition_id + "_replace_from_" + hash_hex)); + + auto lock = allocateBlockNumber(partition_id, zookeeper, block_id_path); + if (!lock) + { + LOG_INFO(log, "Part '{}' (hash '{}') in partition '{}' has been already attached", src_part->name, hash_hex, partition_id); + continue; + } + + UInt64 index = lock->getNumber(); + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + + bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication + || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + + IDataPartStorage::ClonePartParams clone_params + { + .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), + .metadata_version_to_write = metadata_snapshot->getMetadataVersion() + }; + + auto [dst_part, part_lock] = cloneAndLoadDataPart( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); + + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + src_parts.emplace_back(src_part); + ephemeral_locks.emplace_back(std::move(*lock)); + block_id_paths.emplace_back(block_id_path); + part_checksums.emplace_back(hash_hex); + } + + ReplicatedMergeTreeLogEntryData entry; + { + auto src_table_id = src_data.getStorageID(); + entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; + entry.source_replica = replica_name; + entry.create_time = time(nullptr); + entry.replace_range_entry = std::make_shared(); + + auto & entry_replace = *entry.replace_range_entry; + entry_replace.drop_range_part_name = drop_range_fake_part_name; + entry_replace.from_database = src_table_id.database_name; + entry_replace.from_table = src_table_id.table_name; + for (const auto & part : src_parts) + entry_replace.src_part_names.emplace_back(part->name); + for (const auto & part : dst_parts) + entry_replace.new_part_names.emplace_back(part->name); + for (const String & checksum : part_checksums) + entry_replace.part_names_checksums.emplace_back(checksum); + entry_replace.columns_version = -1; + } + + if (replace) + { + /// Cancel concurrent inserts in range + clearLockedBlockNumbersInPartition(*zookeeper, drop_range.partition_id, drop_range.min_block, drop_range.max_block); + /// Remove deduplication block_ids of replacing parts + clearBlocksInPartition(*zookeeper, drop_range.partition_id, drop_range.min_block, drop_range.max_block); + } + + Coordination::Responses op_results; + DataPartsVector parts_holder; + + try + { + Coordination::Requests ops; + for (size_t i = 0; i < dst_parts.size(); ++i) + { + getCommitPartOps(ops, dst_parts[i], block_id_paths[i]); + ephemeral_locks[i].getUnlockOp(ops); + } + + if (auto txn = query_context->getZooKeeperMetadataTransaction()) + txn->moveOpsTo(ops); + + delimiting_block_lock->getUnlockOp(ops); + /// Check and update version to avoid race with DROP_RANGE + ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", alter_partition_version_stat.version)); + /// Just update version, because merges assignment relies on it + ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); + ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); + + Transaction transaction(*this, NO_TRANSACTION_RAW); + { + auto data_parts_lock = lockParts(); + for (auto & part : dst_parts) + renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); + } + + for (const auto & dst_part : dst_parts) + lockSharedData(*dst_part, false, /*hardlinked_files*/ {}); + + Coordination::Error code = zookeeper->tryMulti(ops, op_results); + if (code == Coordination::Error::ZOK) + delimiting_block_lock->assumeUnlocked(); + else if (code == Coordination::Error::ZBADVERSION) + { + /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. + if (query_context->getZooKeeperMetadataTransaction()) + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter on partition '{}', because alter partition version was suddenly changed due " + "to concurrent alter", partition_id); + continue; + } + else + zkutil::KeeperMultiException::check(code, ops, op_results); + + { + auto data_parts_lock = lockParts(); + transaction.commit(&data_parts_lock); + if (replace) + { + parts_holder = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, drop_range.partition_id, &data_parts_lock); + /// We ignore the list of parts returned from the function below. We will remove them from zk when executing REPLACE_RANGE + removePartsInRangeFromWorkingSetAndGetPartsToRemoveFromZooKeeper(NO_TRANSACTION_RAW, drop_range, data_parts_lock); + } + } + + PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot())); + } + catch (...) + { + PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed()), ExecutionStatus::fromCurrentException("", true)); + for (const auto & dst_part : dst_parts) + unlockSharedData(*dst_part); + + throw; + } + + String log_znode_path = dynamic_cast(*op_results.back()).path_created; + entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); + + for (auto & lock : ephemeral_locks) + lock.assumeUnlocked(); + + /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) + queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + parts_holder.clear(); + cleanup_thread.wakeup(); + + waitForLogEntryToBeProcessedIfNecessary(entry, query_context); + + ok = true; + break; + } + + if (!ok) + throw Exception( + ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot assign ALTER PARTITION '{}', because another ALTER PARTITION query was concurrently executed", partition_id); + } + + lock2.reset(); + lock1.reset(); } void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr query_context) diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table.reference b/tests/queries/0_stateless/00626_replace_partition_from_table.reference index 611f3a93ced..0f8ded245d0 100644 --- a/tests/queries/0_stateless/00626_replace_partition_from_table.reference +++ b/tests/queries/0_stateless/00626_replace_partition_from_table.reference @@ -10,11 +10,12 @@ REPLACE recursive 4 8 1 ATTACH FROM -5 8 +6 8 +10 12 OPTIMIZE -5 8 5 -5 8 3 +10 12 9 +10 12 5 After restart -5 8 +10 12 DETACH+ATTACH PARTITION -3 4 +7 7 diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table.sql b/tests/queries/0_stateless/00626_replace_partition_from_table.sql index 7224224334e..3f712f48c06 100644 --- a/tests/queries/0_stateless/00626_replace_partition_from_table.sql +++ b/tests/queries/0_stateless/00626_replace_partition_from_table.sql @@ -53,12 +53,16 @@ DROP TABLE src; CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k; INSERT INTO src VALUES (1, '0', 1); INSERT INTO src VALUES (1, '1', 1); +INSERT INTO src VALUES (2, '2', 1); +INSERT INTO src VALUES (3, '3', 1); SYSTEM STOP MERGES dst; -INSERT INTO dst VALUES (1, '1', 2); +INSERT INTO dst VALUES (1, '1', 2), (1, '2', 0); ALTER TABLE dst ATTACH PARTITION 1 FROM src; SELECT count(), sum(d) FROM dst; +ALTER TABLE dst ATTACH PARTITION ALL FROM src; +SELECT count(), sum(d) FROM dst; SELECT 'OPTIMIZE'; SELECT count(), sum(d), uniqExact(_part) FROM dst; diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference index c6208941ac6..6a7c3478f86 100644 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.reference @@ -16,6 +16,7 @@ REPLACE recursive ATTACH FROM 5 8 5 8 +7 12 REPLACE with fetch 4 6 4 6 diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index ffbf4df4ba7..49976df83b7 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -82,6 +82,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE src;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (3, '1', 2);" +$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (4, '1', 2);" $CLICKHOUSE_CLIENT --query="INSERT INTO dst_r2 VALUES (1, '1', 2);" query_with_retry "ALTER TABLE dst_r2 ATTACH PARTITION 1 FROM src;" @@ -90,6 +92,13 @@ $CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r1;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r1;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +query_with_retry "ALTER TABLE dst_r2 ATTACH PARTITION ALL FROM src;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +$CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM dst_r2;" +query_with_retry "ALTER TABLE dst_r2 DROP PARTITION 3;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" +query_with_retry "ALTER TABLE dst_r2 DROP PARTITION 4;" +$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst_r2;" $CLICKHOUSE_CLIENT --query="SELECT 'REPLACE with fetch';" $CLICKHOUSE_CLIENT --query="DROP TABLE src;" From 0fbf612e150d83d9d0dfb7b157ea4cef6127c86a Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Wed, 27 Mar 2024 18:41:24 +0300 Subject: [PATCH 037/844] support ATTACH PARTITION `ALL` FROM `TABLE` --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7d7ab712163..5434a8bd63e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7962,7 +7962,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - const String TMP_PREFIX = "tmp_replace_from_"; + static const String TMP_PREFIX = "tmp_replace_from_"; auto zookeeper = getZooKeeper(); std::unordered_set partitions; From 9c5e094289df6f0c0063819b68ae96a23f032d5e Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Wed, 27 Mar 2024 19:57:41 +0300 Subject: [PATCH 038/844] fix cs --- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 5434a8bd63e..b4ffd5eff0a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7979,7 +7979,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( partitions.emplace(getPartitionIDFromQuery(partition, query_context)); } - for (const auto & partition_id : partitions) { + for (const auto & partition_id : partitions) + { auto src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); LOG_DEBUG(log, "Cloning {} parts from partition '{}'", src_all_parts.size(), partition_id); From 1d4aa10099fec19f2b60f9094a9f9c004ab06421 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Sun, 31 Mar 2024 00:09:40 +0300 Subject: [PATCH 039/844] fix cs --- src/Storages/StorageMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 821b02b5b5f..04115cf0ede 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2087,7 +2087,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); src_parts = src_data.getVisibleDataPartsVector(local_context); - } else + } + else { partition_id = getPartitionIDFromQuery(partition, local_context); src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); From 9aa0fa11f843bf82432f93d497ec9a68dc756db6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:01:53 +0200 Subject: [PATCH 040/844] IntervalKind -> IntervalKind::Kind --- src/Functions/DateTimeTransforms.h | 6 +-- src/Functions/toStartOfInterval.cpp | 70 ++++++++++++++--------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 8d70dbea685..20dc1bc21f2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -703,7 +703,7 @@ struct ToStartOfInterval else { if (weeks < Int64_max_value / 7) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, weeks * 7, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 7 is out of bounds for type Int64", weeks); } @@ -765,7 +765,7 @@ struct ToStartOfInterval else { if (quarters < Int64_max_value / 3) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, quarters * 3, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 3 is out of bounds for type Int64", quarters); } @@ -794,7 +794,7 @@ struct ToStartOfInterval else { if (years < Int64_max_value / 12) // Check if multiplication doesn't overflow Int64 value - return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); + return ToStartOfInterval::execute(t, years * 12, time_zone, scale_multiplier, origin); else throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} * 12 is out of bounds for type Int64", years); } diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index bdf947977b6..ab8dfef58ca 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,11 +192,11 @@ public: UInt32 scale = 0; if (isDateTime64(arguments[0].type)) scale = assert_cast(*arguments[0].type.get()).getScale(); - if (interval_type->getKind() == IntervalKind::Nanosecond) + if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 9 : scale; - else if (interval_type->getKind() == IntervalKind::Microsecond) + else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) scale = (6 > scale) ? 6 : scale; - else if (interval_type->getKind() == IntervalKind::Millisecond) + else if (interval_type->getKind() == IntervalKind::Kind::Millisecond) scale = (3 > scale) ? 3 : scale; const size_t time_zone_arg_num = (overload == Overload::Default) ? 2 : 3; @@ -272,15 +272,15 @@ private: { switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: + case IntervalKind::Kind::Nanosecond: + case IntervalKind::Kind::Microsecond: + case IntervalKind::Kind::Millisecond: if (isDate(time_data_type) || isDateTime(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type {}", isDate(time_data_type) ? "Date" : "DateTime"); break; - case IntervalKind::Second: - case IntervalKind::Minute: - case IntervalKind::Hour: + case IntervalKind::Kind::Second: + case IntervalKind::Kind::Minute: + case IntervalKind::Kind::Hour: if (isDate(time_data_type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal interval kind for argument data type Date"); break; @@ -299,28 +299,28 @@ private: switch (interval_type->getKind()) // NOLINT(bugprone-switch-missing-default-case) { - case IntervalKind::Nanosecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Microsecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Millisecond: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Second: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Minute: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Hour: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Day: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Week: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Month: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Quarter: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); - case IntervalKind::Year: - return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Nanosecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Microsecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Millisecond: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Second: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Minute: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Hour: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Day: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Week: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Month: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Quarter: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); + case IntervalKind::Kind::Year: + return execute(time_data_type, time_column, num_units, origin_column, result_type, time_zone, scale); } std::unreachable(); @@ -331,11 +331,11 @@ private: { switch (unit) { - case IntervalKind::Millisecond: + case IntervalKind::Kind::Millisecond: return 1'000; - case IntervalKind::Microsecond: + case IntervalKind::Kind::Microsecond: return 1'000'000; - case IntervalKind::Nanosecond: + case IntervalKind::Kind::Nanosecond: return 1'000'000'000; default: return 1; @@ -424,7 +424,7 @@ private: if (isDate(result_type)) /// The result should be a date and the calculations were as datetime. result_data[i] += (origin + offset) / SECONDS_PER_DAY; - else if (unit == IntervalKind::Week || unit == IntervalKind::Month || unit == IntervalKind::Quarter || unit == IntervalKind::Year) + else if (unit == IntervalKind::Kind::Week || unit == IntervalKind::Kind::Month || unit == IntervalKind::Kind::Quarter || unit == IntervalKind::Kind::Year) { if (isDateTime64(result_type)) /// We need to have the right scale for offset, origin already has the right scale. offset *= scale_endtime; From 6109da248f9a4a3d26e0ca8c41225129202e9688 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 10 Apr 2024 14:29:13 +0200 Subject: [PATCH 041/844] fix test --- .../02956_fix_to_start_of_milli_microsecond.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index b005ce6dfb0..95a05a24981 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -2,6 +2,6 @@ 2023-10-09 10:11:12.001000 2023-10-09 10:11:12.000000 2023-10-09 10:11:12.000000 -2023-10-09 00:00:00.000000 -2023-10-09 00:00:00.000 +2023-10-09 00:00:00.000000000 +2023-10-09 00:00:00.000000000 2023-10-09 00:00:00 From 938c888b131d15b311c8368337d36a52ad2b0a02 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 11 Apr 2024 17:01:01 +0200 Subject: [PATCH 042/844] Reload CI to remove build error (empty commit) From 115d7cfa572556f321373e441e5becf3221426ea Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Fri, 26 Apr 2024 15:22:34 +0300 Subject: [PATCH 043/844] fix after merge --- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 84bb2e78eb6..b0d2faf1651 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7982,13 +7982,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, query_context); - - /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. - DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); - - LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); - static const String TMP_PREFIX = "tmp_replace_from_"; auto zookeeper = getZooKeeper(); @@ -7999,7 +7992,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); partitions = src_data.getAllPartitionIds(); - LOG_INFO(log, "Will try to attach {} partitions", partitions.size()); + LOG_INFO(log, "Will try to attach {} partitions without replace", partitions.size()); } else { partitions = std::unordered_set(); @@ -8008,6 +8001,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( for (const auto & partition_id : partitions) { + /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. auto src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); LOG_DEBUG(log, "Cloning {} parts from partition '{}'", src_all_parts.size(), partition_id); From fa8aafa94222da68791492561aaa0ee70e395249 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 22 May 2024 21:28:33 +0000 Subject: [PATCH 044/844] Local plan for parallel replicas: save --- .../ClusterProxy/executeQuery.cpp | 83 +++++++++++++++---- src/Interpreters/ClusterProxy/executeQuery.h | 9 +- src/Planner/PlannerJoinTree.cpp | 4 +- .../QueryPlan/DistributedCreateLocalPlan.cpp | 2 - .../QueryPlan/DistributedCreateLocalPlan.h | 5 -- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 78 +++++++++++++++++ .../QueryPlan/ParallelReplicasLocalPlan.h | 19 +++++ src/Processors/QueryPlan/ReadFromRemote.cpp | 7 +- src/Processors/QueryPlan/ReadFromRemote.h | 4 +- 9 files changed, 182 insertions(+), 29 deletions(-) create mode 100644 src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp create mode 100644 src/Processors/QueryPlan/ParallelReplicasLocalPlan.h diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index d1701d268f1..71912fa1081 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB { @@ -403,7 +404,8 @@ void executeQueryWithParallelReplicas( QueryProcessingStage::Enum processed_stage, const ASTPtr & query_ast, ContextPtr context, - std::shared_ptr storage_limits) + std::shared_ptr storage_limits, + QueryPlanStepPtr read_from_merge_tree) { const auto & settings = context->getSettingsRef(); @@ -486,21 +488,66 @@ void executeQueryWithParallelReplicas( auto coordinator = std::make_shared( new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size); auto external_tables = new_context->getExternalTables(); - auto read_from_remote = std::make_unique( - query_ast, - new_cluster, - storage_id, - std::move(coordinator), - header, - processed_stage, - new_context, - getThrottler(new_context), - std::move(scalars), - std::move(external_tables), - getLogger("ReadFromParallelRemoteReplicasStep"), - std::move(storage_limits)); - query_plan.addStep(std::move(read_from_remote)); + if (settings.allow_experimental_analyzer) + { + auto read_from_remote = std::make_unique( + query_ast, + new_cluster, + storage_id, + std::move(coordinator), + header, + processed_stage, + new_context, + getThrottler(new_context), + std::move(scalars), + std::move(external_tables), + getLogger("ReadFromParallelRemoteReplicasStep"), + std::move(storage_limits), + /*exclude_local_replica*/ true); + + auto remote_plan = std::make_unique(); + remote_plan->addStep(std::move(read_from_remote)); + + auto local_plan = createLocalPlanForParallelReplicas( + query_ast, + header, + new_context, + processed_stage, + coordinator, + std::move(read_from_merge_tree), + /*has_missing_objects=*/false); + + DataStreams input_streams; + input_streams.reserve(2); + input_streams.emplace_back(local_plan->getCurrentDataStream()); + input_streams.emplace_back(remote_plan->getCurrentDataStream()); + + std::vector plans; + plans.emplace_back(std::move(local_plan)); + plans.emplace_back(std::move(remote_plan)); + + auto union_step = std::make_unique(std::move(input_streams)); + query_plan.unitePlans(std::move(union_step), std::move(plans)); + } + else { + auto read_from_remote = std::make_unique( + query_ast, + new_cluster, + storage_id, + std::move(coordinator), + header, + processed_stage, + new_context, + getThrottler(new_context), + std::move(scalars), + std::move(external_tables), + getLogger("ReadFromParallelRemoteReplicasStep"), + std::move(storage_limits), + /*exclude_local_replica*/ false); + + query_plan.addStep(std::move(read_from_remote)); + } } void executeQueryWithParallelReplicas( @@ -510,7 +557,8 @@ void executeQueryWithParallelReplicas( const QueryTreeNodePtr & query_tree, const PlannerContextPtr & planner_context, ContextPtr context, - std::shared_ptr storage_limits) + std::shared_ptr storage_limits, + QueryPlanStepPtr read_from_merge_tree) { QueryTreeNodePtr modified_query_tree = query_tree->clone(); rewriteJoinToGlobalJoin(modified_query_tree, context); @@ -520,7 +568,8 @@ void executeQueryWithParallelReplicas( = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze()); auto modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree); - executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); + executeQueryWithParallelReplicas( + query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits, std::move(read_from_merge_tree)); } void executeQueryWithParallelReplicas( diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 6548edf8939..1b38d1921b1 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -30,6 +30,9 @@ using QueryTreeNodePtr = std::shared_ptr; class PlannerContext; using PlannerContextPtr = std::shared_ptr; +class IQueryPlanStep; +using QueryPlanStepPtr = std::unique_ptr; + namespace ClusterProxy { @@ -73,7 +76,8 @@ void executeQueryWithParallelReplicas( QueryProcessingStage::Enum processed_stage, const ASTPtr & query_ast, ContextPtr context, - std::shared_ptr storage_limits); + std::shared_ptr storage_limits, + QueryPlanStepPtr read_from_merge_tree = nullptr); void executeQueryWithParallelReplicas( QueryPlan & query_plan, @@ -90,7 +94,8 @@ void executeQueryWithParallelReplicas( const QueryTreeNodePtr & query_tree, const PlannerContextPtr & planner_context, ContextPtr context, - std::shared_ptr storage_limits); + std::shared_ptr storage_limits, + QueryPlanStepPtr read_from_merge_tree); } } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a6e4a8ebcde..275461fa109 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -934,6 +934,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { from_stage = QueryProcessingStage::WithMergeableState; QueryPlan query_plan_parallel_replicas; + QueryPlanStepPtr reading_step = std::move(node->step); ClusterProxy::executeQueryWithParallelReplicas( query_plan_parallel_replicas, storage->getStorageID(), @@ -941,7 +942,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres table_expression_query_info.query_tree, table_expression_query_info.planner_context, query_context, - table_expression_query_info.storage_limits); + table_expression_query_info.storage_limits, + std::move(reading_step)); query_plan = std::move(query_plan_parallel_replicas); const Block & query_plan_header = query_plan.getCurrentDataStream().header; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index d4545482477..1f4f271fa6e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -1,8 +1,6 @@ #include -#include #include -#include #include #include #include diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h index 50545d9ae81..f59123a7d88 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.h +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.h @@ -1,17 +1,12 @@ #pragma once #include -#include #include #include -#include namespace DB { -class PreparedSets; -using PreparedSetsPtr = std::shared_ptr; - std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp new file mode 100644 index 00000000000..4d78e049b58 --- /dev/null +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -0,0 +1,78 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects) +{ + if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) + return; + + auto mode = has_missing_objects ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name; + + auto get_converting_dag = [mode](const Block & block_, const Block & header_) + { + /// Convert header structure to expected. + /// Also we ignore constants from result and replace it with constants from header. + /// It is needed for functions like `now64()` or `randConstant()` because their values may be different. + return ActionsDAG::makeConvertingActions( + block_.getColumnsWithTypeAndName(), + header_.getColumnsWithTypeAndName(), + mode, + true); + }; + + auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); + auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + plan.addStep(std::move(converting)); +} + +} + +std::unique_ptr createLocalPlanForParallelReplicas( + const ASTPtr & query_ast, + const Block & header, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + ParallelReplicasReadingCoordinatorPtr /*coordinator*/, + QueryPlanStepPtr /*read_from_merge_tree*/, + bool has_missing_objects) +{ + checkStackSize(); + + auto query_plan = std::make_unique(); + auto new_context = Context::createCopy(context); + + /// Do not push down limit to local plan, as it will break `rows_before_limit_at_least` counter. + if (processed_stage == QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit) + processed_stage = QueryProcessingStage::WithMergeableStateAfterAggregation; + + /// Do not apply AST optimizations, because query + /// is already optimized and some optimizations + /// can be applied only for non-distributed tables + /// and we can produce query, inconsistent with remote plans. + auto select_query_options = SelectQueryOptions(processed_stage).ignoreASTOptimizations(); + + /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to + /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace + /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). + new_context->setSetting("enable_positional_arguments", Field(false)); + auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); + query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + + addConvertingActions(*query_plan, header, has_missing_objects); + return query_plan; +} + +} diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h new file mode 100644 index 00000000000..89d2019f807 --- /dev/null +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +std::unique_ptr createLocalPlanForParallelReplicas( + const ASTPtr & query_ast, + const Block & header, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + ParallelReplicasReadingCoordinatorPtr coordinator, + QueryPlanStepPtr read_from_merge_tree, + bool has_missing_objects); +} diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index b4e35af85d6..6e6edfa1208 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -369,7 +369,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( Scalars scalars_, Tables external_tables_, LoggerPtr log_, - std::shared_ptr storage_limits_) + std::shared_ptr storage_limits_, + bool exclude_local_replica_) : ISourceStep(DataStream{.header = std::move(header_)}) , cluster(cluster_) , query_ast(query_ast_) @@ -382,6 +383,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( , external_tables{external_tables_} , storage_limits(std::move(storage_limits_)) , log(log_) + , exclude_local_replica(exclude_local_replica_) { chassert(cluster->getShardCount() == 1); @@ -410,6 +412,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder const auto & shard = cluster->getShardsInfo().at(0); size_t all_replicas_count = current_settings.max_parallel_replicas; + if (exclude_local_replica) + --all_replicas_count; + if (all_replicas_count > shard.getAllNodeCount()) { LOG_INFO( diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index eb15269155a..442da098a17 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -78,7 +78,8 @@ public: Scalars scalars_, Tables external_tables_, LoggerPtr log_, - std::shared_ptr storage_limits_); + std::shared_ptr storage_limits_, + bool exclude_local_replica = false); String getName() const override { return "ReadFromRemoteParallelReplicas"; } @@ -101,6 +102,7 @@ private: Tables external_tables; std::shared_ptr storage_limits; LoggerPtr log; + bool exclude_local_replica; }; } From a959663e977361af3896f010f9aeadaa8b64c323 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 23 May 2024 16:43:18 +0200 Subject: [PATCH 045/844] Update toStartOfInterval.cpp --- src/Functions/toStartOfInterval.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 749aa9f8800..a4f870613d3 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -241,14 +241,14 @@ private: if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(&time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } From be08ebd0f4755c475385fa3460c32eaea6f71312 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 24 May 2024 13:01:30 +0000 Subject: [PATCH 046/844] Fix --- src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 4d78e049b58..64646960824 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -68,6 +68,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). new_context->setSetting("enable_positional_arguments", Field(false)); + new_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); From 5cdf8d336cf509f7af1031a1b9a856d06b5f2ac2 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 27 May 2024 12:22:18 +0000 Subject: [PATCH 047/844] Local reading step from merge tree --- .../ClusterProxy/executeQuery.cpp | 2 +- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 217 +++++++++++++++++- .../QueryPlan/ReadFromMergeTree.cpp | 45 +++- src/Processors/QueryPlan/ReadFromMergeTree.h | 12 +- src/Storages/MergeTree/RequestResponse.h | 1 - 5 files changed, 264 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 71912fa1081..e12e531ab51 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -495,7 +495,7 @@ void executeQueryWithParallelReplicas( query_ast, new_cluster, storage_id, - std::move(coordinator), + coordinator, header, processed_stage, new_context, diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 64646960824..2bfd8965269 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -1,12 +1,32 @@ #include #include +#include "Storages/MergeTree/RequestResponse.h" #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace ProfileEvents +{ + extern const Event SelectedParts; + extern const Event SelectedRanges; + extern const Event SelectedMarks; +} namespace DB { @@ -40,13 +60,179 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi } +class ReadFromMergeTreeCoordinated : public ISourceStep +{ +public: + ReadFromMergeTreeCoordinated(QueryPlanStepPtr read_from_merge_tree_, ParallelReplicasReadingCoordinatorPtr coordinator_) + : ISourceStep(read_from_merge_tree_->getOutputStream()) + , read_from_merge_tree(std::move(read_from_merge_tree_)) + , coordinator(std::move(coordinator_)) + { + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; + String getName() const override { return "ReadFromLocalParallelReplica"; } + +private: + QueryPlanStepPtr read_from_merge_tree; + ParallelReplicasReadingCoordinatorPtr coordinator; +}; + +void ReadFromMergeTreeCoordinated::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & /*settings*/) +{ + ReadFromMergeTree & reading = *typeid_cast(read_from_merge_tree.get()); + + auto result = reading.getAnalysisResult(); + const auto & query_info = reading.getQueryInfo(); + const auto & data = reading.data; + const auto & context = reading.getContext(); + const auto & storage_snapshot = reading.getStorageSnapshot(); + + if (reading.enable_remove_parts_from_snapshot_optimization) + { + /// Do not keep data parts in snapshot. + /// They are stored separately, and some could be released after PK analysis. + reading.storage_snapshot->data = std::make_unique(); + } + + LOG_DEBUG( + reading.log, + "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", + result.parts_before_pk, + result.total_parts, + result.selected_parts, + result.selected_marks_pk, + result.total_marks_pk, + result.selected_marks, + result.selected_ranges); + + // Adding partition info to QueryAccessInfo. + if (context->hasQueryContext() && !query_info.is_internal) + { + Names partition_names; + for (const auto & part : result.parts_with_ranges) + { + partition_names.emplace_back( + fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); + } + context->getQueryContext()->addQueryAccessInfo(partition_names); + + if (storage_snapshot->projection) + context->getQueryContext()->addQueryAccessInfo( + Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); + } + + ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); + ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges); + ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks); + + auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context); + + // TODO: check this on plan level, we should be here if there is nothing to read + if (result.parts_with_ranges.empty()) + { + pipeline.init(Pipe(std::make_shared(getOutputStream().header))); + return; + } + + /// Projection, that needed to drop columns, which have appeared by execution + /// of some extra expressions, and to allow execute the same expressions later. + /// NOTE: It may lead to double computation of expressions. + ActionsDAGPtr result_projection; + + Pipe pipe = reading.spreadMarkRanges(std::move(result.parts_with_ranges), reading.requested_num_streams, result, result_projection); + + for (const auto & processor : pipe.getProcessors()) + processor->setStorageLimits(query_info.storage_limits); + + if (pipe.empty()) + { + pipeline.init(Pipe(std::make_shared(getOutputStream().header))); + return; + } + + if (result.sampling.use_sampling) + { + auto sampling_actions = std::make_shared(result.sampling.filter_expression); + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared( + header, + sampling_actions, + result.sampling.filter_function->getColumnName(), + false); + }); + } + + Block cur_header = pipe.getHeader(); + + auto append_actions = [&result_projection](ActionsDAGPtr actions) + { + if (!result_projection) + result_projection = std::move(actions); + else + result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); + }; + + if (result_projection) + cur_header = result_projection->updateHeader(cur_header); + + /// Extra columns may be returned (for example, if sampling is used). + /// Convert pipe to step header structure. + if (!isCompatibleHeader(cur_header, getOutputStream().header)) + { + auto converting = ActionsDAG::makeConvertingActions( + cur_header.getColumnsWithTypeAndName(), + getOutputStream().header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + + append_actions(std::move(converting)); + } + + if (result_projection) + { + auto projection_actions = std::make_shared(result_projection); + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, projection_actions); + }); + } + + /// Some extra columns could be added by sample/final/in-order/etc + /// Remove them from header if not needed. + if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header)) + { + auto convert_actions_dag = ActionsDAG::makeConvertingActions( + pipe.getHeader().getColumnsWithTypeAndName(), + getOutputStream().header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto converting_dag_expr = std::make_shared(convert_actions_dag); + + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, converting_dag_expr); + }); + } + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + pipeline.addContext(context); + // Attach QueryIdHolder if needed + if (query_id_holder) + pipeline.setQueryIdHolder(std::move(query_id_holder)); +} + std::unique_ptr createLocalPlanForParallelReplicas( const ASTPtr & query_ast, const Block & header, ContextPtr context, QueryProcessingStage::Enum processed_stage, - ParallelReplicasReadingCoordinatorPtr /*coordinator*/, - QueryPlanStepPtr /*read_from_merge_tree*/, + ParallelReplicasReadingCoordinatorPtr coordinator, + QueryPlanStepPtr read_from_merge_tree, bool has_missing_objects) { checkStackSize(); @@ -72,6 +258,33 @@ std::unique_ptr createLocalPlanForParallelReplicas( auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + QueryPlan::Node * node = query_plan->getRootNode(); + ReadFromMergeTree * reading = nullptr; + while (node) + { + reading = typeid_cast(node->step.get()); + if (reading) + break; + + if (!node->children.empty()) + node = node->children.at(0); + } + + chassert(reading); + + MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement announcement) + { + chassert(coordinator); + coordinator->handleInitialAllRangesAnnouncement(std::move(announcement)); + }; + + MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional + { return coordinator->handleRequest(std::move(req)); }; + + const auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); + auto read_from_merge_tree_parallel_replicas = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, true, all_ranges_cb, read_task_cb); + node->step = std::move(read_from_merge_tree_parallel_replicas); + addConvertingActions(*query_plan, header, has_missing_objects); return query_plan; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6f0fa55c349..21303cf2af2 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -272,7 +272,9 @@ ReadFromMergeTree::ReadFromMergeTree( std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, - bool enable_parallel_reading) + bool enable_parallel_reading_, + std::optional all_ranges_callback_, + std::optional read_task_callback_) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot_->getSampleBlockForColumns(all_column_names_), query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_) @@ -291,13 +293,20 @@ ReadFromMergeTree::ReadFromMergeTree( , max_block_numbers_to_read(std::move(max_block_numbers_to_read_)) , log(std::move(log_)) , analyzed_result_ptr(analyzed_result_ptr_) - , is_parallel_reading_from_replicas(enable_parallel_reading) + , is_parallel_reading_from_replicas(enable_parallel_reading_) , enable_remove_parts_from_snapshot_optimization(query_info_.merge_tree_enable_remove_parts_from_snapshot_optimization) { if (is_parallel_reading_from_replicas) { - all_ranges_callback = context->getMergeTreeAllRangesCallback(); - read_task_callback = context->getMergeTreeReadTaskCallback(); + if (all_ranges_callback_) + all_ranges_callback = all_ranges_callback_.value(); + else + all_ranges_callback = context->getMergeTreeAllRangesCallback(); + + if (read_task_callback_) + read_task_callback = read_task_callback_.value(); + else + read_task_callback = context->getMergeTreeReadTaskCallback(); } const auto & settings = context->getSettingsRef(); @@ -331,11 +340,31 @@ ReadFromMergeTree::ReadFromMergeTree( enable_vertical_final); } +std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( + const ReadFromMergeTree * analyzed_merge_tree, + bool enable_parallel_reading_, + std::optional all_ranges_callback_, + std::optional read_task_callback_) +{ + return std::make_unique( + prepared_parts, + alter_conversions_for_parts, + all_column_names, + data, + getQueryInfo(), + getStorageSnapshot(), + getContext(), + block_size.max_block_size_rows, + requested_num_streams, + max_block_numbers_to_read, + log, + analyzed_merge_tree->analyzed_result_ptr, + enable_parallel_reading_, + all_ranges_callback_, + read_task_callback_); +} -Pipe ReadFromMergeTree::readFromPoolParallelReplicas( - RangesInDataParts parts_with_range, - Names required_columns, - PoolSettings pool_settings) +Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings) { const auto & client_info = context->getClientInfo(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5d7879e8dee..bba5293e863 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -119,7 +119,15 @@ public: std::shared_ptr max_block_numbers_to_read_, LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, - bool enable_parallel_reading); + bool enable_parallel_reading_, + std::optional all_ranges_callback_ = std::nullopt, + std::optional read_task_callback_ = std::nullopt); + + std::unique_ptr createLocalParallelReplicasReadingStep( + const ReadFromMergeTree * analyzed_merge_tree, + bool enable_parallel_reading_, + std::optional all_ranges_callback_, + std::optional read_task_callback_); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } @@ -282,6 +290,8 @@ private: std::optional read_task_callback; bool enable_vertical_final = false; bool enable_remove_parts_from_snapshot_optimization = true; + + friend class ReadFromMergeTreeCoordinated; }; } diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 3a5bfde6c20..5f5516a6804 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include From 1f26281493d66f15408d8459c198170e71901f68 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 27 May 2024 15:43:14 +0200 Subject: [PATCH 048/844] build fix --- src/Functions/toStartOfInterval.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index a4f870613d3..c45501aa905 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -241,20 +241,20 @@ private: if (isDate(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(&time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); if (time_column_vec) return dispatchForIntervalColumn(assert_cast(time_column_type), *time_column_vec, interval_column, origin_column, result_type, time_zone); } else if (isDateTime64(time_column_type)) { - const auto * time_column_vec = checkAndGetColumn(time_column_col); + const auto * time_column_vec = checkAndGetColumn(&time_column_col); auto scale = assert_cast(time_column_type).getScale(); if (time_column_vec) From 29346f607398dba5cb7796d53dc525fd937af104 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 May 2024 12:16:18 +0000 Subject: [PATCH 049/844] Init coordinator separately --- src/Planner/PlannerJoinTree.cpp | 9 +++++--- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 23 ++++++++++++++++++- src/Processors/QueryPlan/ReadFromMergeTree.h | 6 +++++ .../ParallelReplicasReadingCoordinator.h | 2 +- 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 275461fa109..01455d4b955 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -892,16 +892,19 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (!node->children.empty()) node = node->children.at(0); + else + node = nullptr; } chassert(reading); + auto result_ptr = reading->selectRangesToRead(); + UInt64 rows_to_read = result_ptr->selected_rows; + reading->setAnalyzedResult(std::move(result_ptr)); + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) { - auto result_ptr = reading->selectRangesToRead(); - - UInt64 rows_to_read = result_ptr->selected_rows; if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read) rows_to_read = table_expression_query_info.limit; diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 2bfd8965269..1c5e0861530 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -268,10 +268,32 @@ std::unique_ptr createLocalPlanForParallelReplicas( if (!node->children.empty()) node = node->children.at(0); + else + node = nullptr; } chassert(reading); + const auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); + if (!analyzed_merge_tree->hasAnalyzedResult()) + analyzed_merge_tree->selectRangesToRead(); + + switch (analyzed_merge_tree->getReadType()) + { + case ReadFromMergeTree::ReadType::Default: + coordinator->initialize(CoordinationMode::Default); + break; + case ReadFromMergeTree::ReadType::InOrder: + coordinator->initialize(CoordinationMode::WithOrder); + break; + case ReadFromMergeTree::ReadType::InReverseOrder: + coordinator->initialize(CoordinationMode::ReverseOrder); + break; + case ReadFromMergeTree::ReadType::ParallelReplicas: + chassert(false); + UNREACHABLE(); + } + MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement announcement) { chassert(coordinator); @@ -281,7 +303,6 @@ std::unique_ptr createLocalPlanForParallelReplicas( MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; - const auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); auto read_from_merge_tree_parallel_replicas = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, true, all_ranges_cb, read_task_cb); node->step = std::move(read_from_merge_tree_parallel_replicas); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index bba5293e863..adc3818c3ab 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -203,6 +203,12 @@ public: void applyFilters(ActionDAGNodes added_filter_nodes) override; + ReadType getReadType() const + { + chassert(analyzed_result_ptr); + return analyzed_result_ptr->read_type; + } + private: static AnalysisResultPtr selectRangesToReadImpl( MergeTreeData::DataPartsVector parts, diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 60343988f03..ff72decbf8d 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -30,8 +30,8 @@ public: /// needed to report total rows to read void setProgressCallback(ProgressCallback callback); -private: void initialize(CoordinationMode mode); +private: std::mutex mutex; size_t replicas_count{0}; From 1aa5d70d2a65b860f0d469a6c7c00b9f4129b07d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 May 2024 13:03:24 +0000 Subject: [PATCH 050/844] Fix style --- src/Interpreters/ClusterProxy/executeQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index e12e531ab51..3d08219155f 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -530,7 +530,8 @@ void executeQueryWithParallelReplicas( auto union_step = std::make_unique(std::move(input_streams)); query_plan.unitePlans(std::move(union_step), std::move(plans)); } - else { + else + { auto read_from_remote = std::make_unique( query_ast, new_cluster, From a9b485a2c1b8eb85fa04a5fef50b1258b0b63102 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 May 2024 19:05:09 +0000 Subject: [PATCH 051/844] Disable temporary PR inorder test --- .../02898_parallel_replicas_progress_bar.reference | 6 ------ .../02898_parallel_replicas_progress_bar.sql | 10 +++++----- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference index c66597436f3..380aac4dbe8 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference @@ -1,8 +1,2 @@ 3000 1000 3999 2499.5 1 -1998 2944475297004403859 -1999 254596732598015005 -2000 6863370867519437063 -2001 17844331710293705251 -2002 1587587338113897332 -1 diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index d8bfec12b3a..42f8091db08 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -26,12 +26,12 @@ WHERE query_id in (select query_id from system.query_log where current_database AND message LIKE '%Total rows to read: 3000%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; -- reading in order coordinator -SELECT k, sipHash64(v) FROM t1 order by k limit 5 offset 998 SETTINGS optimize_read_in_order=1, log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b'; +-- SELECT k, sipHash64(v) FROM t1 order by k limit 5 offset 998 SETTINGS optimize_read_in_order=1, log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b'; -SYSTEM FLUSH LOGS; -SELECT count() > 0 FROM system.text_log -WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b') - AND message LIKE '%Updated total rows to read: added % rows, total 3000 rows%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; +-- SYSTEM FLUSH LOGS; +-- SELECT count() > 0 FROM system.text_log +-- WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b') +-- AND message LIKE '%Updated total rows to read: added % rows, total 3000 rows%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; DROP TABLE t1 SYNC; DROP TABLE t2 SYNC; From d756729f384ff9a6fbf93f12934ed0f98b54b7a6 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 28 May 2024 21:04:33 +0000 Subject: [PATCH 052/844] Use local working set for parallel replicas --- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 42 +++++++++---------- .../QueryPlan/ReadFromMergeTree.cpp | 29 +++++-------- src/Processors/QueryPlan/ReadFromMergeTree.h | 10 +++-- src/Processors/QueryPlan/ReadFromRemote.cpp | 19 ++++++--- .../MergeTree/MergeTreeSelectProcessor.h | 5 --- 5 files changed, 52 insertions(+), 53 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 1c5e0861530..48184cb2b5f 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -273,37 +273,37 @@ std::unique_ptr createLocalPlanForParallelReplicas( } chassert(reading); - const auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); - if (!analyzed_merge_tree->hasAnalyzedResult()) - analyzed_merge_tree->selectRangesToRead(); + chassert(analyzed_merge_tree->hasAnalyzedResult()); + CoordinationMode mode = CoordinationMode::Default; switch (analyzed_merge_tree->getReadType()) { - case ReadFromMergeTree::ReadType::Default: - coordinator->initialize(CoordinationMode::Default); - break; - case ReadFromMergeTree::ReadType::InOrder: - coordinator->initialize(CoordinationMode::WithOrder); - break; - case ReadFromMergeTree::ReadType::InReverseOrder: - coordinator->initialize(CoordinationMode::ReverseOrder); - break; - case ReadFromMergeTree::ReadType::ParallelReplicas: - chassert(false); - UNREACHABLE(); + case ReadFromMergeTree::ReadType::Default: + mode = CoordinationMode::Default; + break; + case ReadFromMergeTree::ReadType::InOrder: + mode = CoordinationMode::WithOrder; + break; + case ReadFromMergeTree::ReadType::InReverseOrder: + mode = CoordinationMode::ReverseOrder; + break; + case ReadFromMergeTree::ReadType::ParallelReplicas: + chassert(false); + UNREACHABLE(); } - MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement announcement) - { - chassert(coordinator); - coordinator->handleInitialAllRangesAnnouncement(std::move(announcement)); - }; + const auto number_of_local_replica = new_context->getSettingsRef().max_parallel_replicas - 1; + coordinator->handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement( + mode, analyzed_merge_tree->getAnalysisResult().parts_with_ranges.getDescriptions(), number_of_local_replica)); + + MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement) {}; MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; - auto read_from_merge_tree_parallel_replicas = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, true, all_ranges_cb, read_task_cb); + auto read_from_merge_tree_parallel_replicas + = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, true, all_ranges_cb, read_task_cb, number_of_local_replica); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, has_missing_objects); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 198236c4f49..e3e09673431 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -273,8 +273,9 @@ ReadFromMergeTree::ReadFromMergeTree( LoggerPtr log_, AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading_, - std::optional all_ranges_callback_, - std::optional read_task_callback_) + std::optional all_ranges_callback_, + std::optional read_task_callback_, + std::optional number_of_current_replica_) : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader( storage_snapshot_->getSampleBlockForColumns(all_column_names_), query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_) @@ -295,18 +296,12 @@ ReadFromMergeTree::ReadFromMergeTree( , analyzed_result_ptr(analyzed_result_ptr_) , is_parallel_reading_from_replicas(enable_parallel_reading_) , enable_remove_parts_from_snapshot_optimization(query_info_.merge_tree_enable_remove_parts_from_snapshot_optimization) + , number_of_current_replica(number_of_current_replica_) { if (is_parallel_reading_from_replicas) { - if (all_ranges_callback_) - all_ranges_callback = all_ranges_callback_.value(); - else - all_ranges_callback = context->getMergeTreeAllRangesCallback(); - - if (read_task_callback_) - read_task_callback = read_task_callback_.value(); - else - read_task_callback = context->getMergeTreeReadTaskCallback(); + all_ranges_callback = all_ranges_callback_.value_or(context->getMergeTreeAllRangesCallback()); + read_task_callback = read_task_callback_.value_or(context->getMergeTreeReadTaskCallback()); } const auto & settings = context->getSettingsRef(); @@ -344,7 +339,8 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica const ReadFromMergeTree * analyzed_merge_tree, bool enable_parallel_reading_, std::optional all_ranges_callback_, - std::optional read_task_callback_) + std::optional read_task_callback_, + std::optional number_of_current_replica_) { return std::make_unique( prepared_parts, @@ -361,7 +357,8 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica analyzed_merge_tree->analyzed_result_ptr, enable_parallel_reading_, all_ranges_callback_, - read_task_callback_); + read_task_callback_, + number_of_current_replica_); } Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings) @@ -372,9 +369,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), - .count_participating_replicas = client_info.count_participating_replicas, - .number_of_current_replica = client_info.number_of_current_replica, - .columns_to_read = required_columns, + .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), }; /// We have a special logic for local replica. It has to read less data, because in some cases it should @@ -545,9 +540,7 @@ Pipe ReadFromMergeTree::readInOrder( { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), - .count_participating_replicas = client_info.count_participating_replicas, .number_of_current_replica = client_info.number_of_current_replica, - .columns_to_read = required_columns, }; const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index adc3818c3ab..8cabc6822c1 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -121,13 +121,15 @@ public: AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading_, std::optional all_ranges_callback_ = std::nullopt, - std::optional read_task_callback_ = std::nullopt); + std::optional read_task_callback_ = std::nullopt, + std::optional number_of_current_replica_ = std::nullopt); std::unique_ptr createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, bool enable_parallel_reading_, std::optional all_ranges_callback_, - std::optional read_task_callback_); + std::optional read_task_callback_, + std::optional number_of_current_replica_); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } @@ -192,6 +194,7 @@ public: bool hasAnalyzedResult() const { return analyzed_result_ptr != nullptr; } void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } + ReadFromMergeTree::AnalysisResult getAnalysisResult() const; const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } const std::vector & getAlterConvertionsForParts() const { return alter_conversions_for_parts; } @@ -286,8 +289,6 @@ private: Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); - ReadFromMergeTree::AnalysisResult getAnalysisResult() const; - AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; @@ -296,6 +297,7 @@ private: std::optional read_task_callback; bool enable_vertical_final = false; bool enable_remove_parts_from_snapshot_optimization = true; + std::optional number_of_current_replica; friend class ReadFromMergeTreeCoordinated; }; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 485d5f675ab..23d8f2b496f 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -390,8 +390,18 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( std::vector description; description.push_back(fmt::format("query: {}", formattedAST(query_ast))); - for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools) - description.push_back(fmt::format("Replica: {}", pool->getHost())); + bool first_local = false; + for (const auto & addr : cluster->getShardsAddresses().front()) + { + /// skip first local + if (exclude_local_replica && addr.is_local && !first_local) + { + first_local = true; + continue; + } + + description.push_back(fmt::format("Replica: {}", addr.host_name)); + } setStepDescription(boost::algorithm::join(description, ", ")); } @@ -414,9 +424,6 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder const auto & shard = cluster->getShardsInfo().at(0); size_t all_replicas_count = current_settings.max_parallel_replicas; - if (exclude_local_replica) - --all_replicas_count; - if (all_replicas_count > shard.getAllNodeCount()) { LOG_INFO( @@ -427,6 +434,8 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shard.getAllNodeCount()); all_replicas_count = shard.getAllNodeCount(); } + if (exclude_local_replica) + --all_replicas_count; std::vector shuffled_pool; if (all_replicas_count < shard.getAllNodeCount()) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 8f41f5deacb..03ca30dd5b3 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -26,12 +26,7 @@ struct ParallelReadingExtension { MergeTreeAllRangesCallback all_callback; MergeTreeReadTaskCallback callback; - size_t count_participating_replicas{0}; size_t number_of_current_replica{0}; - /// This is needed to estimate the number of bytes - /// between a pair of marks to perform one request - /// over the network for a 1Gb of data. - Names columns_to_read; }; /// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm From 40aab93db123840042a0f5f4703ae26f4e2ab507 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 29 May 2024 20:55:02 +0000 Subject: [PATCH 053/844] Correct replica id for inorder case --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e3e09673431..9516072f269 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -540,7 +540,7 @@ Pipe ReadFromMergeTree::readInOrder( { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), - .number_of_current_replica = client_info.number_of_current_replica, + .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), }; const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; From e8a1a800dcdc44001c6c9f08f78390df9b3133e3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 29 May 2024 20:57:16 +0000 Subject: [PATCH 054/844] Fix crash with JOINs --- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++++---- src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp | 9 ++++++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 464ce2ec586..0eeae112062 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -405,7 +405,7 @@ void executeQueryWithParallelReplicas( const ASTPtr & query_ast, ContextPtr context, std::shared_ptr storage_limits, - QueryPlanStepPtr read_from_merge_tree) + QueryPlanStepPtr analyzed_read_from_merge_tree) { auto logger = getLogger("executeQueryWithParallelReplicas"); LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas", @@ -519,7 +519,7 @@ void executeQueryWithParallelReplicas( new_context, processed_stage, coordinator, - std::move(read_from_merge_tree), + std::move(analyzed_read_from_merge_tree), /*has_missing_objects=*/false); DataStreams input_streams; @@ -563,7 +563,7 @@ void executeQueryWithParallelReplicas( const PlannerContextPtr & planner_context, ContextPtr context, std::shared_ptr storage_limits, - QueryPlanStepPtr read_from_merge_tree) + QueryPlanStepPtr analyzed_read_from_merge_tree) { QueryTreeNodePtr modified_query_tree = query_tree->clone(); rewriteJoinToGlobalJoin(modified_query_tree, context); @@ -574,7 +574,7 @@ void executeQueryWithParallelReplicas( auto modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree); executeQueryWithParallelReplicas( - query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits, std::move(read_from_merge_tree)); + query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits, std::move(analyzed_read_from_merge_tree)); } void executeQueryWithParallelReplicas( diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 48184cb2b5f..e847a0ece26 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -273,7 +273,14 @@ std::unique_ptr createLocalPlanForParallelReplicas( } chassert(reading); - const auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); + if (!read_from_merge_tree) + read_from_merge_tree = std::move(node->step); + + auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); + /// if no analysis is done yet, let's do it (happens with JOINs) + if (!analyzed_merge_tree->hasAnalyzedResult()) + analyzed_merge_tree->setAnalyzedResult(analyzed_merge_tree->selectRangesToRead()); + chassert(analyzed_merge_tree->hasAnalyzedResult()); CoordinationMode mode = CoordinationMode::Default; From eece76bc412b4973d7f13a19c46e0f24c5580d02 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 1 Jun 2024 20:53:33 +0000 Subject: [PATCH 055/844] Fix build --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1b57515d850..a969b84dbe8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -540,11 +540,7 @@ Pipe ReadFromMergeTree::readInOrder( { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), -<<<<<<< HEAD .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), -======= - .number_of_current_replica = client_info.number_of_current_replica, ->>>>>>> origin/pr-plan-rewrite }; const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; From 6476e9ad9bb2557f5010e4358e4881c80281a18c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 5 Jun 2024 06:45:56 +0000 Subject: [PATCH 056/844] Temporary test fix --- .../queries/0_stateless/02841_parallel_replicas_summary.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh index c82d2c8b0c0..bff56feacbf 100755 --- a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh +++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh @@ -27,6 +27,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE replicas_summary (n Int64) ENGINE = Mer query_id_base="02841_summary_$CLICKHOUSE_DATABASE" +# TODO: rethink the test, for now temporary disable allow_experimental_analyzer echo " SELECT * FROM replicas_summary @@ -36,7 +37,8 @@ echo " cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, - interactive_delay=0 + interactive_delay=0, + allow_experimental_analyzer=0 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_0" --data-binary @- -vvv 2>&1 \ | grep "Summary" | grep -cv '"read_rows":"0"' @@ -50,7 +52,8 @@ echo " cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, - interactive_delay=99999999999 + interactive_delay=99999999999, + allow_experimental_analyzer=0 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_high" --data-binary @- -vvv 2>&1 \ | grep "Summary" | grep -cv '"read_rows":"0"' From 3210d0f4716b5ac820d6f267437d13bf9fb02011 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 5 Jun 2024 11:56:12 +0000 Subject: [PATCH 057/844] Simple renaming --- src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index e847a0ece26..8ea826b861c 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -232,7 +232,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( ContextPtr context, QueryProcessingStage::Enum processed_stage, ParallelReplicasReadingCoordinatorPtr coordinator, - QueryPlanStepPtr read_from_merge_tree, + QueryPlanStepPtr analyzed_read_from_merge_tree, bool has_missing_objects) { checkStackSize(); @@ -273,10 +273,10 @@ std::unique_ptr createLocalPlanForParallelReplicas( } chassert(reading); - if (!read_from_merge_tree) - read_from_merge_tree = std::move(node->step); + if (!analyzed_read_from_merge_tree) + analyzed_read_from_merge_tree = std::move(node->step); - auto * analyzed_merge_tree = typeid_cast(read_from_merge_tree.get()); + auto * analyzed_merge_tree = typeid_cast(analyzed_read_from_merge_tree.get()); /// if no analysis is done yet, let's do it (happens with JOINs) if (!analyzed_merge_tree->hasAnalyzedResult()) analyzed_merge_tree->setAnalyzedResult(analyzed_merge_tree->selectRangesToRead()); From 9a7888ba516420c00978901bbe5b85cafe6e7bdf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 5 Jun 2024 16:39:54 +0200 Subject: [PATCH 058/844] fix tests --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c45501aa905..b98d78171ae 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -376,7 +376,7 @@ private: /// If we have a time argument that has bigger scale than the interval can contain and interval is not default, we need /// to return a value with bigger precision and thus we should multiply result on the scale difference. result_data[i] = 0; - result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)) * scale_diff; + result_data[i] += static_cast(ToStartOfInterval::execute(time_data[i], num_units, time_zone, scale_endtime)); } } else From f8d4aabfe0df44638184017ee90bfbe63ee79e90 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Jun 2024 14:28:17 +0000 Subject: [PATCH 059/844] Initiliaze working set on pipelie initialization, right after analysis --- src/Planner/PlannerJoinTree.cpp | 8 ++-- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 39 ++++--------------- .../QueryPlan/ReadFromMergeTree.cpp | 39 +++++++++++++++---- src/Processors/QueryPlan/ReadFromMergeTree.h | 6 ++- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 25206763a40..26e78ea69ac 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -908,13 +908,13 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres chassert(reading); - auto result_ptr = reading->selectRangesToRead(); - UInt64 rows_to_read = result_ptr->selected_rows; - reading->setAnalyzedResult(std::move(result_ptr)); - // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) { + auto result_ptr = reading->selectRangesToRead(); + UInt64 rows_to_read = result_ptr->selected_rows; + reading->setAnalyzedResult(std::move(result_ptr)); + if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read) rows_to_read = table_expression_query_info.limit; diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 8ea826b861c..27d86f656c5 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -273,44 +273,21 @@ std::unique_ptr createLocalPlanForParallelReplicas( } chassert(reading); - if (!analyzed_read_from_merge_tree) - analyzed_read_from_merge_tree = std::move(node->step); - auto * analyzed_merge_tree = typeid_cast(analyzed_read_from_merge_tree.get()); - /// if no analysis is done yet, let's do it (happens with JOINs) - if (!analyzed_merge_tree->hasAnalyzedResult()) - analyzed_merge_tree->setAnalyzedResult(analyzed_merge_tree->selectRangesToRead()); + ReadFromMergeTree * analyzed_merge_tree = nullptr; + if (analyzed_read_from_merge_tree.get()) + analyzed_merge_tree = typeid_cast(analyzed_read_from_merge_tree.get()); - chassert(analyzed_merge_tree->hasAnalyzedResult()); - - CoordinationMode mode = CoordinationMode::Default; - switch (analyzed_merge_tree->getReadType()) - { - case ReadFromMergeTree::ReadType::Default: - mode = CoordinationMode::Default; - break; - case ReadFromMergeTree::ReadType::InOrder: - mode = CoordinationMode::WithOrder; - break; - case ReadFromMergeTree::ReadType::InReverseOrder: - mode = CoordinationMode::ReverseOrder; - break; - case ReadFromMergeTree::ReadType::ParallelReplicas: - chassert(false); - UNREACHABLE(); - } - - const auto number_of_local_replica = new_context->getSettingsRef().max_parallel_replicas - 1; - coordinator->handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement( - mode, analyzed_merge_tree->getAnalysisResult().parts_with_ranges.getDescriptions(), number_of_local_replica)); - - MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement) {}; + MergeTreeAllRangesCallback all_ranges_cb + = [coordinator](InitialAllRangesAnnouncement announcement) { coordinator->handleInitialAllRangesAnnouncement(announcement); }; MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; + const auto number_of_local_replica = new_context->getSettingsRef().max_parallel_replicas - 1; + auto read_from_merge_tree_parallel_replicas - = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, true, all_ranges_cb, read_task_cb, number_of_local_replica); + = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb, number_of_local_replica); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, has_missing_objects); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7a13f072e4a..1aa48dbcff8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -337,7 +337,6 @@ ReadFromMergeTree::ReadFromMergeTree( std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, - bool enable_parallel_reading_, std::optional all_ranges_callback_, std::optional read_task_callback_, std::optional number_of_current_replica_) @@ -354,8 +353,8 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica requested_num_streams, max_block_numbers_to_read, log, - analyzed_merge_tree->analyzed_result_ptr, - enable_parallel_reading_, + (analyzed_merge_tree ? analyzed_merge_tree->analyzed_result_ptr : nullptr), + true, all_ranges_callback_, read_task_callback_, number_of_current_replica_); @@ -1424,11 +1423,8 @@ static void buildIndexes( const auto & settings = context->getSettingsRef(); - indexes.emplace(ReadFromMergeTree::Indexes{{ - filter_actions_dag, - context, - primary_key_column_names, - primary_key.expression}, {}, {}, {}, {}, false, {}}); + indexes.emplace( + ReadFromMergeTree::Indexes{KeyCondition{filter_actions_dag, context, primary_key_column_names, primary_key.expression}}); if (metadata_snapshot->hasPartitionKey()) { @@ -1951,6 +1947,33 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons { auto result = getAnalysisResult(); + if (is_parallel_reading_from_replicas && context->canUseParallelReplicasOnInitiator()) + { + CoordinationMode mode = CoordinationMode::Default; + switch (result.read_type) + { + case ReadFromMergeTree::ReadType::Default: + mode = CoordinationMode::Default; + break; + case ReadFromMergeTree::ReadType::InOrder: + mode = CoordinationMode::WithOrder; + break; + case ReadFromMergeTree::ReadType::InReverseOrder: + mode = CoordinationMode::ReverseOrder; + break; + case ReadFromMergeTree::ReadType::ParallelReplicas: + chassert(false); + UNREACHABLE(); + } + + chassert(number_of_current_replica.has_value()); + chassert(all_ranges_callback.has_value()); + + /// initialize working set from local replica + all_ranges_callback.value()( + InitialAllRangesAnnouncement(mode, result.parts_with_ranges.getDescriptions(), number_of_current_replica.value())); + } + if (enable_remove_parts_from_snapshot_optimization) { /// Do not keep data parts in snapshot. diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index b541481fa62..dc1d20cc807 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -126,7 +126,6 @@ public: std::unique_ptr createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, - bool enable_parallel_reading_, std::optional all_ranges_callback_, std::optional read_task_callback_, std::optional number_of_current_replica_); @@ -151,6 +150,11 @@ public: struct Indexes { + explicit Indexes(KeyCondition key_condition_) + : key_condition(std::move(key_condition_)) + , use_skip_indexes(false) + {} + KeyCondition key_condition; std::optional partition_pruner; std::optional minmax_idx_condition; From 4f37fafe426bbefe55aabc36faf386dc047e8517 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Jun 2024 15:57:12 +0000 Subject: [PATCH 060/844] Fix 02771_parallel_replicas_analyzer --- .../0_stateless/02771_parallel_replicas_analyzer.reference | 2 +- .../queries/0_stateless/02771_parallel_replicas_analyzer.sql | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference index 5bf3520ccdb..f87b3244f09 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference @@ -9,4 +9,4 @@ 7885388429666205427 8124171311239967992 1 1 -- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n SETTINGS\n allow_experimental_analyzer = 1,\n max_parallel_replicas = 2,\n cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n allow_experimental_parallel_reading_from_replicas = 1; -0 2 SELECT `__table1`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` AS `__table1` +0 1 SELECT `__table1`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` AS `__table1` diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql index 88a0d2163d6..7e27507ada9 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql @@ -1,5 +1,5 @@ -- Tags: zookeeper - +DROP TABLE IF EXISTS join_inner_table__fuzz_146_replicated; CREATE TABLE join_inner_table__fuzz_146_replicated ( `id` UUID, @@ -49,3 +49,5 @@ WHERE ) GROUP BY is_initial_query, query ORDER BY is_initial_query DESC, c, query; + +DROP TABLE join_inner_table__fuzz_146_replicated; From 63b0d13a62c84245a3efe6e3cd9cf08cd2a588a8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 6 Jun 2024 16:15:35 +0000 Subject: [PATCH 061/844] trying to fix tests --- src/Functions/toStartOfInterval.cpp | 2 -- .../02207_subseconds_intervals.reference | 16 ++++++++-------- ...6_fix_to_start_of_milli_microsecond.reference | 12 ++++++------ 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b98d78171ae..e358addf972 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,8 +192,6 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; - if (isDateTime64(arguments[0].type)) - scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 6cde773c3c4..7e6d64b6b9f 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,25 +10,25 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.12345700 -1980-12-12 12:12:12.12345700 +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123457 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.12345600 +1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.12345700 +2220-12-12 12:12:12.123457 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 -1980-12-12 12:12:12.123000 -1980-12-12 12:12:12.123000 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123000 +1930-12-12 12:12:12.123 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 -2220-12-12 12:12:12.123000 +2220-12-12 12:12:12.123 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference index 95a05a24981..413c79828c7 100644 --- a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -1,7 +1,7 @@ -2023-10-09 10:11:12.001000 -2023-10-09 10:11:12.001000 -2023-10-09 10:11:12.000000 -2023-10-09 10:11:12.000000 -2023-10-09 00:00:00.000000000 -2023-10-09 00:00:00.000000000 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.000 +2023-10-09 10:11:12.000 +2023-10-09 00:00:00.000000 +2023-10-09 00:00:00.000 2023-10-09 00:00:00 From c5ba8eb477fb168d38dd1493b2b20a121a3ed1fd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 6 Jun 2024 16:46:38 +0000 Subject: [PATCH 062/844] fix for origin --- src/Functions/toStartOfInterval.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index e358addf972..cc5ffd56976 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -192,6 +192,8 @@ public: case ResultType::DateTime64: { UInt32 scale = 0; + if (isDateTime64(arguments[0].type) && overload == Overload::Origin) + scale = assert_cast(*arguments[0].type.get()).getScale(); if (interval_type->getKind() == IntervalKind::Kind::Nanosecond) scale = (9 > scale) ? 9 : scale; else if (interval_type->getKind() == IntervalKind::Kind::Microsecond) From 7be90470d5c33877426cfc22cb5b3ad949a523ad Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Jun 2024 16:49:05 +0000 Subject: [PATCH 063/844] Fix 02731_parallel_replicas_join_subquery --- .../02731_parallel_replicas_join_subquery.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference index 451f0d6d485..f0d096d2072 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference @@ -31,7 +31,7 @@ 29 2j&S)ba?XG QuQj 17163829389637435056 3 UlI+1 14144472852965836438 =============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE =============== -0 3 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2`, toUInt64(min(`__table1`.`time`)) AS `start_ts` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > _CAST(1610517366120, \'UInt64\')) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` ORDER BY `__table1`.`key` ASC, `__table1`.`value1` ASC, `__table1`.`value2` ASC LIMIT _CAST(10, \'UInt64\') +0 2 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2`, toUInt64(min(`__table1`.`time`)) AS `start_ts` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > _CAST(1610517366120, \'UInt64\')) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` ORDER BY `__table1`.`key` ASC, `__table1`.`value1` ASC, `__table1`.`value2` ASC LIMIT _CAST(10, \'UInt64\') 0 3 SELECT `key`, `value1`, `value2`, toUInt64(min(`time`)) AS `start_ts` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` ORDER BY `key` ASC, `value1` ASC, `value2` ASC LIMIT 10 1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; 1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; @@ -58,7 +58,7 @@ U c 10 UlI+1 10 bX?}ix [ Ny]2 G 10 t _CAST(1610517366120, \'UInt64\')) GROUP BY `__table3`.`key`, `__table3`.`value1`, `__table3`.`value2`) AS `__table2` USING (`key`) GROUP BY `__table1`.`key`, `__table2`.`value1`, `__table2`.`value2` +0 2 SELECT `__table2`.`value1` AS `value1`, `__table2`.`value2` AS `value2`, count() AS `count` FROM `default`.`join_outer_table` AS `__table1` ALL INNER JOIN (SELECT `__table3`.`key` AS `key`, `__table3`.`value1` AS `value1`, `__table3`.`value2` AS `value2` FROM `default`.`join_inner_table` AS `__table3` PREWHERE (`__table3`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table3`.`number` > _CAST(1610517366120, \'UInt64\')) GROUP BY `__table3`.`key`, `__table3`.`value1`, `__table3`.`value2`) AS `__table2` USING (`key`) GROUP BY `__table1`.`key`, `__table2`.`value1`, `__table2`.`value2` 0 3 SELECT `key`, `value1`, `value2` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` 0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_` USING (`key`) GROUP BY `key`, `value1`, `value2` 1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; From 44fecf66ca3cd1969b2fd03d30861dac92df5ed0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Jun 2024 17:11:42 +0000 Subject: [PATCH 064/844] Fix 02967_parallel_replicas_joins_and_analyzer --- ...llel_replicas_joins_and_analyzer.reference | 365 +++++++++++++++--- 1 file changed, 303 insertions(+), 62 deletions(-) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 100e4e500cd..baec06244e2 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -19,8 +19,20 @@ select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x 14 14 14 14 0 0 15 15 0 0 0 0 explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; -Expression - ReadFromRemoteParallelReplicas +Union + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + ReadFromMemoryStorage + Expression + ReadFromMemoryStorage + Expression + ReadFromRemoteParallelReplicas -- -- The same query with cte; with sub1 as (select x, y from tab1 where x != 2), @@ -55,8 +67,22 @@ select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Sorting + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + ReadFromMemoryStorage + Expression + ReadFromMemoryStorage + Expression + ReadFromRemoteParallelReplicas -- -- GROUP BY should work up to WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -80,8 +106,22 @@ Expression Sorting Expression MergingAggregated - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Aggregating + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + ReadFromMemoryStorage + Expression + ReadFromMemoryStorage + Expression + ReadFromRemoteParallelReplicas -- -- ORDER BY in sub3 : sub1 is fully pushed, sub3 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -118,10 +158,22 @@ Expression Sorting Expression Join - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + ReadFromMemoryStorage + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- ORDER BY in sub1 : sub1 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2 order by y), @@ -160,12 +212,24 @@ Expression Join Expression Join + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- RIGHT JOIN in sub3: sub3 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -202,12 +266,24 @@ Expression Join Expression Join + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- RIGHT JOIN in sub5: sub5 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -242,14 +318,26 @@ Expression Sorting Expression Join - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas Expression Join - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- Subqueries for IN allowed with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), @@ -284,8 +372,28 @@ select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + ReadFromMemoryStorage + Expression + ReadFromMemoryStorage + Expression + ReadFromRemoteParallelReplicas -- -- Subqueries for IN are not allowed with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), @@ -333,10 +441,18 @@ Expression Expression Filter ReadFromSystemNumbers + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas set parallel_replicas_prefer_local_join = 1; -- A query with only INNER/LEFT joins is fully send to replicas. JOIN is executed in GLOBAL mode. select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; @@ -356,8 +472,22 @@ select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x 14 14 14 14 0 0 15 15 0 0 0 0 explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; -Expression - ReadFromRemoteParallelReplicas +Union + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- The same query with cte; with sub1 as (select x, y from tab1 where x != 2), @@ -392,8 +522,24 @@ select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Sorting + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- GROUP BY should work up to WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -417,8 +563,24 @@ Expression Sorting Expression MergingAggregated - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Aggregating + Expression + Join + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- ORDER BY in sub3 : sub1 is fully pushed, sub3 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -455,10 +617,23 @@ Expression Sorting Expression Join - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Join + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- ORDER BY in sub1 : sub1 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2 order by y), @@ -497,12 +672,24 @@ Expression Join Expression Join + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- RIGHT JOIN in sub3: sub3 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -539,12 +726,24 @@ Expression Join Expression Join + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- RIGHT JOIN in sub5: sub5 -> WithMergableStage with sub1 as (select x, y from tab1 where x != 2), @@ -579,14 +778,26 @@ Expression Sorting Expression Join - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas Expression Join - Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- Subqueries for IN allowed with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), @@ -621,8 +832,30 @@ select * from sub5 order by x SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; Expression Sorting - Expression - ReadFromRemoteParallelReplicas + Union + Expression + Sorting + Expression + Join + Expression + Join + Expression + CreatingSets + Expression + Expression + ReadFromMergeTree + CreatingSet + Expression + Filter + ReadFromSystemNumbers + Expression + Expression + ReadFromMergeTree + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas -- -- Subqueries for IN are not allowed with sub1 as (select x, y from tab1 where x in (select number from numbers(16) where number != 2)), @@ -670,7 +903,15 @@ Expression Expression Filter ReadFromSystemNumbers + Union + Expression + Expression + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas + Union + Expression Expression - ReadFromRemoteParallelReplicas - Expression - ReadFromRemoteParallelReplicas + ReadFromMergeTree + Expression + ReadFromRemoteParallelReplicas From 09995013085511b206388816f896cdf251535ee4 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Jun 2024 14:35:06 +0000 Subject: [PATCH 065/844] Fix crash in in-order coordinator --- src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f3318a48883..9eea40c4c5f 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -887,7 +887,9 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa ++stats[announcement.replica_num].number_of_requests; - if (new_rows_to_read > 0) + /// FIXME: this code updating total_rows_to_read but it needs to be done only once since we're taking working set from initiator + /// util I missing something, it seems this code is not necessary if working set is taken from initiator (todo: check it) + if (new_rows_to_read > 0 && progress_callback) { Progress progress; progress.total_rows_to_read = new_rows_to_read; From 29d56b61b3425effb0f1fedc0c51319d8fa4062b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Jun 2024 14:54:00 +0000 Subject: [PATCH 066/844] fix 02404_memory_bound_merging --- .../02404_memory_bound_merging.reference | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index 0409c48f846..3daa99f91c6 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -113,12 +113,18 @@ ExpressionTransform (Expression) ExpressionTransform × 4 (MergingAggregated) - Resize 1 → 4 - SortingAggregatedTransform 4 → 1 - MergingAggregatedBucketTransform × 4 - Resize 1 → 4 - GroupingAggregatedTransform 3 → 1 - (ReadFromRemoteParallelReplicas) + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + FinishAggregatingInOrderTransform 3 → 1 + (Union) + (Aggregating) + FinalizeAggregatedTransform + AggregatingInOrderTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolParallelReplicasInOrder, algorithm: InOrder) 0 → 1 + (ReadFromRemoteParallelReplicas) select a, count() from pr_t group by a order by a limit 5 offset 500; 500 1000 501 1000 From 23ca8608ba12dc9221effc760ecf11b3e2aefdf0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 13:25:19 +0000 Subject: [PATCH 067/844] Fix 03006_parallel_replicas_prewhere --- .../03006_parallel_replicas_prewhere.reference | 1 + .../0_stateless/03006_parallel_replicas_prewhere.sql | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference index e69de29bb2d..8b8d0e5d565 100644 --- a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference @@ -0,0 +1 @@ +default url_na_log 1 130000 130 diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql index 4b84646c034..afe6a00cc4d 100644 --- a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql @@ -21,11 +21,21 @@ SELECT FROM numbers(130000) SETTINGS max_insert_block_size = 200000; +SET max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, parallel_replicas_min_number_of_rows_per_replica=10000; + EXPLAIN ESTIMATE SELECT count() FROM url_na_log PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') -SETTINGS max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, parallel_replicas_min_number_of_rows_per_replica=10000; +SETTINGS allow_experimental_analyzer=0; + +-- here parallel replicas uses local snapshot as working set +-- so, the estimation can be done +EXPLAIN ESTIMATE +SELECT count() +FROM url_na_log +PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') +SETTINGS allow_experimental_analyzer=1; DROP POLICY url_na_log_policy0 ON url_na_log; DROP TABLE url_na_log; From d6dec38103cce55ba6c026ed8d56148eeed16e46 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 14:37:42 +0000 Subject: [PATCH 068/844] Fix test_parallel_replicas_over_distributed --- src/Interpreters/ClusterProxy/executeQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 0eeae112062..cde33697915 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -493,7 +493,8 @@ void executeQueryWithParallelReplicas( new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size); auto external_tables = new_context->getExternalTables(); - if (settings.allow_experimental_analyzer) + /// do not build local plan for distributed queries for now (address it later) + if (settings.allow_experimental_analyzer && !shard_num) { auto read_from_remote = std::make_unique( query_ast, From f999eed3767ba5e98ee9de7d1f94d13fcab20ebc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 16:49:11 +0000 Subject: [PATCH 069/844] Fix 02967_parallel_replicas_join_algo_and_analyzer --- ...llel_replicas_join_algo_and_analyzer.reference | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference index d7fa419aeab..d911335c6c0 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference @@ -23,10 +23,8 @@ simple (global) join with analyzer and parallel replicas 4200048 4200048 4200048 -1400016 4200054 4200054 4200054 -1400018 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) DefaultCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) DefaultCoordinator: Coordination done simple (local) join with analyzer and parallel replicas @@ -41,7 +39,6 @@ simple (local) join with analyzer and parallel replicas 4200048 4200048 4200048 -1400016 4200054 4200054 4200054 -1400018 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) DefaultCoordinator: Coordination done simple (local) join with analyzer and parallel replicas and full sorting merge join @@ -56,7 +53,6 @@ simple (local) join with analyzer and parallel replicas and full sorting merge j 4200048 4200048 4200048 -1400016 4200054 4200054 4200054 -1400018 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) WithOrderCoordinator: Coordination done nested join with analyzer @@ -83,7 +79,6 @@ nested join with analyzer and parallel replicas, both local 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) WithOrderCoordinator: Coordination done nested join with analyzer and parallel replicas, both global @@ -98,10 +93,8 @@ nested join with analyzer and parallel replicas, both global 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) DefaultCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) DefaultCoordinator: Coordination done nested join with analyzer and parallel replicas, global + local @@ -116,10 +109,8 @@ nested join with analyzer and parallel replicas, global + local 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` ALL INNER JOIN (SELECT `__table3`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table3`) AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` ALL INNER JOIN (SELECT `__table3`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table3`) AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) DefaultCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) DefaultCoordinator: Coordination done nested join with analyzer and parallel replicas, both local, both full sorting merge join @@ -134,10 +125,8 @@ nested join with analyzer and parallel replicas, both local, both full sorting m 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) WithOrderCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) WithOrderCoordinator: Coordination done nested join with analyzer and parallel replicas, both local, both full sorting and hash join @@ -152,10 +141,8 @@ nested join with analyzer and parallel replicas, both local, both full sorting a 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) DefaultCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) WithOrderCoordinator: Coordination done nested join with analyzer and parallel replicas, both local, both full sorting and hash join @@ -170,8 +157,6 @@ nested join with analyzer and parallel replicas, both local, both full sorting a 420336 420336 420336 -140112 420378 420378 420378 -140126 SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` ON `__table1`.`key` = `__table2`.`key` (stage: WithMergeableState) WithOrderCoordinator: Coordination done SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) DefaultCoordinator: Coordination done From f46deb4e793647b505514eb1a826dc94d7a69207 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 16:49:51 +0000 Subject: [PATCH 070/844] Fix clang-tidy --- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 27d86f656c5..16cca6a7f5d 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -1,26 +1,22 @@ #include #include -#include "Storages/MergeTree/RequestResponse.h" -#include #include +#include #include +#include +#include #include #include -#include #include -#include #include -#include #include -#include +#include #include -#include - +#include +#include #include -#include - namespace ProfileEvents { extern const Event SelectedParts; From da4ed273e304cf01010289b374074818e04771d7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 19:24:50 +0000 Subject: [PATCH 071/844] Fix 02811_parallel_replicas_prewhere_count --- src/Planner/PlannerJoinTree.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 9d1301305b8..15d0fa45891 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -959,6 +959,19 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres std::move(reading_step)); query_plan = std::move(query_plan_parallel_replicas); } + else { + QueryPlan query_plan_no_parallel_replicas; + storage->read( + query_plan_no_parallel_replicas, + columns_names, + storage_snapshot, + table_expression_query_info, + query_context, + from_stage, + max_block_size, + max_streams); + query_plan = std::move(query_plan_no_parallel_replicas); + } } const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); From eccd56c9b93f6ac3a8ef5c23c4bbc5ddc8a77fb6 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Jun 2024 20:16:00 +0000 Subject: [PATCH 072/844] Fix style check --- src/Planner/PlannerJoinTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 15d0fa45891..7a8ad50b9ab 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -959,7 +959,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres std::move(reading_step)); query_plan = std::move(query_plan_parallel_replicas); } - else { + else + { QueryPlan query_plan_no_parallel_replicas; storage->read( query_plan_no_parallel_replicas, From 164b5e47a81ae3ad0719d00319f5193fd59ee680 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 12 Jun 2024 10:30:15 +0000 Subject: [PATCH 073/844] Fix 02769_parallel_replicas_unavailable_shards --- .../0_stateless/02769_parallel_replicas_unavailable_shards.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql index 1a75e000349..34678fdd146 100644 --- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql +++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test_parallel_replicas_unavailable_shards; CREATE TABLE test_parallel_replicas_unavailable_shards (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); INSERT INTO test_parallel_replicas_unavailable_shards SELECT * FROM numbers(10); -SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; +SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_analyzer=0; SET send_logs_level='error'; SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*) SETTINGS log_comment = '02769_7b513191-5082-4073-8568-53b86a49da79'; From d4c1faad3b4399f19573a216a3444add70bac24f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 13 Jun 2024 11:24:00 -0300 Subject: [PATCH 074/844] testing the waters --- src/Access/Authentication.cpp | 264 ++++++++---------- src/Access/Authentication.h | 2 +- src/Access/IAccessStorage.cpp | 15 +- src/Access/LDAPAccessStorage.cpp | 4 +- src/Access/User.cpp | 3 +- src/Access/User.h | 2 +- src/Access/UsersConfigAccessStorage.cpp | 41 +-- .../Access/InterpreterCreateUserQuery.cpp | 22 +- ...InterpreterShowCreateAccessEntityQuery.cpp | 5 +- src/Interpreters/Session.cpp | 2 +- src/Interpreters/SessionLog.cpp | 9 +- src/Parsers/Access/ASTCreateUserQuery.h | 1 + src/Parsers/Access/ParserCreateUserQuery.cpp | 17 ++ src/Parsers/CommonParsers.h | 2 + src/Storages/System/StorageSystemUsers.cpp | 2 +- 15 files changed, 208 insertions(+), 183 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index bf1fe3feec3..d7d656b703e 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -85,10 +85,26 @@ namespace #endif } +static std::vector getAuthenticationMethodsOfType( + const std::vector & authentication_methods, + const std::unordered_set & types) +{ + std::vector authentication_methods_of_type; + + for (const auto & authentication_method : authentication_methods) + { + if (types.contains(authentication_method.getType())) + { + authentication_methods_of_type.push_back(authentication_method); + } + } + + return authentication_methods_of_type; +} bool Authentication::areCredentialsValid( const Credentials & credentials, - const AuthenticationData & auth_data, + const std::vector & authentication_methods, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings) { @@ -97,181 +113,149 @@ bool Authentication::areCredentialsValid( if (const auto * gss_acceptor_context = typeid_cast(&credentials)) { - switch (auth_data.getType()) + auto kerberos_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::KERBEROS}); + + for (const auto & kerberos_authentication : kerberos_authentication_methods) { - case AuthenticationType::NO_PASSWORD: - case AuthenticationType::PLAINTEXT_PASSWORD: - case AuthenticationType::SHA256_PASSWORD: - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - case AuthenticationType::BCRYPT_PASSWORD: - case AuthenticationType::LDAP: - case AuthenticationType::HTTP: - throw Authentication::Require("ClickHouse Basic Authentication"); - - case AuthenticationType::KERBEROS: - return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context); - - case AuthenticationType::SSL_CERTIFICATE: - throw Authentication::Require("ClickHouse X.509 Authentication"); - - case AuthenticationType::SSH_KEY: -#if USE_SSH - throw Authentication::Require("SSH Keys Authentication"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); -#endif - - case AuthenticationType::MAX: - break; + if (external_authenticators.checkKerberosCredentials(kerberos_authentication.getKerberosRealm(), *gss_acceptor_context)) + { + return true; + } } + + return false; } if (const auto * mysql_credentials = typeid_cast(&credentials)) { - switch (auth_data.getType()) + auto mysql_authentication_methods = getAuthenticationMethodsOfType( + authentication_methods, + {AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::DOUBLE_SHA1_PASSWORD}); + + for (const auto & mysql_authentication_method : mysql_authentication_methods) { - case AuthenticationType::NO_PASSWORD: - return true; // N.B. even if the password is not empty! + switch (mysql_authentication_method.getType()) + { + case AuthenticationType::PLAINTEXT_PASSWORD: + if (checkPasswordPlainTextMySQL( + mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), mysql_authentication_method.getPasswordHashBinary())) + { + return true; + } + break; - case AuthenticationType::PLAINTEXT_PASSWORD: - return checkPasswordPlainTextMySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), auth_data.getPasswordHashBinary()); - - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - return checkPasswordDoubleSHA1MySQL(mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), auth_data.getPasswordHashBinary()); - - case AuthenticationType::SHA256_PASSWORD: - case AuthenticationType::BCRYPT_PASSWORD: - case AuthenticationType::LDAP: - case AuthenticationType::KERBEROS: - case AuthenticationType::HTTP: - throw Authentication::Require("ClickHouse Basic Authentication"); - - case AuthenticationType::SSL_CERTIFICATE: - throw Authentication::Require("ClickHouse X.509 Authentication"); - - case AuthenticationType::SSH_KEY: -#if USE_SSH - throw Authentication::Require("SSH Keys Authentication"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); -#endif - - case AuthenticationType::MAX: - break; + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + if (checkPasswordDoubleSHA1MySQL( + mysql_credentials->getScramble(), + mysql_credentials->getScrambledPassword(), + mysql_authentication_method.getPasswordHashBinary())) + { + return true; + } + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "something bad happened"); + } } + + return false; } if (const auto * basic_credentials = typeid_cast(&credentials)) { - switch (auth_data.getType()) + auto basic_credentials_authentication_methods = getAuthenticationMethodsOfType( + authentication_methods, + {AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::SHA256_PASSWORD, + AuthenticationType::DOUBLE_SHA1_PASSWORD, AuthenticationType::LDAP, AuthenticationType::BCRYPT_PASSWORD, + AuthenticationType::HTTP}); + + for (const auto & auth_method : basic_credentials_authentication_methods) { - case AuthenticationType::NO_PASSWORD: - return true; // N.B. even if the password is not empty! + switch (auth_method.getType()) + { + case AuthenticationType::PLAINTEXT_PASSWORD: + if (checkPasswordPlainText(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) + { + return true; + } + break; - case AuthenticationType::PLAINTEXT_PASSWORD: - return checkPasswordPlainText(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - - case AuthenticationType::SHA256_PASSWORD: - return checkPasswordSHA256(basic_credentials->getPassword(), auth_data.getPasswordHashBinary(), auth_data.getSalt()); - - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - return checkPasswordDoubleSHA1(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - - case AuthenticationType::LDAP: - return external_authenticators.checkLDAPCredentials(auth_data.getLDAPServerName(), *basic_credentials); - - case AuthenticationType::KERBEROS: - throw Authentication::Require(auth_data.getKerberosRealm()); - - case AuthenticationType::SSL_CERTIFICATE: - throw Authentication::Require("ClickHouse X.509 Authentication"); - - case AuthenticationType::SSH_KEY: -#if USE_SSH - throw Authentication::Require("SSH Keys Authentication"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); -#endif - - case AuthenticationType::BCRYPT_PASSWORD: - return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); - - case AuthenticationType::HTTP: - switch (auth_data.getHTTPAuthenticationScheme()) - { - case HTTPAuthenticationScheme::BASIC: + case AuthenticationType::SHA256_PASSWORD: + if (checkPasswordSHA256(basic_credentials->getPassword(), auth_method.getPasswordHashBinary(), auth_method.getSalt())) + { + return true; + } + break; + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + if (checkPasswordDoubleSHA1(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) + { + return true; + } + break; + case AuthenticationType::LDAP: + if (external_authenticators.checkLDAPCredentials(auth_method.getLDAPServerName(), *basic_credentials)) + { + return true; + } + break; + case AuthenticationType::BCRYPT_PASSWORD: + if (checkPasswordBcrypt(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) + { + return true; + } + break; + case AuthenticationType::HTTP: + if (auth_method.getHTTPAuthenticationScheme() == HTTPAuthenticationScheme::BASIC) + { return external_authenticators.checkHTTPBasicCredentials( - auth_data.getHTTPAuthenticationServerName(), *basic_credentials, settings); - } - - case AuthenticationType::MAX: - break; + auth_method.getHTTPAuthenticationServerName(), *basic_credentials, settings); + } + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "something bad happened"); + } } + + return false; } if (const auto * ssl_certificate_credentials = typeid_cast(&credentials)) { - switch (auth_data.getType()) + const auto ssl_certificate_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSL_CERTIFICATE}); + + for (const auto & auth_method : ssl_certificate_authentication_methods) { - case AuthenticationType::NO_PASSWORD: - case AuthenticationType::PLAINTEXT_PASSWORD: - case AuthenticationType::SHA256_PASSWORD: - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - case AuthenticationType::BCRYPT_PASSWORD: - case AuthenticationType::LDAP: - case AuthenticationType::HTTP: - throw Authentication::Require("ClickHouse Basic Authentication"); - - case AuthenticationType::KERBEROS: - throw Authentication::Require(auth_data.getKerberosRealm()); - - case AuthenticationType::SSL_CERTIFICATE: - return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); - - case AuthenticationType::SSH_KEY: -#if USE_SSH - throw Authentication::Require("SSH Keys Authentication"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); -#endif - - case AuthenticationType::MAX: - break; + if (auth_method.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName())) + { + return true; + } } + + return false; } #if USE_SSH if (const auto * ssh_credentials = typeid_cast(&credentials)) { - switch (auth_data.getType()) + const auto ssh_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSL_CERTIFICATE}); + + for (const auto & auth_method : ssh_authentication_methods) { - case AuthenticationType::NO_PASSWORD: - case AuthenticationType::PLAINTEXT_PASSWORD: - case AuthenticationType::SHA256_PASSWORD: - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - case AuthenticationType::BCRYPT_PASSWORD: - case AuthenticationType::LDAP: - case AuthenticationType::HTTP: - throw Authentication::Require("ClickHouse Basic Authentication"); - - case AuthenticationType::KERBEROS: - throw Authentication::Require(auth_data.getKerberosRealm()); - - case AuthenticationType::SSL_CERTIFICATE: - throw Authentication::Require("ClickHouse X.509 Authentication"); - - case AuthenticationType::SSH_KEY: - return checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal()); - case AuthenticationType::MAX: - break; + if (checkSshSignature(auth_method.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal())) + { + return true; + } } + + return false; } #endif if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "areCredentialsValid(): authentication type {} not supported", toString(auth_data.getType())); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO arthur, list possible types"); +// throw Exception(ErrorCodes::NOT_IMPLEMENTED, "areCredentialsValid(): authentication type {} not supported", toString(auth_data.getType())); } } diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index ffc497cc442..13bd487f21a 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -24,7 +24,7 @@ struct Authentication /// returned by the authentication server static bool areCredentialsValid( const Credentials & credentials, - const AuthenticationData & auth_data, + const std::vector & authentication_methods, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings); diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8d4e7d3073e..1b8c1a7f2c2 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -514,8 +514,8 @@ std::optional IAccessStorage::authenticateImpl( const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, - bool allow_no_password, - bool allow_plaintext_password) const + bool , + bool ) const { if (auto id = find(credentials.getUserName())) { @@ -525,10 +525,11 @@ std::optional IAccessStorage::authenticateImpl( if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); - auto auth_type = user->auth_data.getType(); - if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || - ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) - throwAuthenticationTypeNotAllowed(auth_type); + // todo arthur +// auto auth_type = user->auth_data.getType(); +// if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || +// ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) +// throwAuthenticationTypeNotAllowed(auth_type); if (!areCredentialsValid(*user, credentials, external_authenticators, auth_result.settings)) throwInvalidCredentials(); @@ -564,7 +565,7 @@ bool IAccessStorage::areCredentialsValid( return false; } - return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators, settings); + return Authentication::areCredentialsValid(credentials, user.authentication_methods, external_authenticators, settings); } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 3206b20b691..89920878fef 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -468,8 +468,8 @@ std::optional LDAPAccessStorage::authenticateImpl( // User does not exist, so we create one, and will add it if authentication is successful. new_user = std::make_shared(); new_user->setName(credentials.getUserName()); - new_user->auth_data = AuthenticationData(AuthenticationType::LDAP); - new_user->auth_data.setLDAPServerName(ldap_server_name); + new_user->authentication_methods.emplace_back(AuthenticationType::LDAP); + new_user->authentication_methods.back().setLDAPServerName(ldap_server_name); user = new_user; } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 6a296706baf..90c151a32b7 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -16,7 +16,8 @@ bool User::equal(const IAccessEntity & other) const if (!IAccessEntity::equal(other)) return false; const auto & other_user = typeid_cast(other); - return (auth_data == other_user.auth_data) && (allowed_client_hosts == other_user.allowed_client_hosts) + return (authentication_methods == other_user.authentication_methods) + && (allowed_client_hosts == other_user.allowed_client_hosts) && (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles) && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database) && (valid_until == other_user.valid_until); diff --git a/src/Access/User.h b/src/Access/User.h index e4ab654dafd..28f16a76b0c 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -15,7 +15,7 @@ namespace DB */ struct User : public IAccessEntity { - AuthenticationData auth_data; + std::vector authentication_methods; AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; AccessRights access; GrantedRoles granted_roles; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1f9a977bab6..a4ab5549e8c 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -154,18 +154,18 @@ namespace if (has_password_plaintext) { - user->auth_data = AuthenticationData{AuthenticationType::PLAINTEXT_PASSWORD}; - user->auth_data.setPassword(config.getString(user_config + ".password")); + user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD); + user->authentication_methods.back().setPassword(config.getString(user_config + ".password")); } else if (has_password_sha256_hex) { - user->auth_data = AuthenticationData{AuthenticationType::SHA256_PASSWORD}; - user->auth_data.setPasswordHashHex(config.getString(user_config + ".password_sha256_hex")); + user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD); + user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex")); } else if (has_password_double_sha1_hex) { - user->auth_data = AuthenticationData{AuthenticationType::DOUBLE_SHA1_PASSWORD}; - user->auth_data.setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex")); + user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD); + user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex")); } else if (has_ldap) { @@ -177,19 +177,19 @@ namespace if (ldap_server_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "LDAP server name cannot be empty for user {}.", user_name); - user->auth_data = AuthenticationData{AuthenticationType::LDAP}; - user->auth_data.setLDAPServerName(ldap_server_name); + user->authentication_methods.emplace_back(AuthenticationType::LDAP); + user->authentication_methods.back().setLDAPServerName(ldap_server_name); } else if (has_kerberos) { const auto realm = config.getString(user_config + ".kerberos.realm", ""); - user->auth_data = AuthenticationData{AuthenticationType::KERBEROS}; - user->auth_data.setKerberosRealm(realm); + user->authentication_methods.emplace_back(AuthenticationType::KERBEROS); + user->authentication_methods.back().setKerberosRealm(realm); } else if (has_certificates) { - user->auth_data = AuthenticationData{AuthenticationType::SSL_CERTIFICATE}; + user->authentication_methods.emplace_back(AuthenticationType::SSL_CERTIFICATE); /// Fill list of allowed certificates. Poco::Util::AbstractConfiguration::Keys keys; @@ -205,12 +205,12 @@ namespace else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); } - user->auth_data.setSSLCertificateCommonNames(std::move(common_names)); + user->authentication_methods.back().setSSLCertificateCommonNames(std::move(common_names)); } else if (has_ssh_keys) { #if USE_SSH - user->auth_data = AuthenticationData{AuthenticationType::SSH_KEY}; + user->authentication_methods.emplace_back(AuthenticationType::SSH_KEY); Poco::Util::AbstractConfiguration::Keys entries; config.keys(ssh_keys_config, entries); @@ -247,20 +247,25 @@ namespace else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown ssh_key entry pattern type: {}", entry); } - user->auth_data.setSSHKeys(std::move(keys)); + user->authentication_methods.back().setSSHKeys(std::move(keys)); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } else if (has_http_auth) { - user->auth_data = AuthenticationData{AuthenticationType::HTTP}; - user->auth_data.setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server")); + user->authentication_methods.emplace_back(AuthenticationType::HTTP); + user->authentication_methods.back().setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server")); auto scheme = config.getString(http_auth_config + ".scheme"); - user->auth_data.setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme)); + user->authentication_methods.back().setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme)); + } + else + { + // remember to clean it up.. do I need to? smth in my memory tells me I don't need to + user->authentication_methods.emplace_back(); } - auto auth_type = user->auth_data.getType(); + auto auth_type = user->authentication_methods.back().getType(); if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) { diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 32c51b745c7..274017903ee 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -39,6 +39,7 @@ namespace const std::optional & override_settings, const std::optional & override_grantees, const std::optional & valid_until, + bool reset_authentication_methods, bool allow_implicit_no_password, bool allow_no_password, bool allow_plaintext_password) @@ -57,11 +58,22 @@ namespace "in the server configuration"); if (auth_data) - user.auth_data = *auth_data; + { + user.authentication_methods.push_back(*auth_data); + } + + if (reset_authentication_methods) + { + // todo check if element exists + auto primary_authentication_method = user.authentication_methods.back(); + user.authentication_methods.clear(); + user.authentication_methods.push_back(primary_authentication_method); + } if (auth_data || !query.alter) { - auto auth_type = user.auth_data.getType(); + // todo arthur + auto auth_type = user.authentication_methods[0].getType(); if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) { @@ -196,7 +208,7 @@ BlockIO InterpreterCreateUserQuery::execute() auto updated_user = typeid_cast>(entity->clone()); updateUserFromQueryImpl( *updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, - valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + valid_until, query.reset_authentication_methods_to_new, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); return updated_user; }; @@ -216,7 +228,7 @@ BlockIO InterpreterCreateUserQuery::execute() auto new_user = std::make_shared(); updateUserFromQueryImpl( *new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, - valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + valid_until, query.reset_authentication_methods_to_new, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); new_users.emplace_back(std::move(new_user)); } @@ -259,7 +271,7 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat if (query.auth_data) auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach); - updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true); + updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, query.reset_authentication_methods_to_new, allow_no_password, allow_plaintext_password, true); } void registerInterpreterCreateUserQuery(InterpreterFactory & factory) diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 96d8e55a74c..e0c5abbb877 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -64,8 +64,9 @@ namespace query->default_roles = user.default_roles.toASTWithNames(*access_control); } - if (user.auth_data.getType() != AuthenticationType::NO_PASSWORD) - query->auth_data = user.auth_data.toAST(); + // todo arthur + if (user.authentication_methods[0].getType() != AuthenticationType::NO_PASSWORD) + query->auth_data = user.authentication_methods[0].toAST(); if (user.valid_until) { diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..0c230c10388 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -309,7 +309,7 @@ Session::~Session() AuthenticationType Session::getAuthenticationType(const String & user_name) const { - return global_context->getAccessControl().read(user_name)->auth_data.getType(); + return global_context->getAccessControl().read(user_name)->authentication_methods.back().getType(); } AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & user_name) const diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index adb94cae0c2..51fc83f515f 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -220,9 +220,10 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, if (login_user) { log_entry.user = login_user->getName(); - log_entry.user_identified_with = login_user->auth_data.getType(); + log_entry.user_identified_with = login_user->authentication_methods.back().getType(); +// log_entry.user_identified_with = login_user->auth_data.getType(); } - log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; + log_entry.external_auth_server = login_user ? login_user->authentication_methods.back().getLDAPServerName() : ""; log_entry.session_id = session_id; @@ -260,9 +261,9 @@ void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, con if (login_user) { log_entry.user = login_user->getName(); - log_entry.user_identified_with = login_user->auth_data.getType(); + log_entry.user_identified_with = login_user->authentication_methods.back().getType(); } - log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; + log_entry.external_auth_server = login_user ? login_user->authentication_methods.back().getLDAPServerName() : ""; log_entry.client_info = client_info; add(std::move(log_entry)); diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 4e14d86c425..3f79f09984e 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -42,6 +42,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + bool reset_authentication_methods_to_new = false; std::shared_ptr names; std::optional new_name; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index d4729ab796a..e4717572b79 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -48,6 +48,7 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { + ParserKeyword{Keyword::ADD_NEW_AUTHENTICATION_METHOD}.ignore(pos, expected); if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) { auth_data = std::make_shared(); @@ -401,6 +402,14 @@ namespace return until_p.parse(pos, valid_until, expected); }); } + + bool parseResetAuthenticationMethods(IParserBase::Pos & pos, Expected & expected) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{Keyword::RESET_AUTHENTICATION_METHODS_TO_NEW}.ignore(pos, expected); + }); + } } @@ -454,6 +463,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ASTPtr valid_until; String cluster; String storage_name; + std::optional reset_authentication_methods_to_new; while (true) { @@ -467,6 +477,12 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (!reset_authentication_methods_to_new.has_value()) + { + reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); + continue; + } + if (!valid_until) { parseValidUntil(pos, expected, valid_until); @@ -564,6 +580,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->default_database = std::move(default_database); query->valid_until = std::move(valid_until); query->storage_name = std::move(storage_name); + query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); if (query->auth_data) query->children.push_back(query->auth_data); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index f0cbe42da80..a7f33080588 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -15,6 +15,7 @@ namespace DB MR_MACROS(ADD_PROJECTION, "ADD PROJECTION") \ MR_MACROS(ADD_STATISTICS, "ADD STATISTICS") \ MR_MACROS(ADD, "ADD") \ + MR_MACROS(ADD_NEW_AUTHENTICATION_METHOD, "ADD NEW AUTHENTICATION METHOD") \ MR_MACROS(ADMIN_OPTION_FOR, "ADMIN OPTION FOR") \ MR_MACROS(AFTER, "AFTER") \ MR_MACROS(ALGORITHM, "ALGORITHM") \ @@ -401,6 +402,7 @@ namespace DB MR_MACROS(REPLACE_PARTITION, "REPLACE PARTITION") \ MR_MACROS(REPLACE, "REPLACE") \ MR_MACROS(RESET_SETTING, "RESET SETTING") \ + MR_MACROS(RESET_AUTHENTICATION_METHODS_TO_NEW, "RESET AUTHENTICATION METHODS TO NEW") \ MR_MACROS(RESPECT_NULLS, "RESPECT NULLS") \ MR_MACROS(RESTORE, "RESTORE") \ MR_MACROS(RESTRICT, "RESTRICT") \ diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 0c34f04844d..e8557efa9f9 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -231,7 +231,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte if (!storage) continue; - add_row(user->getName(), id, storage->getStorageName(), user->auth_data, user->allowed_client_hosts, + add_row(user->getName(), id, storage->getStorageName(), user->authentication_methods.back(), user->allowed_client_hosts, user->default_roles, user->grantees, user->default_database); } } From 048cbb17a6b016c20128bfa156d52a7554974fdc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 13 Jun 2024 19:55:30 +0000 Subject: [PATCH 075/844] Comment to 02769_parallel_replicas_unavailable_shards.sql --- .../02769_parallel_replicas_unavailable_shards.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql index 34678fdd146..337b05263d0 100644 --- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql +++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql @@ -2,9 +2,11 @@ DROP TABLE IF EXISTS test_parallel_replicas_unavailable_shards; CREATE TABLE test_parallel_replicas_unavailable_shards (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); INSERT INTO test_parallel_replicas_unavailable_shards SELECT * FROM numbers(10); -SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_analyzer=0; +SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; SET send_logs_level='error'; -SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*) SETTINGS log_comment = '02769_7b513191-5082-4073-8568-53b86a49da79'; +-- with local plan for initiator, the query can be executed fast on initator, we can simply not come to the point where unavailable replica can be detected +-- therefore disable local plan for now +SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*) SETTINGS log_comment = '02769_7b513191-5082-4073-8568-53b86a49da79', allow_experimental_analyzer=0; SYSTEM FLUSH LOGS; From 245476b34bbbf7b0fd24fffa54539fa9c2b3b490 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 14 Jun 2024 12:02:47 +0000 Subject: [PATCH 076/844] Consistent replica id assignment --- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 3 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 34 ++++++++++++++++--- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 16cca6a7f5d..bd79fc38ae9 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -280,8 +280,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; - const auto number_of_local_replica = new_context->getSettingsRef().max_parallel_replicas - 1; - + const auto number_of_local_replica = 0; auto read_from_merge_tree_parallel_replicas = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb, number_of_local_replica); node->step = std::move(read_from_merge_tree_parallel_replicas); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 23d8f2b496f..6949c35e0ca 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -434,14 +434,11 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shard.getAllNodeCount()); all_replicas_count = shard.getAllNodeCount(); } - if (exclude_local_replica) - --all_replicas_count; std::vector shuffled_pool; if (all_replicas_count < shard.getAllNodeCount()) { shuffled_pool = shard.pool->getShuffledPools(current_settings); - shuffled_pool.resize(all_replicas_count); } else { @@ -451,10 +448,37 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func); } - for (size_t i=0; i < all_replicas_count; ++i) + std::vector pools_to_use; + if (exclude_local_replica) { - IConnections::ReplicaInfo replica_info + std::vector local_addr_possitions; + for (auto & pool : shuffled_pool) { + const auto & hostname = pool.pool->getHost(); + auto it = std::find_if( + begin(shard.local_addresses), + end(shard.local_addresses), + [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); + if (it != shard.local_addresses.end()) + pool.pool.reset(); + } + } + for (const auto & pool : shuffled_pool) + { + if (pool.pool) + pools_to_use.push_back(pool.pool); + } + + if (pools_to_use.size() > all_replicas_count) + pools_to_use.resize(all_replicas_count); + else + all_replicas_count = pools_to_use.size(); + + /// local replicas has number 0 + size_t offset = (exclude_local_replica ? 1 : 0); + for (size_t i = 0 + offset; i < all_replicas_count + offset; ++i) + { + IConnections::ReplicaInfo replica_info{ .all_replicas_count = all_replicas_count, /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. .number_of_current_replica = i, From b22776d3a81784daf23ac6e04871912249695030 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 14 Jun 2024 09:34:10 -0300 Subject: [PATCH 077/844] throw exception upon auth --- src/Access/Authentication.cpp | 22 +++++++++++++++++-- src/Access/IAccessStorage.cpp | 18 ++++++++++----- .../Access/InterpreterCreateUserQuery.cpp | 4 ++-- ...InterpreterShowCreateAccessEntityQuery.cpp | 2 ++ 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index d7d656b703e..166230c52eb 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -254,8 +254,26 @@ bool Authentication::areCredentialsValid( if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "TODO arthur, list possible types"); -// throw Exception(ErrorCodes::NOT_IMPLEMENTED, "areCredentialsValid(): authentication type {} not supported", toString(auth_data.getType())); + + // below code sucks, but works for now I guess. + std::string possible_authentication_types; + bool first = true; + + for (const auto & authentication_method : authentication_methods) + { + if (first) + { + possible_authentication_types += ", "; + first = false; + } + possible_authentication_types += toString(authentication_method.getType()); + } + + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "areCredentialsValid(): Invalid credentials provided, available authentication methods are {}", + possible_authentication_types); + } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 1b8c1a7f2c2..40041e62aa4 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -514,8 +514,8 @@ std::optional IAccessStorage::authenticateImpl( const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, - bool , - bool ) const + bool allow_no_password, + bool allow_plaintext_password) const { if (auto id = find(credentials.getUserName())) { @@ -526,10 +526,16 @@ std::optional IAccessStorage::authenticateImpl( throwAddressNotAllowed(address); // todo arthur -// auto auth_type = user->auth_data.getType(); -// if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || -// ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) -// throwAuthenticationTypeNotAllowed(auth_type); + // for now, just throw exception in case a user exists with invalid auth method + // back in the day, it would also throw an exception. There might be a smarter alternative + // like a user scan during startup. + for (const auto & auth_method : user->authentication_methods) + { + auto auth_type = auth_method.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + throwAuthenticationTypeNotAllowed(auth_type); + } if (!areCredentialsValid(*user, credentials, external_authenticators, auth_result.settings)) throwInvalidCredentials(); diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 274017903ee..d93f9d30f20 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -72,8 +72,8 @@ namespace if (auth_data || !query.alter) { - // todo arthur - auto auth_type = user.authentication_methods[0].getType(); + // I suppose it is guaranteed a user will always have at least one authentication method + auto auth_type = user.authentication_methods.back().getType(); if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) { diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index e0c5abbb877..7de939afa5a 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -65,6 +65,8 @@ namespace } // todo arthur + // to fix this, I'll need to turn `query->auth_data` into a list + // that also means creating a user with multiple authentication methods should be allowed if (user.authentication_methods[0].getType() != AuthenticationType::NO_PASSWORD) query->auth_data = user.authentication_methods[0].toAST(); From 98e5ea52062048a00c8a1cc9e53c376717b88e72 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 14 Jun 2024 10:02:40 -0300 Subject: [PATCH 078/844] style fix --- src/Access/Authentication.cpp | 3 ++- src/Interpreters/SessionLog.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 166230c52eb..80153f415a0 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -16,7 +16,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; - extern const int SUPPORT_IS_DISABLED; + extern const int LOGICAL_ERROR; } namespace @@ -256,6 +256,7 @@ bool Authentication::areCredentialsValid( // below code sucks, but works for now I guess. + // might be a problem if no auth method has been registered std::string possible_authentication_types; bool first = true; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 51fc83f515f..903eaff499f 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -207,6 +207,7 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } +// todo arthur fix this method void SessionLog::addLoginSuccess(const UUID & auth_id, const String & session_id, const Settings & settings, From c1250ccb35cd438d06445f82c096dde44f627188 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 14 Jun 2024 10:58:57 -0300 Subject: [PATCH 079/844] append default constructed auth method upon alter without auth data --- src/Access/Authentication.cpp | 152 +++++++++++------- .../Access/InterpreterCreateUserQuery.cpp | 5 + 2 files changed, 98 insertions(+), 59 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 80153f415a0..05140fcb8d5 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -83,38 +83,30 @@ namespace return false; } #endif -} -static std::vector getAuthenticationMethodsOfType( - const std::vector & authentication_methods, - const std::unordered_set & types) -{ - std::vector authentication_methods_of_type; - - for (const auto & authentication_method : authentication_methods) + std::vector getAuthenticationMethodsOfType( + const std::vector & authentication_methods, + const std::unordered_set & types) { - if (types.contains(authentication_method.getType())) + std::vector authentication_methods_of_type; + + for (const auto & authentication_method : authentication_methods) { - authentication_methods_of_type.push_back(authentication_method); + if (types.contains(authentication_method.getType())) + { + authentication_methods_of_type.push_back(authentication_method); + } } + + return authentication_methods_of_type; } - return authentication_methods_of_type; -} - -bool Authentication::areCredentialsValid( - const Credentials & credentials, - const std::vector & authentication_methods, - const ExternalAuthenticators & external_authenticators, - SettingsChanges & settings) -{ - if (!credentials.isReady()) - return false; - - if (const auto * gss_acceptor_context = typeid_cast(&credentials)) + bool checkKerberosAuthentication( + const GSSAcceptorContext * gss_acceptor_context, + const std::vector & authentication_methods, + const ExternalAuthenticators & external_authenticators) { auto kerberos_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::KERBEROS}); - for (const auto & kerberos_authentication : kerberos_authentication_methods) { if (external_authenticators.checkKerberosCredentials(kerberos_authentication.getKerberosRealm(), *gss_acceptor_context)) @@ -122,11 +114,12 @@ bool Authentication::areCredentialsValid( return true; } } - return false; } - if (const auto * mysql_credentials = typeid_cast(&credentials)) + bool checkMySQLAuthentication( + const MySQLNative41Credentials * mysql_credentials, + const std::vector & authentication_methods) { auto mysql_authentication_methods = getAuthenticationMethodsOfType( authentication_methods, @@ -138,7 +131,7 @@ bool Authentication::areCredentialsValid( { case AuthenticationType::PLAINTEXT_PASSWORD: if (checkPasswordPlainTextMySQL( - mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), mysql_authentication_method.getPasswordHashBinary())) + mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), mysql_authentication_method.getPasswordHashBinary())) { return true; } @@ -154,14 +147,17 @@ bool Authentication::areCredentialsValid( } break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "something bad happened"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid MySQL authentication type"); } } - return false; } - if (const auto * basic_credentials = typeid_cast(&credentials)) + bool checkBasicAuthentication( + const BasicCredentials * basic_credentials, + const std::vector & authentication_methods, + const ExternalAuthenticators & external_authenticators, + SettingsChanges & settings) { auto basic_credentials_authentication_methods = getAuthenticationMethodsOfType( authentication_methods, @@ -212,17 +208,17 @@ bool Authentication::areCredentialsValid( } break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "something bad happened"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid basic authentication type"); } } - return false; } - if (const auto * ssl_certificate_credentials = typeid_cast(&credentials)) + bool checkSSLCertificateAuthentication( + const SSLCertificateCredentials * ssl_certificate_credentials, + const std::vector & authentication_methods) { const auto ssl_certificate_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSL_CERTIFICATE}); - for (const auto & auth_method : ssl_certificate_authentication_methods) { if (auth_method.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName())) @@ -230,15 +226,15 @@ bool Authentication::areCredentialsValid( return true; } } - return false; } #if USE_SSH - if (const auto * ssh_credentials = typeid_cast(&credentials)) + bool checkSshAuthentication( + const SshCredentials * ssh_credentials, + const std::vector & authentication_methods) { - const auto ssh_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSL_CERTIFICATE}); - + const auto ssh_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSH_KEY}); for (const auto & auth_method : ssh_authentication_methods) { if (checkSshSignature(auth_method.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal())) @@ -246,35 +242,73 @@ bool Authentication::areCredentialsValid( return true; } } - return false; } #endif + [[noreturn]] void throwInvalidCredentialsException(const std::vector & authentication_methods) + { + std::string possible_authentication_types; + bool first = true; + + for (const auto & authentication_method : authentication_methods) + { + if (!first) + { + possible_authentication_types += ", "; + } + possible_authentication_types += toString(authentication_method.getType()); + first = false; + } + + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "areCredentialsValid(): Invalid credentials provided, available authentication methods are {}", + possible_authentication_types); + } +} + +bool Authentication::areCredentialsValid( + const Credentials & credentials, + const std::vector & authentication_methods, + const ExternalAuthenticators & external_authenticators, + SettingsChanges & settings) +{ + if (!credentials.isReady()) + return false; + + if (const auto * gss_acceptor_context = typeid_cast(&credentials)) + { + return checkKerberosAuthentication(gss_acceptor_context, authentication_methods, external_authenticators); + } + + if (const auto * mysql_credentials = typeid_cast(&credentials)) + { + return checkMySQLAuthentication(mysql_credentials, authentication_methods); + } + + if (const auto * basic_credentials = typeid_cast(&credentials)) + { + return checkBasicAuthentication(basic_credentials, authentication_methods, external_authenticators, settings); + } + + if (const auto * ssl_certificate_credentials = typeid_cast(&credentials)) + { + return checkSSLCertificateAuthentication(ssl_certificate_credentials, authentication_methods); + } + +#if USE_SSH + if (const auto * ssh_credentials = typeid_cast(&credentials)) + { + return checkSshAuthentication(ssh_credentials, authentication_methods); + } +#endif + if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; - // below code sucks, but works for now I guess. - // might be a problem if no auth method has been registered - std::string possible_authentication_types; - bool first = true; - - for (const auto & authentication_method : authentication_methods) - { - if (first) - { - possible_authentication_types += ", "; - first = false; - } - possible_authentication_types += toString(authentication_method.getType()); - } - - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "areCredentialsValid(): Invalid credentials provided, available authentication methods are {}", - possible_authentication_types); - + throwInvalidCredentialsException(authentication_methods); } } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index d93f9d30f20..398d77863d4 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -61,6 +61,11 @@ namespace { user.authentication_methods.push_back(*auth_data); } + else if (user.authentication_methods.empty()) + { + // previously, a user always had a default constructed auth method.. maybe I should put this somewhere else + user.authentication_methods.emplace_back(); + } if (reset_authentication_methods) { From c2d38b3c93c2182f096b21558db2b7f574fa435c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 14 Jun 2024 14:06:12 +0000 Subject: [PATCH 080/844] Fix crash --- src/Processors/QueryPlan/ReadFromRemote.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 6949c35e0ca..1a96668e30e 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -474,9 +474,17 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder else all_replicas_count = pools_to_use.size(); + chassert(all_replicas_count == pools_to_use.size()); + + if (exclude_local_replica && !pools_to_use.empty()) + pools_to_use.resize(all_replicas_count - 1); + + if (pools_to_use.empty()) + return; + /// local replicas has number 0 size_t offset = (exclude_local_replica ? 1 : 0); - for (size_t i = 0 + offset; i < all_replicas_count + offset; ++i) + for (size_t i = 0 + offset; i < all_replicas_count; ++i) { IConnections::ReplicaInfo replica_info{ .all_replicas_count = all_replicas_count, @@ -484,7 +492,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder .number_of_current_replica = i, }; - addPipeForSingeReplica(pipes, shuffled_pool[i].pool, replica_info); + addPipeForSingeReplica(pipes, pools_to_use[i - offset], replica_info); } auto pipe = Pipe::unitePipes(std::move(pipes)); From 70e493322123f13b7981e20cdb9f904b0bfe28e6 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 14 Jun 2024 14:39:55 -0300 Subject: [PATCH 081/844] fix no pwd authentication --- src/Access/Authentication.cpp | 6 +++- src/Client/ClientBase.cpp | 6 ++-- .../Access/InterpreterCreateUserQuery.cpp | 36 +++++++++++++------ ...InterpreterShowCreateAccessEntityQuery.cpp | 12 ++++--- src/Parsers/Access/ASTCreateUserQuery.cpp | 20 +++++++---- src/Parsers/Access/ASTCreateUserQuery.h | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 16 ++++++--- 7 files changed, 65 insertions(+), 33 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 05140fcb8d5..d486f717b92 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -161,7 +161,7 @@ namespace { auto basic_credentials_authentication_methods = getAuthenticationMethodsOfType( authentication_methods, - {AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::SHA256_PASSWORD, + {AuthenticationType::NO_PASSWORD, AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::SHA256_PASSWORD, AuthenticationType::DOUBLE_SHA1_PASSWORD, AuthenticationType::LDAP, AuthenticationType::BCRYPT_PASSWORD, AuthenticationType::HTTP}); @@ -169,6 +169,10 @@ namespace { switch (auth_method.getType()) { + case AuthenticationType::NO_PASSWORD: + { + return true; + } case AuthenticationType::PLAINTEXT_PASSWORD: if (checkPasswordPlainText(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f8391c64d5a..a4692da0e53 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1903,11 +1903,11 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (const auto * create_user_query = parsed_query->as()) { - if (!create_user_query->attach && create_user_query->auth_data) + if (!create_user_query->attach && !create_user_query->auth_data.empty()) { - if (const auto * auth_data = create_user_query->auth_data->as()) + for (const auto & authentication_method : create_user_query->auth_data) { - auto password = auth_data->getPassword(); + auto password = authentication_method->getPassword(); if (password) global_context->getAccessControl().checkPasswordComplexityRules(*password); diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 398d77863d4..b49d169dda4 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -33,7 +33,7 @@ namespace void updateUserFromQueryImpl( User & user, const ASTCreateUserQuery & query, - const std::optional auth_data, + const std::vector auth_data, const std::shared_ptr & override_name, const std::optional & override_default_roles, const std::optional & override_settings, @@ -51,15 +51,19 @@ namespace else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (!query.attach && !query.alter && !auth_data && !allow_implicit_no_password) + // todo arthur check if auth_data.empty makes sense + if (!query.attach && !query.alter && !auth_data.empty() && !allow_implicit_no_password) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication type NO_PASSWORD must " "be explicitly specified, check the setting allow_implicit_no_password " "in the server configuration"); - if (auth_data) + if (!auth_data.empty()) { - user.authentication_methods.push_back(*auth_data); + for (const auto & authentication_method : auth_data) + { + user.authentication_methods.push_back(authentication_method); + } } else if (user.authentication_methods.empty()) { @@ -75,7 +79,7 @@ namespace user.authentication_methods.push_back(primary_authentication_method); } - if (auth_data || !query.alter) + if (!auth_data.empty() || !query.alter) { // I suppose it is guaranteed a user will always have at least one authentication method auto auth_type = user.authentication_methods.back().getType(); @@ -145,9 +149,14 @@ BlockIO InterpreterCreateUserQuery::execute() bool no_password_allowed = access_control.isNoPasswordAllowed(); bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed(); - std::optional auth_data; - if (query.auth_data) - auth_data = AuthenticationData::fromAST(*query.auth_data, getContext(), !query.attach); + std::vector auth_data; + if (!query.auth_data.empty()) + { + for (const auto & authentication_method_ast : query.auth_data) + { + auth_data.push_back(AuthenticationData::fromAST(*authentication_method_ast, getContext(), !query.attach)); + } + } std::optional valid_until; if (query.valid_until) @@ -272,9 +281,14 @@ BlockIO InterpreterCreateUserQuery::execute() void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password) { - std::optional auth_data; - if (query.auth_data) - auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach); + std::vector auth_data; + if (!query.auth_data.empty()) + { + for (const auto & authentication_method_ast : query.auth_data) + { + auth_data.emplace_back(AuthenticationData::fromAST(*authentication_method_ast, {}, !query.attach)); + } + } updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, query.reset_authentication_methods_to_new, allow_no_password, allow_plaintext_password, true); } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 7de939afa5a..70dd859c70b 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -64,11 +64,13 @@ namespace query->default_roles = user.default_roles.toASTWithNames(*access_control); } - // todo arthur - // to fix this, I'll need to turn `query->auth_data` into a list - // that also means creating a user with multiple authentication methods should be allowed - if (user.authentication_methods[0].getType() != AuthenticationType::NO_PASSWORD) - query->auth_data = user.authentication_methods[0].toAST(); + for (const auto & authentication_method : user.authentication_methods) + { + if (authentication_method.getType() != AuthenticationType::NO_PASSWORD) + { + query->auth_data.push_back(authentication_method.toAST()); + } + } if (user.valid_until) { diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 02735568a04..975f81e57ca 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -18,9 +18,12 @@ namespace << quoteString(new_name); } - void formatAuthenticationData(const ASTAuthenticationData & auth_data, const IAST::FormatSettings & settings) + void formatAuthenticationData(const std::vector> & auth_data, const IAST::FormatSettings & settings) { - auth_data.format(settings); + for (const auto & authentication_method : auth_data) + { + authentication_method->format(settings); + } } void formatValidUntil(const IAST & valid_until, const IAST::FormatSettings & settings) @@ -180,10 +183,13 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); - if (auth_data) + if (!auth_data.empty()) { - res->auth_data = std::static_pointer_cast(auth_data->clone()); - res->children.push_back(res->auth_data); + for (const auto & authentication_method : auth_data) + { + res->auth_data.push_back(std::static_pointer_cast(authentication_method->clone())); + res->children.push_back(res->auth_data.back()); + } } return res; @@ -222,8 +228,8 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (new_name) formatRenameTo(*new_name, format); - if (auth_data) - formatAuthenticationData(*auth_data, format); + if (!auth_data.empty()) + formatAuthenticationData(auth_data, format); if (valid_until) formatValidUntil(*valid_until, format); diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 3f79f09984e..58f167fe5b7 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -48,7 +48,7 @@ public: std::optional new_name; String storage_name; - std::shared_ptr auth_data; + std::vector> auth_data; std::optional hosts; std::optional add_hosts; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index e4717572b79..1c4b23e1a34 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -455,7 +455,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::optional hosts; std::optional add_hosts; std::optional remove_hosts; - std::shared_ptr auth_data; + std::vector> auth_data; std::shared_ptr default_roles; std::shared_ptr settings; std::shared_ptr grantees; @@ -467,12 +467,12 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - if (!auth_data) + if (auth_data.empty()) { std::shared_ptr new_auth_data; if (parseAuthenticationData(pos, expected, new_auth_data)) { - auth_data = std::move(new_auth_data); + auth_data.push_back(new_auth_data); continue; } } @@ -582,8 +582,14 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->storage_name = std::move(storage_name); query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); - if (query->auth_data) - query->children.push_back(query->auth_data); + if (!query->auth_data.empty()) + { + // as of now, this will always have a single element, but looping just in case. + for (const auto & authentication_method : query->auth_data) + { + query->children.push_back(authentication_method); + } + } if (query->valid_until) query->children.push_back(query->valid_until); From 70a33e633cd8acd0ec7220b5ce6e45d9c7bd4e9e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 14 Jun 2024 19:53:04 +0000 Subject: [PATCH 082/844] Fix 02784_parallel_replicas_automatic_decision --- .../02784_parallel_replicas_automatic_decision.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh index 8a3b34e5cfa..dd9b5c3f6d9 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh @@ -10,12 +10,14 @@ function were_parallel_replicas_used () { $CLICKHOUSE_CLIENT --query " SELECT initial_query_id, - concat('Used parallel replicas: ', (countIf(initial_query_id != query_id) != 0)::bool::String) as used + concat('Used parallel replicas: ', (ProfileEvents['ParallelReplicasUsedCount'] > 0)::bool::String) as used FROM system.query_log WHERE event_date >= yesterday() AND initial_query_id LIKE '$1%' - GROUP BY initial_query_id - ORDER BY min(event_time_microseconds) ASC + AND query_id = initial_query_id + AND type = 'QueryFinish' + AND current_database = '$CLICKHOUSE_DATABASE' + ORDER BY event_time_microseconds ASC FORMAT TSV" } @@ -48,7 +50,6 @@ function run_query_with_pure_parallel_replicas () { --query "$3" \ --query_id "${1}_pure" \ --max_parallel_replicas 3 \ - --prefer_localhost_replica 1 \ --cluster_for_parallel_replicas "parallel_replicas" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ From 6ac24fcf54aef805b9a37509d4c27b5921d1628a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 14 Jun 2024 18:08:01 -0300 Subject: [PATCH 083/844] fix test build --- src/Parsers/tests/gtest_Parser.cpp | 3 ++- src/Parsers/tests/gtest_common.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index f0abc68f966..fd22dd34128 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -87,7 +87,8 @@ TEST_P(ParserTest, parseQuery) { if (input_text.starts_with("ATTACH")) { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); + // todo arthur + auto salt = (dynamic_cast(ast.get())->auth_data.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index 8ff9400d8a2..bc780e17ffd 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -63,7 +63,8 @@ TEST_P(ParserKQLTest, parseKQLQuery) { if (input_text.starts_with("ATTACH")) { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); + // todo arthur check + auto salt = (dynamic_cast(ast.get())->auth_data.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else From c87bfe102e36612ebb4b474eb822543de0cf011e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 17 Jun 2024 12:24:14 +0000 Subject: [PATCH 084/844] Fix: correct local replica exclusion - fixes 02731_parallel_replicas_join_subquery --- .../ClusterProxy/executeQuery.cpp | 8 +++++-- src/Processors/QueryPlan/ReadFromRemote.cpp | 21 +++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index cde33697915..0937e121426 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -489,8 +489,12 @@ void executeQueryWithParallelReplicas( "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard"); } - auto coordinator = std::make_shared( - new_cluster->getShardsInfo().begin()->getAllNodeCount(), settings.parallel_replicas_mark_segment_size); + auto replica_count = new_cluster->getShardsInfo().begin()->getAllNodeCount(); + if (settings.max_parallel_replicas < replica_count) + replica_count = settings.max_parallel_replicas; + + auto coordinator = std::make_shared(replica_count, settings.parallel_replicas_mark_segment_size); + auto external_tables = new_context->getExternalTables(); /// do not build local plan for distributed queries for now (address it later) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 1a96668e30e..f7cb88154af 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -449,6 +449,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder } std::vector pools_to_use; + pools_to_use.reserve(shuffled_pool.size()); if (exclude_local_replica) { std::vector local_addr_possitions; @@ -460,7 +461,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder end(shard.local_addresses), [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); if (it != shard.local_addresses.end()) + { pool.pool.reset(); + } } } for (const auto & pool : shuffled_pool) @@ -469,12 +472,14 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder pools_to_use.push_back(pool.pool); } + LOG_DEBUG( + getLogger("ReadFromParallelRemoteReplicasStep"), + "Number of pools to use is {}. Originally {}", + pools_to_use.size(), + shuffled_pool.size()); + if (pools_to_use.size() > all_replicas_count) pools_to_use.resize(all_replicas_count); - else - all_replicas_count = pools_to_use.size(); - - chassert(all_replicas_count == pools_to_use.size()); if (exclude_local_replica && !pools_to_use.empty()) pools_to_use.resize(all_replicas_count - 1); @@ -482,6 +487,14 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder if (pools_to_use.empty()) return; + { + String pool_addresses; + for (const auto & pool : pools_to_use) + pool_addresses += pool->getAddress() + ";"; + + LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", pool_addresses); + } + /// local replicas has number 0 size_t offset = (exclude_local_replica ? 1 : 0); for (size_t i = 0 + offset; i < all_replicas_count; ++i) From c286188419df31cdc17ac29f341a99b723da6360 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 18 Jun 2024 10:39:13 +0000 Subject: [PATCH 085/844] Update 00177_memory_bound_merging.reference --- .../queries/1_stateful/00177_memory_bound_merging.reference | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.reference b/tests/queries/1_stateful/00177_memory_bound_merging.reference index 5689152d60a..4c7505d1123 100644 --- a/tests/queries/1_stateful/00177_memory_bound_merging.reference +++ b/tests/queries/1_stateful/00177_memory_bound_merging.reference @@ -10,6 +10,8 @@ http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-18 http:/ http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-19 http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 2014-03-20 http://auto.ru/chatay-baranta_bound-in-thankYou=ru/tver/zhanny 1 -SortingAggregatedTransform MergingAggregatedBucketTransform -GroupingAggregatedTransform +FinishAggregatingInOrderTransform +FinalizeAggregatedTransform +AggregatingInOrderTransform +MergeTreeSelect From 366de07856cd25d7212ae8106d68994cd84a2de3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 18 Jun 2024 10:39:50 +0000 Subject: [PATCH 086/844] Remove prefer_localhost_replica --- .../0_stateless/02731_parallel_replicas_join_subquery.sql | 1 - tests/queries/1_stateful/00177_memory_bound_merging.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index 7693d0da295..e91e2a19526 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -22,7 +22,6 @@ SELECT LIMIT 100; SET max_parallel_replicas = 3; -SET prefer_localhost_replica = 1; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; SET joined_subquery_requires_alias = 0; diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index d5cd1a05cd8..74b5e80e059 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -55,7 +55,7 @@ test2() { test3() { $CLICKHOUSE_CLIENT -nq " SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; - SET max_threads = 16, prefer_localhost_replica = 1, read_in_order_two_level_merge_threshold = 1000, query_plan_aggregation_in_order = 1, distributed_aggregation_memory_efficient = 1; + SET max_threads = 16, read_in_order_two_level_merge_threshold = 1000, query_plan_aggregation_in_order = 1, distributed_aggregation_memory_efficient = 1; SELECT replaceRegexpOne(explain, '^ *(\w+).*', '\\1') FROM ( From 0ecefa6973ecb1356e117d944d26f26fb204d8c9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 18 Jun 2024 11:07:15 +0000 Subject: [PATCH 087/844] Better replicas notation in plan description --- src/Processors/QueryPlan/ReadFromRemote.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index f7cb88154af..26c57c4a760 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -387,8 +387,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( { chassert(cluster->getShardCount() == 1); - std::vector description; - description.push_back(fmt::format("query: {}", formattedAST(query_ast))); + std::vector replicas; + replicas.reserve(cluster->getShardsAddresses().front().size()); bool first_local = false; for (const auto & addr : cluster->getShardsAddresses().front()) @@ -400,10 +400,16 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( continue; } - description.push_back(fmt::format("Replica: {}", addr.host_name)); + /// replace hostname with replica name if the hostname started with replica namespace, + /// it makes description shorter and more readable + if (!addr.database_replica_name.empty() && addr.host_name.starts_with(addr.database_replica_name)) + replicas.push_back(fmt::format("{}", addr.database_replica_name)); + else + replicas.push_back(fmt::format("{}", addr.host_name)); } - setStepDescription(boost::algorithm::join(description, ", ")); + auto description = fmt::format("Query: {} Replicas: ", formattedAST(query_ast)) + boost::algorithm::join(replicas, ", "); + setStepDescription(std::move(description)); } void ReadFromParallelRemoteReplicasStep::enforceSorting(SortDescription output_sort_description) From dba6ea078f259433dd91a0b2ddc88b6a4f3c143d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 18 Jun 2024 19:47:07 +0000 Subject: [PATCH 088/844] Setting parallel_replicas_local_plan --- src/Core/Settings.h | 1 + src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6197a7cf6e1..c16efc5717b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -216,6 +216,7 @@ class IColumn; M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ + M(Bool, parallel_replicas_local_plan, true, "Build local plan for local replica", 0) \ \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ \ diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 0937e121426..ab25da090d6 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -498,7 +498,7 @@ void executeQueryWithParallelReplicas( auto external_tables = new_context->getExternalTables(); /// do not build local plan for distributed queries for now (address it later) - if (settings.allow_experimental_analyzer && !shard_num) + if (settings.allow_experimental_analyzer && settings.parallel_replicas_local_plan && !shard_num) { auto read_from_remote = std::make_unique( query_ast, From 9658d37d49e3c47cd306657abb8a314f47c4011d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 19 Jun 2024 11:27:50 +0000 Subject: [PATCH 089/844] Add parallel_replicas_local_plan to settings history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index eddf83f7912..1d197db9e31 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -113,6 +113,7 @@ static const std::map Date: Wed, 19 Jun 2024 11:57:38 -0300 Subject: [PATCH 090/844] ptal --- src/Parsers/Access/ParserCreateUserQuery.cpp | 67 +++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 1c4b23e1a34..71d8e344646 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -26,6 +26,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + namespace { bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, std::optional & new_name) @@ -48,7 +53,6 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { - ParserKeyword{Keyword::ADD_NEW_AUTHENTICATION_METHOD}.ignore(pos, expected); if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) { auth_data = std::make_shared(); @@ -403,6 +407,19 @@ namespace }); } + bool parseAddNewAuthenticationMethod(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & auth_data) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{Keyword::ADD_NEW_AUTHENTICATION_METHOD}.ignore(pos, expected)) + { + return false; + } + + return parseAuthenticationData(pos, expected, auth_data); + }); + } + bool parseResetAuthenticationMethods(IParserBase::Pos & pos, Expected & expected) { return IParserBase::wrapParseImpl(pos, [&] @@ -465,18 +482,42 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String storage_name; std::optional reset_authentication_methods_to_new; + bool parsed_identified_with = false; + bool parsed_add_new_method = false; + while (true) { - if (auth_data.empty()) + std::shared_ptr identified_with_auth_data; + bool parse_auth_data = parseAuthenticationData(pos, expected, identified_with_auth_data); + + bool found_multiple_identified_with = parsed_identified_with && parse_auth_data; + + if (found_multiple_identified_with) { - std::shared_ptr new_auth_data; - if (parseAuthenticationData(pos, expected, new_auth_data)) - { - auth_data.push_back(new_auth_data); - continue; - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one identified with is permitted"); } + if (parse_auth_data) + { + if (parsed_add_new_method) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Identified with must precede add new auth"); + } + auth_data.push_back(identified_with_auth_data); + parsed_identified_with = true; + continue; + } + + std::shared_ptr add_new_auth_method; + parsed_add_new_method = parseAddNewAuthenticationMethod(pos, expected, add_new_auth_method); + + if (parsed_add_new_method) + { + auth_data.push_back(add_new_auth_method); + continue; + } + + // todo arthur maybe check that neither identified with or add new auth method has been parsed if (!reset_authentication_methods_to_new.has_value()) { reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); @@ -580,15 +621,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->default_database = std::move(default_database); query->valid_until = std::move(valid_until); query->storage_name = std::move(storage_name); - query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); + query->reset_authentication_methods_to_new = parsed_identified_with || reset_authentication_methods_to_new.value_or(false); - if (!query->auth_data.empty()) + for (const auto & authentication_method : query->auth_data) { - // as of now, this will always have a single element, but looping just in case. - for (const auto & authentication_method : query->auth_data) - { - query->children.push_back(authentication_method); - } + query->children.push_back(authentication_method); } if (query->valid_until) From 179d54505ad39245e13f40bde7c0a719744f0555 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 20 Jun 2024 15:07:16 -0300 Subject: [PATCH 091/844] make progress, seems functional --- src/Access/IAccessStorage.cpp | 1 - src/Access/UsersConfigAccessStorage.cpp | 15 ++-- src/Core/PostgreSQLProtocol.h | 19 +++--- .../Access/InterpreterCreateUserQuery.cpp | 68 ++++++++++++------- src/Interpreters/Session.cpp | 17 +++-- src/Interpreters/Session.h | 4 +- src/Parsers/Access/ASTAuthenticationData.cpp | 4 +- src/Parsers/Access/ASTCreateUserQuery.cpp | 14 ++-- src/Parsers/Access/ASTCreateUserQuery.h | 1 + src/Parsers/Access/ParserCreateUserQuery.cpp | 5 +- src/Parsers/IAST.h | 6 +- src/Server/MySQLHandler.cpp | 13 ++-- src/Server/TCPHandler.cpp | 12 +++- 13 files changed, 119 insertions(+), 60 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 40041e62aa4..f1725dafdf4 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -525,7 +525,6 @@ std::optional IAccessStorage::authenticateImpl( if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); - // todo arthur // for now, just throw exception in case a user exists with invalid auth method // back in the day, it would also throw an exception. There might be a smarter alternative // like a user scan during startup. diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index a4ab5549e8c..cfb02b0962b 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -265,13 +265,16 @@ namespace user->authentication_methods.emplace_back(); } - auto auth_type = user->authentication_methods.back().getType(); - if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || - ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + for (const auto & authentication_method : user->authentication_methods) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", - toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); + auto auth_type = authentication_method.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", + toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); + } } const auto profile_name_config = user_config + ".profile"; diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 807e4a7187a..5dc9082d49d 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -890,16 +890,19 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - AuthenticationType user_auth_type; try { - user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); - if (type_to_method.find(user_auth_type) != type_to_method.end()) + const auto user_authentication_types = session.getAuthenticationTypesOrLogInFailure(user_name); + + for (auto user_authentication_type : user_authentication_types) { - type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); - mt.send(Messaging::AuthenticationOk(), true); - LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); - return; + if (type_to_method.find(user_authentication_type) != type_to_method.end()) + { + type_to_method[user_authentication_type]->authenticate(user_name, session, mt, address); + mt.send(Messaging::AuthenticationOk(), true); + LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); + return; + } } } catch (const Exception&) @@ -913,7 +916,7 @@ public: mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), true); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication method is not supported: {}", user_auth_type); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "None of the authentication methods registered for the user are supported"); } }; } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index b49d169dda4..c79b7b5bc34 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -40,6 +40,7 @@ namespace const std::optional & override_grantees, const std::optional & valid_until, bool reset_authentication_methods, + bool replace_authentication_methods, bool allow_implicit_no_password, bool allow_no_password, bool allow_plaintext_password) @@ -51,45 +52,48 @@ namespace else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - // todo arthur check if auth_data.empty makes sense if (!query.attach && !query.alter && !auth_data.empty() && !allow_implicit_no_password) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication type NO_PASSWORD must " "be explicitly specified, check the setting allow_implicit_no_password " "in the server configuration"); - if (!auth_data.empty()) - { - for (const auto & authentication_method : auth_data) - { - user.authentication_methods.push_back(authentication_method); - } - } - else if (user.authentication_methods.empty()) + if (user.authentication_methods.empty() && auth_data.empty()) { // previously, a user always had a default constructed auth method.. maybe I should put this somewhere else user.authentication_methods.emplace_back(); } - if (reset_authentication_methods) + if (replace_authentication_methods) { - // todo check if element exists - auto primary_authentication_method = user.authentication_methods.back(); user.authentication_methods.clear(); - user.authentication_methods.push_back(primary_authentication_method); } - if (!auth_data.empty() || !query.alter) + for (const auto & authentication_method : auth_data) { - // I suppose it is guaranteed a user will always have at least one authentication method - auto auth_type = user.authentication_methods.back().getType(); - if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || - ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + user.authentication_methods.emplace_back(authentication_method); + } + + if (reset_authentication_methods) + { + auto backup_authentication_method = user.authentication_methods.back(); + user.authentication_methods.clear(); + user.authentication_methods.emplace_back(backup_authentication_method); + } + + if (!query.alter) + { + for (const auto & authentication_method : user.authentication_methods) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", - toString(auth_type), - AuthenticationTypeInfo::get(auth_type).name); + auto auth_type = authentication_method.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", + toString(auth_type), + AuthenticationTypeInfo::get(auth_type).name); + } } } @@ -222,7 +226,8 @@ BlockIO InterpreterCreateUserQuery::execute() auto updated_user = typeid_cast>(entity->clone()); updateUserFromQueryImpl( *updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, - valid_until, query.reset_authentication_methods_to_new, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, + implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); return updated_user; }; @@ -242,7 +247,8 @@ BlockIO InterpreterCreateUserQuery::execute() auto new_user = std::make_shared(); updateUserFromQueryImpl( *new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, - valid_until, query.reset_authentication_methods_to_new, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, + implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); new_users.emplace_back(std::move(new_user)); } @@ -290,7 +296,19 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat } } - updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, query.reset_authentication_methods_to_new, allow_no_password, allow_plaintext_password, true); + updateUserFromQueryImpl(user, + query, + auth_data, + {}, + {}, + {}, + {}, + {}, + query.reset_authentication_methods_to_new, + query.replace_authentication_methods, + allow_no_password, + allow_plaintext_password, + true); } void registerInterpreterCreateUserQuery(InterpreterFactory & factory) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 0c230c10388..89b764a5788 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -307,16 +307,25 @@ Session::~Session() } } -AuthenticationType Session::getAuthenticationType(const String & user_name) const +std::unordered_set Session::getAuthenticationTypes(const String & user_name) const { - return global_context->getAccessControl().read(user_name)->authentication_methods.back().getType(); + std::unordered_set authentication_types; + + const auto user_to_query = global_context->getAccessControl().read(user_name); + + for (const auto & authentication_method : user_to_query->authentication_methods) + { + authentication_types.insert(authentication_method.getType()); + } + + return authentication_types; } -AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & user_name) const +std::unordered_set Session::getAuthenticationTypesOrLogInFailure(const String & user_name) const { try { - return getAuthenticationType(user_name); + return getAuthenticationTypes(user_name); } catch (const Exception & e) { diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..6dac52d4a39 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -43,10 +43,10 @@ public: Session & operator=(const Session &) = delete; /// Provides information about the authentication type of a specified user. - AuthenticationType getAuthenticationType(const String & user_name) const; + std::unordered_set getAuthenticationTypes(const String & user_name) const; /// Same as getAuthenticationType, but adds LoginFailure event in case of error. - AuthenticationType getAuthenticationTypeOrLogInFailure(const String & user_name) const; + std::unordered_set getAuthenticationTypesOrLogInFailure(const String & user_name) const; /// Sets the current user, checks the credentials and that the specified address is allowed to connect from. /// The function throws an exception if there is no such user or password is wrong. diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 3a62480dc0c..c155c588a24 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -154,7 +154,9 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt auth_type_name = AuthenticationTypeInfo::get(*type).name; } - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); + const char * identified_string = settings.additional_authentication_method ? " ADD NEW AUTHENTICATION METHOD IDENTIFIED" : " IDENTIFIED"; + + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << identified_string << (settings.hilite ? IAST::hilite_none : ""); if (!auth_type_name.empty()) { diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 975f81e57ca..2e6014f0d59 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -20,9 +20,14 @@ namespace void formatAuthenticationData(const std::vector> & auth_data, const IAST::FormatSettings & settings) { - for (const auto & authentication_method : auth_data) + auth_data[0]->format(settings); + + auto settings_with_additional_authentication_method = settings; + settings_with_additional_authentication_method.additional_authentication_method = true; + + for (auto i = 1u; i < auth_data.size(); i++) { - authentication_method->format(settings); + auth_data[i]->format(settings_with_additional_authentication_method); } } @@ -187,8 +192,9 @@ ASTPtr ASTCreateUserQuery::clone() const { for (const auto & authentication_method : auth_data) { - res->auth_data.push_back(std::static_pointer_cast(authentication_method->clone())); - res->children.push_back(res->auth_data.back()); + auto ast_clone = std::static_pointer_cast(authentication_method->clone()); + res->auth_data.push_back(ast_clone); + res->children.push_back(ast_clone); } } diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 58f167fe5b7..24af6d00ca4 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -43,6 +43,7 @@ public: bool if_not_exists = false; bool or_replace = false; bool reset_authentication_methods_to_new = false; + bool replace_authentication_methods = false; std::shared_ptr names; std::optional new_name; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 71d8e344646..213360fface 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -517,11 +517,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec continue; } - // todo arthur maybe check that neither identified with or add new auth method has been parsed if (!reset_authentication_methods_to_new.has_value()) { reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); - continue; } if (!valid_until) @@ -621,7 +619,8 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->default_database = std::move(default_database); query->valid_until = std::move(valid_until); query->storage_name = std::move(storage_name); - query->reset_authentication_methods_to_new = parsed_identified_with || reset_authentication_methods_to_new.value_or(false); + query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); + query->replace_authentication_methods = parsed_identified_with; for (const auto & authentication_method : query->auth_data) { diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index ee70fed0f07..a74f321eead 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. LiteralEscapingStyle literal_escaping_style; + bool additional_authentication_method; explicit FormatSettings( WriteBuffer & ostr_, @@ -209,7 +210,8 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, + bool additional_authentication_method_ = false) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -218,6 +220,7 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) + , additional_authentication_method(additional_authentication_method_) { } @@ -230,6 +233,7 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) + , additional_authentication_method(other.additional_authentication_method) { } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 6456f6d24ff..86f3dba1697 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -373,11 +373,16 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl { try { - // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible - // (if password is specified using double SHA1). Otherwise, SHA256 plugin is used. - if (session->getAuthenticationTypeOrLogInFailure(user_name) == DB::AuthenticationType::SHA256_PASSWORD) + const auto user_authentication_types = session->getAuthenticationTypesOrLogInFailure(user_name); + + for (const auto user_authentication_type : user_authentication_types) { - authPluginSSL(); + // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible + // (if password is specified using double SHA1). Otherwise, SHA256 plugin is used. + if (user_authentication_type == DB::AuthenticationType::SHA256_PASSWORD) + { + authPluginSSL(); + } } std::optional auth_response = auth_plugin_name == auth_plugin->getName() ? std::make_optional(initial_auth_response) : std::nullopt; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e3a820340ad..073e8d6cb5f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1498,7 +1498,17 @@ void TCPHandler::receiveHello() /// Perform handshake for SSH authentication if (is_ssh_based_auth) { - if (session->getAuthenticationTypeOrLogInFailure(user) != AuthenticationType::SSH_KEY) + const auto authentication_types = session->getAuthenticationTypesOrLogInFailure(user); + + bool user_supports_ssh_authentication = std::find_if( + authentication_types.begin(), + authentication_types.end(), + [](auto authentication_type) + { + return authentication_type == AuthenticationType::SSH_KEY; + }) != authentication_types.end(); + + if (!user_supports_ssh_authentication) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Expected authentication with SSH key"); if (client_tcp_protocol_version < DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION) From a1211a0f5a66a676d7742dc102bb8a223945057d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 20 Jun 2024 16:51:52 -0300 Subject: [PATCH 092/844] simplify syntax --- src/Parsers/Access/ASTAuthenticationData.cpp | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 2 +- src/Parsers/CommonParsers.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index c155c588a24..73fa5a5a692 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -154,7 +154,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt auth_type_name = AuthenticationTypeInfo::get(*type).name; } - const char * identified_string = settings.additional_authentication_method ? " ADD NEW AUTHENTICATION METHOD IDENTIFIED" : " IDENTIFIED"; + const char * identified_string = settings.additional_authentication_method ? " ADD IDENTIFIED" : " IDENTIFIED"; settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << identified_string << (settings.hilite ? IAST::hilite_none : ""); diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 213360fface..36162086609 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -411,7 +411,7 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{Keyword::ADD_NEW_AUTHENTICATION_METHOD}.ignore(pos, expected)) + if (!ParserKeyword{Keyword::ADD}.ignore(pos, expected)) { return false; } diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index a7f33080588..726e491fbf3 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -15,7 +15,6 @@ namespace DB MR_MACROS(ADD_PROJECTION, "ADD PROJECTION") \ MR_MACROS(ADD_STATISTICS, "ADD STATISTICS") \ MR_MACROS(ADD, "ADD") \ - MR_MACROS(ADD_NEW_AUTHENTICATION_METHOD, "ADD NEW AUTHENTICATION METHOD") \ MR_MACROS(ADMIN_OPTION_FOR, "ADMIN OPTION FOR") \ MR_MACROS(AFTER, "AFTER") \ MR_MACROS(ALGORITHM, "ALGORITHM") \ From 55da169fe737ec6c2ca567398bf2a4b3997375d3 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 21 Jun 2024 10:30:12 -0300 Subject: [PATCH 093/844] fix wrong condition --- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index c79b7b5bc34..bc507b5f156 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -52,7 +52,7 @@ namespace else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (!query.attach && !query.alter && !auth_data.empty() && !allow_implicit_no_password) + if (!query.attach && !query.alter && auth_data.empty() && !allow_implicit_no_password) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication type NO_PASSWORD must " "be explicitly specified, check the setting allow_implicit_no_password " From 08c9cc18d6a0e07094871b90a8f88088d96c3454 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 21 Jun 2024 12:05:58 -0300 Subject: [PATCH 094/844] fix astcreateuserquery clone --- src/Parsers/Access/ASTCreateUserQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 2e6014f0d59..0d5db681a7e 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -172,6 +172,7 @@ ASTPtr ASTCreateUserQuery::clone() const { auto res = std::make_shared(*this); res->children.clear(); + res->auth_data.clear(); if (names) res->names = std::static_pointer_cast(names->clone()); @@ -188,6 +189,7 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); + // this is weird. if (!auth_data.empty()) { for (const auto & authentication_method : auth_data) From 237abda2eb2c6923835e93916b3e44a1c9e33299 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 21 Jun 2024 15:35:50 -0300 Subject: [PATCH 095/844] add some ut --- src/Parsers/tests/gtest_Parser.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index fd22dd34128..3b4cd9af695 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -284,6 +284,10 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'" }, + { + "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'", + "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'" + }, { "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" @@ -292,6 +296,10 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'" }, + { + "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'", + "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'" + }, { "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'" @@ -299,6 +307,10 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, { "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123' SALT 'EFFD7F6B03B3EA68B8F86C1E91614DD50E42EB31EF7160524916444D58B5E264'", "throws Syntax error" + }, + { + "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' IDENTIFIED WITH plaintext_password BY 'def123'", + "throws Only one identified with is permitted" } }))); From f9b70ea77a3b3059d9c784e2fe4b90000e75d3d4 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sat, 22 Jun 2024 09:30:16 +0200 Subject: [PATCH 096/844] Add JSONCompactWithProgressRowOutputFormat --- src/Formats/registerFormats.cpp | 2 + ...JSONCompactWithProgressRowOutputFormat.cpp | 125 ++++++++++++++++++ .../JSONCompactWithProgressRowOutputFormat.h | 53 ++++++++ 3 files changed, 180 insertions(+) create mode 100644 src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 57ca1bb49c8..770b747fafd 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -95,6 +95,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); void registerOutputFormatSQLInsert(FormatFactory & factory); +void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory); /// Input only formats. @@ -242,6 +243,7 @@ void registerFormats() registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); registerOutputFormatSQLInsert(factory); + registerOutputFormatJSONCompactWithProgress(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp new file mode 100644 index 00000000000..78cf5b9a003 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -0,0 +1,125 @@ +#include +#include +#include + +#include + +#include + + + +namespace DB +{ + +JSONCompactWithProgressRowOutputFormat::JSONCompactWithProgressRowOutputFormat( + WriteBuffer & out_, + const Block & header, + const FormatSettings & settings_, + bool yield_strings_) + : JSONRowOutputFormat(out_, header, settings_, yield_strings_) +{ +} + +void JSONCompactWithProgressRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) +{ + JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); + ++field_number; + LOG_DEBUG(getLogger("JSONCompactWithProgressRowOutputFormat"), "Field number: {}", field_number); +} + +void JSONCompactWithProgressRowOutputFormat::writeFieldDelimiter() +{ + JSONUtils::writeFieldCompactDelimiter(*ostr); +} + +void JSONCompactWithProgressRowOutputFormat::writeRowStartDelimiter() +{ + if (has_progress) + writeProgress(); + JSONUtils::writeCompactArrayStart(*ostr, 2); +} + +void JSONCompactWithProgressRowOutputFormat::writeRowEndDelimiter() +{ + JSONUtils::writeCompactArrayEnd(*ostr); + field_number = 0; + ++row_count; +} + +void JSONCompactWithProgressRowOutputFormat::writeBeforeTotals() +{ + JSONUtils::writeFieldDelimiter(*ostr, 2); + JSONUtils::writeCompactArrayStart(*ostr, 1, "totals"); +} + +void JSONCompactWithProgressRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) +{ + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); +} + +void JSONCompactWithProgressRowOutputFormat::writeAfterTotals() +{ + JSONUtils::writeCompactArrayEnd(*ostr); +} + +void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) +{ + JSONUtils::writeCompactArrayStart(*ostr, 2, title); + JSONUtils::writeCompactColumns(columns, serializations, row_num, yield_strings, settings, *ostr); + JSONUtils::writeCompactArrayEnd(*ostr); +} + +void JSONCompactWithProgressRowOutputFormat::onProgress(const Progress & value) +{ + LOG_DEBUG(getLogger("JSONCompactWithProgressRowOutputFormat"), "onProgress: {}", value.read_rows); + + progress.incrementPiecewiseAtomically(value); + String progress_line; + WriteBufferFromString buf(progress_line); + writeCString("{\"progress\":", buf); + progress.writeJSON(buf); + writeCString("}\n", buf); + buf.finalize(); + std::lock_guard lock(progress_lines_mutex); + progress_lines.emplace_back(std::move(progress_line)); + has_progress = true; +} + + +void JSONCompactWithProgressRowOutputFormat::flush() +{ + if (has_progress) + writeProgress(); + JSONRowOutputFormat::flush(); +} + +void JSONCompactWithProgressRowOutputFormat::writeSuffix() +{ + if (has_progress) + writeProgress(); + JSONRowOutputFormat::writeSuffix(); +} + +void JSONCompactWithProgressRowOutputFormat::writeProgress() +{ + std::lock_guard lock(progress_lines_mutex); + for (const auto & progress_line : progress_lines) + writeString(progress_line, *ostr); + progress_lines.clear(); + has_progress = false; +} + +void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory) +{ + factory.registerOutputFormat("JSONCompactWithProgress", []( + WriteBuffer & buf, + const Block & sample, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings, false); + }); + + factory.markOutputFormatSupportsParallelFormatting("JSONCompactWithProgress"); +} + +} diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h new file mode 100644 index 00000000000..4bc10d41f19 --- /dev/null +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +struct FormatSettings; + +/** The stream for outputting data in the JSONCompact- formats. + */ +class JSONCompactWithProgressRowOutputFormat final : public JSONRowOutputFormat +{ +public: + JSONCompactWithProgressRowOutputFormat( + WriteBuffer & out_, + const Block & header, + const FormatSettings & settings_, + bool yield_strings_); + + String getName() const override { return "JSONCompactWithProgressRowOutputFormat"; } + + void onProgress(const Progress & value) override; + void flush() override; + +private: + void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; + void writeFieldDelimiter() override; + void writeRowStartDelimiter() override; + void writeRowEndDelimiter() override; + bool supportTotals() const override { return true; } + bool supportExtremes() const override { return true; } + void writeBeforeTotals() override; + void writeAfterTotals() override; + void writeExtremesElement(const char * title, const Columns & columns, size_t row_num) override; + void writeTotals(const Columns & columns, size_t row_num) override; + + void writeProgress(); + void writeSuffix() override; + + Progress progress; + std::vector progress_lines; + std::mutex progress_lines_mutex; + /// To not lock mutex and check progress_lines every row, + /// we will use atomic flag that progress_lines is not empty. + std::atomic_bool has_progress = false; +}; + +} From 20860ed8b04f6588de12b4e59baf932f02f90d27 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 23 Jun 2024 14:34:54 +0200 Subject: [PATCH 097/844] Enable onProgress calls to JSONCompactWithProgressRowOutputFormat --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 78cf5b9a003..2cc7d99f8e7 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -118,8 +118,6 @@ void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory) { return std::make_shared(buf, sample, format_settings, false); }); - - factory.markOutputFormatSupportsParallelFormatting("JSONCompactWithProgress"); } } From e1c60c4e40a03db459e71dd2b54b082d5205654d Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 24 Jun 2024 10:11:46 +0200 Subject: [PATCH 098/844] Remove debug output --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 2cc7d99f8e7..39532fb76fb 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -4,9 +4,6 @@ #include -#include - - namespace DB { @@ -24,7 +21,6 @@ void JSONCompactWithProgressRowOutputFormat::writeField(const IColumn & column, { JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); ++field_number; - LOG_DEBUG(getLogger("JSONCompactWithProgressRowOutputFormat"), "Field number: {}", field_number); } void JSONCompactWithProgressRowOutputFormat::writeFieldDelimiter() @@ -71,8 +67,6 @@ void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * t void JSONCompactWithProgressRowOutputFormat::onProgress(const Progress & value) { - LOG_DEBUG(getLogger("JSONCompactWithProgressRowOutputFormat"), "onProgress: {}", value.read_rows); - progress.incrementPiecewiseAtomically(value); String progress_line; WriteBufferFromString buf(progress_line); From 5d91fd9717eb4978e0cf8cfc93b35b5d88bc0be6 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 24 Jun 2024 14:31:05 +0000 Subject: [PATCH 099/844] Cleanup --- src/Processors/Executors/PipelineExecutor.h | 1 - .../QueryPlan/ParallelReplicasLocalPlan.cpp | 173 ------------------ 2 files changed, 174 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 03f0f7f1a0a..ae119355cb5 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -9,7 +9,6 @@ #include #include -#include #include diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index bd79fc38ae9..bd5a4793872 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -17,13 +17,6 @@ #include #include -namespace ProfileEvents -{ - extern const Event SelectedParts; - extern const Event SelectedRanges; - extern const Event SelectedMarks; -} - namespace DB { @@ -56,172 +49,6 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi } -class ReadFromMergeTreeCoordinated : public ISourceStep -{ -public: - ReadFromMergeTreeCoordinated(QueryPlanStepPtr read_from_merge_tree_, ParallelReplicasReadingCoordinatorPtr coordinator_) - : ISourceStep(read_from_merge_tree_->getOutputStream()) - , read_from_merge_tree(std::move(read_from_merge_tree_)) - , coordinator(std::move(coordinator_)) - { - } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; - String getName() const override { return "ReadFromLocalParallelReplica"; } - -private: - QueryPlanStepPtr read_from_merge_tree; - ParallelReplicasReadingCoordinatorPtr coordinator; -}; - -void ReadFromMergeTreeCoordinated::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & /*settings*/) -{ - ReadFromMergeTree & reading = *typeid_cast(read_from_merge_tree.get()); - - auto result = reading.getAnalysisResult(); - const auto & query_info = reading.getQueryInfo(); - const auto & data = reading.data; - const auto & context = reading.getContext(); - const auto & storage_snapshot = reading.getStorageSnapshot(); - - if (reading.enable_remove_parts_from_snapshot_optimization) - { - /// Do not keep data parts in snapshot. - /// They are stored separately, and some could be released after PK analysis. - reading.storage_snapshot->data = std::make_unique(); - } - - LOG_DEBUG( - reading.log, - "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges", - result.parts_before_pk, - result.total_parts, - result.selected_parts, - result.selected_marks_pk, - result.total_marks_pk, - result.selected_marks, - result.selected_ranges); - - // Adding partition info to QueryAccessInfo. - if (context->hasQueryContext() && !query_info.is_internal) - { - Names partition_names; - for (const auto & part : result.parts_with_ranges) - { - partition_names.emplace_back( - fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); - } - context->getQueryContext()->addQueryAccessInfo(partition_names); - - if (storage_snapshot->projection) - context->getQueryContext()->addQueryAccessInfo( - Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); - } - - ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); - ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges); - ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks); - - auto query_id_holder = MergeTreeDataSelectExecutor::checkLimits(data, result, context); - - // TODO: check this on plan level, we should be here if there is nothing to read - if (result.parts_with_ranges.empty()) - { - pipeline.init(Pipe(std::make_shared(getOutputStream().header))); - return; - } - - /// Projection, that needed to drop columns, which have appeared by execution - /// of some extra expressions, and to allow execute the same expressions later. - /// NOTE: It may lead to double computation of expressions. - ActionsDAGPtr result_projection; - - Pipe pipe = reading.spreadMarkRanges(std::move(result.parts_with_ranges), reading.requested_num_streams, result, result_projection); - - for (const auto & processor : pipe.getProcessors()) - processor->setStorageLimits(query_info.storage_limits); - - if (pipe.empty()) - { - pipeline.init(Pipe(std::make_shared(getOutputStream().header))); - return; - } - - if (result.sampling.use_sampling) - { - auto sampling_actions = std::make_shared(result.sampling.filter_expression); - pipe.addSimpleTransform([&](const Block & header) - { - return std::make_shared( - header, - sampling_actions, - result.sampling.filter_function->getColumnName(), - false); - }); - } - - Block cur_header = pipe.getHeader(); - - auto append_actions = [&result_projection](ActionsDAGPtr actions) - { - if (!result_projection) - result_projection = std::move(actions); - else - result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions)); - }; - - if (result_projection) - cur_header = result_projection->updateHeader(cur_header); - - /// Extra columns may be returned (for example, if sampling is used). - /// Convert pipe to step header structure. - if (!isCompatibleHeader(cur_header, getOutputStream().header)) - { - auto converting = ActionsDAG::makeConvertingActions( - cur_header.getColumnsWithTypeAndName(), - getOutputStream().header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name); - - append_actions(std::move(converting)); - } - - if (result_projection) - { - auto projection_actions = std::make_shared(result_projection); - pipe.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, projection_actions); - }); - } - - /// Some extra columns could be added by sample/final/in-order/etc - /// Remove them from header if not needed. - if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header)) - { - auto convert_actions_dag = ActionsDAG::makeConvertingActions( - pipe.getHeader().getColumnsWithTypeAndName(), - getOutputStream().header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name, - true); - - auto converting_dag_expr = std::make_shared(convert_actions_dag); - - pipe.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, converting_dag_expr); - }); - } - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); - pipeline.addContext(context); - // Attach QueryIdHolder if needed - if (query_id_holder) - pipeline.setQueryIdHolder(std::move(query_id_holder)); -} - std::unique_ptr createLocalPlanForParallelReplicas( const ASTPtr & query_ast, const Block & header, From fe0d3b3e27a5a70337b4d9174e1244569b4b0043 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 15:48:46 -0300 Subject: [PATCH 100/844] initial tests --- .../03174_add_identified_with.reference | 38 ++++++++ .../0_stateless/03174_add_identified_with.sh | 92 +++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 tests/queries/0_stateless/03174_add_identified_with.reference create mode 100755 tests/queries/0_stateless/03174_add_identified_with.sh diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference new file mode 100644 index 00000000000..bd855d06ffc --- /dev/null +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -0,0 +1,38 @@ +Basic authentication after user creation +1 +Changed password, old password should not work +AUTHENTICATION_FAILED +New password should work +1 +Two new passwords were added, should both work +1 +1 +Gen ssh key +Authenticating with ssh key +1 +Altering credentials and keeping only bcrypt_password +Asserting SSH does not work anymore +AUTHENTICATION_FAILED +Asserting bcrypt_password works +1 +Adding new bcrypt_password +Both current authentication methods should work +1 +1 +Reset authentication methods to new +Only the latest should work, below should fail +AUTHENTICATION_FAILED +Should work +1 +Multiple identified with, not allowed +SYNTAX_ERROR +Multiple identified with, not allowed, even if mixed +SYNTAX_ERROR +Identified with must precede all add identified with, not allowed +SYNTAX_ERROR +CREATE Multiple identified with, not allowed +SYNTAX_ERROR +CREATE Multiple identified with, not allowed, even if mixed +SYNTAX_ERROR +CREATE Identified with must precede all add identified with, not allowed +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh new file mode 100755 index 00000000000..f259f236c96 --- /dev/null +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function test_login_pwd +{ + ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" +} + +function test_login_pwd_expect_error +{ + test_login_pwd "$1" "$2" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' +} + +user="u01_03174" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password BY '1'" + +echo "Basic authentication after user creation" +test_login_pwd ${user} '1' + +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password BY '2'" + +echo "Changed password, old password should not work" +test_login_pwd_expect_error ${user} '1' + +echo "New password should work" +test_login_pwd ${user} '2' + +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH sha256_password BY '3' ADD IDENTIFIED WITH plaintext_password BY '4'" + +echo "Two new passwords were added, should both work" +test_login_pwd ${user} '3' + +test_login_pwd ${user} '4' + +echo "Gen ssh key" +yes 'yes' | ssh-keygen -t ed25519 -N '' -f 'ssh_key' > /dev/null + +pub_key=$(awk '{print $2}' ssh_key.pub) + +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${pub_key}' TYPE 'ssh-ed25519'" + +echo "Authenticating with ssh key" +${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" + +echo "Altering credentials and keeping only bcrypt_password" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH bcrypt_password BY '5'" + +echo "Asserting SSH does not work anymore" +${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' + +echo "Asserting bcrypt_password works" +test_login_pwd ${user} '5' + +echo "Adding new bcrypt_password" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH bcrypt_password BY '6'" + +echo "Both current authentication methods should work" +test_login_pwd ${user} '5' +test_login_pwd ${user} '6' + +echo "Reset authentication methods to new" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} RESET AUTHENTICATION METHODS TO NEW" + +echo "Only the latest should work, below should fail" +test_login_pwd_expect_error ${user} '5' + +echo "Should work" +test_login_pwd ${user} '6' + +echo "Multiple identified with, not allowed" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "Multiple identified with, not allowed, even if mixed" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password '7' ADD IDENTIFIED WITH plaintext_password '8' IDENTIFIED WITH plaintext_password '9'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "Identified with must precede all add identified with, not allowed" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + +${CLICKHOUSE_CLIENT} --query "DROP USER ${user}" + +echo "CREATE Multiple identified with, not allowed" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "CREATE Multiple identified with, not allowed, even if mixed" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password '7' ADD IDENTIFIED WITH plaintext_password '8' IDENTIFIED WITH plaintext_password '9'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "CREATE Identified with must precede all add identified with, not allowed" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From d4da0a0a21e6ead8591848d08b3680b74cdd4629 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 16:30:08 -0300 Subject: [PATCH 101/844] use plaintext_password instead of sha256 because of cicd --- tests/queries/0_stateless/03174_add_identified_with.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index f259f236c96..902002e11e5 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -31,7 +31,7 @@ test_login_pwd_expect_error ${user} '1' echo "New password should work" test_login_pwd ${user} '2' -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH sha256_password BY '3' ADD IDENTIFIED WITH plaintext_password BY '4'" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password BY '3' ADD IDENTIFIED WITH plaintext_password BY '4'" echo "Two new passwords were added, should both work" test_login_pwd ${user} '3' From 5bdd49f36cbd236c97e6cf90c0e424e9689a3859 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 16:43:19 -0300 Subject: [PATCH 102/844] do not gen at runtime, use pre-built ones --- .../03174_add_identified_with.reference | 1 - .../0_stateless/03174_add_identified_with.sh | 15 +++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index bd855d06ffc..c2963686d18 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -7,7 +7,6 @@ New password should work Two new passwords were added, should both work 1 1 -Gen ssh key Authenticating with ssh key 1 Altering credentials and keeping only bcrypt_password diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 902002e11e5..f13d9e0caf5 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -4,6 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +ssh_key="-----BEGIN OPENSSH PRIVATE KEY----- + b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW + QyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGAAAAJApc2djKXNn + YwAAAAtzc2gtZWQyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGA + AAAEAk15S5L7j85LvmAivo2J8lo44OR/tLILBO1Wb2//mFwBzqa3duS0ce6QYkzUgko9W0 + Ux7i7d3xPoseFrwnhY4YAAAADWFydGh1ckBhcnRodXI= + -----END OPENSSH PRIVATE KEY-----" + function test_login_pwd { ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" @@ -38,12 +46,11 @@ test_login_pwd ${user} '3' test_login_pwd ${user} '4' -echo "Gen ssh key" -yes 'yes' | ssh-keygen -t ed25519 -N '' -f 'ssh_key' > /dev/null +ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" -pub_key=$(awk '{print $2}' ssh_key.pub) +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${pub_key}' TYPE 'ssh-ed25519'" +echo ${ssh_key} > ssh_key echo "Authenticating with ssh key" ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" From 4d5676f4551310380d7cad6e012bb155a2843524 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 16:50:23 -0300 Subject: [PATCH 103/844] dont test ssh at all, it wont work if openssl is not built --- .../03174_add_identified_with.reference | 4 ---- .../0_stateless/03174_add_identified_with.sh | 20 ------------------- 2 files changed, 24 deletions(-) diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index c2963686d18..3d74469d535 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -7,11 +7,7 @@ New password should work Two new passwords were added, should both work 1 1 -Authenticating with ssh key -1 Altering credentials and keeping only bcrypt_password -Asserting SSH does not work anymore -AUTHENTICATION_FAILED Asserting bcrypt_password works 1 Adding new bcrypt_password diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index f13d9e0caf5..2eb71f3a982 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -4,14 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -ssh_key="-----BEGIN OPENSSH PRIVATE KEY----- - b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW - QyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGAAAAJApc2djKXNn - YwAAAAtzc2gtZWQyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGA - AAAEAk15S5L7j85LvmAivo2J8lo44OR/tLILBO1Wb2//mFwBzqa3duS0ce6QYkzUgko9W0 - Ux7i7d3xPoseFrwnhY4YAAAADWFydGh1ckBhcnRodXI= - -----END OPENSSH PRIVATE KEY-----" - function test_login_pwd { ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" @@ -46,21 +38,9 @@ test_login_pwd ${user} '3' test_login_pwd ${user} '4' -ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" - -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" - -echo ${ssh_key} > ssh_key - -echo "Authenticating with ssh key" -${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" - echo "Altering credentials and keeping only bcrypt_password" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH bcrypt_password BY '5'" -echo "Asserting SSH does not work anymore" -${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' - echo "Asserting bcrypt_password works" test_login_pwd ${user} '5' From 96cb9f13dd1b53982cab61b245ee25e4fc91d202 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 16:51:34 -0300 Subject: [PATCH 104/844] remove comments --- src/Access/UsersConfigAccessStorage.cpp | 1 - src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index cfb02b0962b..a4fb0972220 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -261,7 +261,6 @@ namespace } else { - // remember to clean it up.. do I need to? smth in my memory tells me I don't need to user->authentication_methods.emplace_back(); } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index bc507b5f156..a1f8eaa4cb0 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -60,7 +60,6 @@ namespace if (user.authentication_methods.empty() && auth_data.empty()) { - // previously, a user always had a default constructed auth method.. maybe I should put this somewhere else user.authentication_methods.emplace_back(); } From c7aed3c98c2820468e23a8a86e8ca3301dd72254 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 19:37:46 -0300 Subject: [PATCH 105/844] Revert "dont test ssh at all, it wont work if openssl is not built" This reverts commit 4d5676f4551310380d7cad6e012bb155a2843524. --- .../03174_add_identified_with.reference | 4 ++++ .../0_stateless/03174_add_identified_with.sh | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index 3d74469d535..c2963686d18 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -7,7 +7,11 @@ New password should work Two new passwords were added, should both work 1 1 +Authenticating with ssh key +1 Altering credentials and keeping only bcrypt_password +Asserting SSH does not work anymore +AUTHENTICATION_FAILED Asserting bcrypt_password works 1 Adding new bcrypt_password diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 2eb71f3a982..f13d9e0caf5 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -4,6 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +ssh_key="-----BEGIN OPENSSH PRIVATE KEY----- + b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW + QyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGAAAAJApc2djKXNn + YwAAAAtzc2gtZWQyNTUxOQAAACAc6mt3bktHHukGJM1IJKPVtFMe4u3d8T6LHha8J4WOGA + AAAEAk15S5L7j85LvmAivo2J8lo44OR/tLILBO1Wb2//mFwBzqa3duS0ce6QYkzUgko9W0 + Ux7i7d3xPoseFrwnhY4YAAAADWFydGh1ckBhcnRodXI= + -----END OPENSSH PRIVATE KEY-----" + function test_login_pwd { ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" @@ -38,9 +46,21 @@ test_login_pwd ${user} '3' test_login_pwd ${user} '4' +ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" + +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" + +echo ${ssh_key} > ssh_key + +echo "Authenticating with ssh key" +${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" + echo "Altering credentials and keeping only bcrypt_password" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH bcrypt_password BY '5'" +echo "Asserting SSH does not work anymore" +${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' + echo "Asserting bcrypt_password works" test_login_pwd ${user} '5' From a1928bd2994cd6572fa4a6509216737e8999b819 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 19:39:26 -0300 Subject: [PATCH 106/844] no-fast test --- tests/queries/0_stateless/03174_add_identified_with.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index f13d9e0caf5..060bcd3363a 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 907a54e9f692d0dd352e13f643d29a464190fd67 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 24 Jun 2024 21:21:03 -0300 Subject: [PATCH 107/844] add no parallel --- tests/queries/0_stateless/03174_add_identified_with.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 060bcd3363a..5b80894d056 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From f62873b1735bc22e8a24a1ed91a43edbf405fea4 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 12:29:21 +0000 Subject: [PATCH 108/844] Fix 02404_memory_bound_merging --- .../02404_memory_bound_merging.reference | 21 ++++++++++++++++++- .../02404_memory_bound_merging.sql | 3 ++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index 3daa99f91c6..b2195051c0b 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -100,7 +100,26 @@ select a, count() from dist_t_different_dbs group by a, b order by a limit 5 off 504 2000 1000000 -- { echoOn } -- -explain pipeline select a from pr_t group by a order by a limit 5 offset 500; +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=0; +(Expression) +ExpressionTransform + (Limit) + Limit + (Sorting) + MergingSortedTransform 4 → 1 + MergeSortingTransform × 4 + LimitsCheckingTransform × 4 + PartialSortingTransform × 4 + (Expression) + ExpressionTransform × 4 + (MergingAggregated) + Resize 1 → 4 + SortingAggregatedTransform 4 → 1 + MergingAggregatedBucketTransform × 4 + Resize 1 → 4 + GroupingAggregatedTransform 3 → 1 + (ReadFromRemoteParallelReplicas) +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=1, parallel_replicas_local_plan=1; (Expression) ExpressionTransform (Limit) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index 5e017e79309..a12ccb2ffb3 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -72,7 +72,8 @@ set distributed_aggregation_memory_efficient=1; select count() from pr_t; -- { echoOn } -- -explain pipeline select a from pr_t group by a order by a limit 5 offset 500; +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=0; +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=1, parallel_replicas_local_plan=1; select a, count() from pr_t group by a order by a limit 5 offset 500; select a, count() from pr_t group by a, b order by a limit 5 offset 500; From 96b68cb920600cb1a080cefd5f7e82de332ac8dd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 13:33:36 +0000 Subject: [PATCH 109/844] Update setting in 02404_memory_bound_merging --- tests/queries/0_stateless/02404_memory_bound_merging.reference | 2 +- tests/queries/0_stateless/02404_memory_bound_merging.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.reference b/tests/queries/0_stateless/02404_memory_bound_merging.reference index b2195051c0b..e02684c3b95 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.reference +++ b/tests/queries/0_stateless/02404_memory_bound_merging.reference @@ -100,7 +100,7 @@ select a, count() from dist_t_different_dbs group by a, b order by a limit 5 off 504 2000 1000000 -- { echoOn } -- -explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=0; +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings parallel_replicas_local_plan=0; (Expression) ExpressionTransform (Limit) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index a12ccb2ffb3..2958cc22ce0 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -72,7 +72,7 @@ set distributed_aggregation_memory_efficient=1; select count() from pr_t; -- { echoOn } -- -explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=0; +explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings parallel_replicas_local_plan=0; explain pipeline select a from pr_t group by a order by a limit 5 offset 500 settings allow_experimental_analyzer=1, parallel_replicas_local_plan=1; select a, count() from pr_t group by a order by a limit 5 offset 500; From 318af3af95845166ff7cd9c38c6a2cfd1e97ccfd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 14:31:11 +0000 Subject: [PATCH 110/844] Cleanup --- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 34 ++----------------- .../QueryPlan/ParallelReplicasLocalPlan.h | 3 +- .../QueryPlan/ReadFromMergeTree.cpp | 10 +++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 9 ++--- .../ParallelReplicasReadingCoordinator.h | 2 +- ...9_parallel_replicas_unavailable_shards.sql | 2 +- 6 files changed, 14 insertions(+), 46 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index bd5a4793872..6822d8b0a71 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -20,34 +20,7 @@ namespace DB { -namespace -{ - -void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects) -{ - if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) - return; - - auto mode = has_missing_objects ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name; - - auto get_converting_dag = [mode](const Block & block_, const Block & header_) - { - /// Convert header structure to expected. - /// Also we ignore constants from result and replace it with constants from header. - /// It is needed for functions like `now64()` or `randConstant()` because their values may be different. - return ActionsDAG::makeConvertingActions( - block_.getColumnsWithTypeAndName(), - header_.getColumnsWithTypeAndName(), - mode, - true); - }; - - auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); - auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); - plan.addStep(std::move(converting)); -} - -} +void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects); std::unique_ptr createLocalPlanForParallelReplicas( const ASTPtr & query_ast, @@ -55,8 +28,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( ContextPtr context, QueryProcessingStage::Enum processed_stage, ParallelReplicasReadingCoordinatorPtr coordinator, - QueryPlanStepPtr analyzed_read_from_merge_tree, - bool has_missing_objects) + QueryPlanStepPtr analyzed_read_from_merge_tree) { checkStackSize(); @@ -112,7 +84,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb, number_of_local_replica); node->step = std::move(read_from_merge_tree_parallel_replicas); - addConvertingActions(*query_plan, header, has_missing_objects); + addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); return query_plan; } diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h index 89d2019f807..123754458a1 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h @@ -14,6 +14,5 @@ std::unique_ptr createLocalPlanForParallelReplicas( ContextPtr context, QueryProcessingStage::Enum processed_stage, ParallelReplicasReadingCoordinatorPtr coordinator, - QueryPlanStepPtr read_from_merge_tree, - bool has_missing_objects); + QueryPlanStepPtr read_from_merge_tree); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 71caece30ed..1de701cff0b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -339,9 +339,9 @@ ReadFromMergeTree::ReadFromMergeTree( std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, std::optional all_ranges_callback_, - std::optional read_task_callback_, - std::optional number_of_current_replica_) + std::optional read_task_callback_) { + const auto number_of_local_replica = 0; return std::make_unique( prepared_parts, alter_conversions_for_parts, @@ -356,9 +356,9 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica log, (analyzed_merge_tree ? analyzed_merge_tree->analyzed_result_ptr : nullptr), true, - all_ranges_callback_, - read_task_callback_, - number_of_current_replica_); + all_ranges_callback, + read_task_callback, + number_of_local_replica); } Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 079f95be908..59a85413dd9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -123,14 +123,13 @@ public: AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading_, std::optional all_ranges_callback_ = std::nullopt, - std::optional read_task_callback_ = std::nullopt, + std::optional read_task_callback_ = std::nullopt); std::optional number_of_current_replica_ = std::nullopt); std::unique_ptr createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, - std::optional all_ranges_callback_, - std::optional read_task_callback_, - std::optional number_of_current_replica_); + std::optional all_ranges_callback, + std::optional read_task_callback); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } @@ -291,8 +290,6 @@ private: bool enable_vertical_final = false; bool enable_remove_parts_from_snapshot_optimization = true; std::optional number_of_current_replica; - - friend class ReadFromMergeTreeCoordinated; }; } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index afde5f336af..8b463fda395 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -30,8 +30,8 @@ public: /// needed to report total rows to read void setProgressCallback(ProgressCallback callback); - void initialize(CoordinationMode mode); private: + void initialize(CoordinationMode mode); std::mutex mutex; const size_t replicas_count{0}; diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql index 337b05263d0..7dbc389b55c 100644 --- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql +++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql @@ -6,7 +6,7 @@ SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=1 SET send_logs_level='error'; -- with local plan for initiator, the query can be executed fast on initator, we can simply not come to the point where unavailable replica can be detected -- therefore disable local plan for now -SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*) SETTINGS log_comment = '02769_7b513191-5082-4073-8568-53b86a49da79', allow_experimental_analyzer=0; +SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*) SETTINGS log_comment = '02769_7b513191-5082-4073-8568-53b86a49da79', parallel_replicas_local_plan=0; SYSTEM FLUSH LOGS; From 510cb961a1d41b592f986b98f2c40a360cd83314 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 14:47:38 +0000 Subject: [PATCH 111/844] Fix --- src/Interpreters/ClusterProxy/executeQuery.cpp | 3 +-- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 5 ----- src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp | 3 +-- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ++-- src/Processors/QueryPlan/ReadFromMergeTree.h | 4 ++-- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index ab25da090d6..da88f16b504 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -524,8 +524,7 @@ void executeQueryWithParallelReplicas( new_context, processed_stage, coordinator, - std::move(analyzed_read_from_merge_tree), - /*has_missing_objects=*/false); + std::move(analyzed_read_from_merge_tree)); DataStreams input_streams; input_streams.reserve(2); diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 1f4f271fa6e..4d4ac69ec25 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -9,9 +9,6 @@ namespace DB { -namespace -{ - void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects) { if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) @@ -36,8 +33,6 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi plan.addStep(std::move(converting)); } -} - std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 6822d8b0a71..67c10985408 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -79,9 +79,8 @@ std::unique_ptr createLocalPlanForParallelReplicas( MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; - const auto number_of_local_replica = 0; auto read_from_merge_tree_parallel_replicas - = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb, number_of_local_replica); + = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1de701cff0b..0ed06639f8c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -356,8 +356,8 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica log, (analyzed_merge_tree ? analyzed_merge_tree->analyzed_result_ptr : nullptr), true, - all_ranges_callback, - read_task_callback, + all_ranges_callback_, + read_task_callback_, number_of_local_replica); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 59a85413dd9..fd66a8af126 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -123,12 +123,12 @@ public: AnalysisResultPtr analyzed_result_ptr_, bool enable_parallel_reading_, std::optional all_ranges_callback_ = std::nullopt, - std::optional read_task_callback_ = std::nullopt); + std::optional read_task_callback_ = std::nullopt, std::optional number_of_current_replica_ = std::nullopt); std::unique_ptr createLocalParallelReplicasReadingStep( const ReadFromMergeTree * analyzed_merge_tree, - std::optional all_ranges_callback, + std::optional all_ranges_callback_, std::optional read_task_callback); static constexpr auto name = "ReadFromMergeTree"; From 0eff924475af085f055fb04f514ea656446c9996 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 15:11:02 +0000 Subject: [PATCH 112/844] Use parallel_replicas_local_plan in test --- .../queries/0_stateless/03006_parallel_replicas_prewhere.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql index afe6a00cc4d..de43302690a 100644 --- a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql @@ -27,7 +27,7 @@ EXPLAIN ESTIMATE SELECT count() FROM url_na_log PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') -SETTINGS allow_experimental_analyzer=0; +SETTINGS parallel_replicas_local_plan=0; -- here parallel replicas uses local snapshot as working set -- so, the estimation can be done @@ -35,7 +35,7 @@ EXPLAIN ESTIMATE SELECT count() FROM url_na_log PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') -SETTINGS allow_experimental_analyzer=1; +SETTINGS allow_experimental_analyzer=1, parallel_replicas_local_plan=1; DROP POLICY url_na_log_policy0 ON url_na_log; DROP TABLE url_na_log; From 64cfe1628f74c3797455c26b47770012fe5119b1 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 15:21:44 +0000 Subject: [PATCH 113/844] Cleanup ReadFromMergeTree.h --- src/Processors/QueryPlan/ReadFromMergeTree.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index fd66a8af126..99205c1de1d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -199,7 +199,6 @@ public: AnalysisResultPtr getAnalyzedResult() const { return analyzed_result_ptr; } void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } - ReadFromMergeTree::AnalysisResult getAnalysisResult() const; const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } const std::vector & getAlterConvertionsForParts() const { return alter_conversions_for_parts; } @@ -211,12 +210,6 @@ public: void applyFilters(ActionDAGNodes added_filter_nodes) override; - ReadType getReadType() const - { - chassert(analyzed_result_ptr); - return analyzed_result_ptr->read_type; - } - private: int getSortDirection() const { @@ -281,6 +274,8 @@ private: Pipe spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection); + ReadFromMergeTree::AnalysisResult getAnalysisResult() const; + mutable AnalysisResultPtr analyzed_result_ptr; VirtualFields shared_virtual_fields; From 3d35d31655edd271dcdc1b6e3a562edbcb8c17fd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 25 Jun 2024 16:11:04 +0000 Subject: [PATCH 114/844] More cleanup --- src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp | 10 +++++++--- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 7 ++++--- src/Processors/QueryPlan/ReadFromMergeTree.h | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 67c10985408..5f48a12072b 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -69,9 +69,13 @@ std::unique_ptr createLocalPlanForParallelReplicas( chassert(reading); - ReadFromMergeTree * analyzed_merge_tree = nullptr; + ReadFromMergeTree::AnalysisResultPtr analyzed_result_ptr; if (analyzed_read_from_merge_tree.get()) - analyzed_merge_tree = typeid_cast(analyzed_read_from_merge_tree.get()); + { + auto * analyzed_merge_tree = typeid_cast(analyzed_read_from_merge_tree.get()); + if (analyzed_merge_tree) + analyzed_result_ptr = analyzed_merge_tree->getAnalyzedResult(); + } MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement announcement) { coordinator->handleInitialAllRangesAnnouncement(announcement); }; @@ -80,7 +84,7 @@ std::unique_ptr createLocalPlanForParallelReplicas( { return coordinator->handleRequest(std::move(req)); }; auto read_from_merge_tree_parallel_replicas - = reading->createLocalParallelReplicasReadingStep(analyzed_merge_tree, all_ranges_cb, read_task_cb); + = reading->createLocalParallelReplicasReadingStep(analyzed_result_ptr, all_ranges_cb, read_task_cb); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0ed06639f8c..95c337e2150 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -337,11 +337,12 @@ ReadFromMergeTree::ReadFromMergeTree( } std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( - const ReadFromMergeTree * analyzed_merge_tree, + AnalysisResultPtr analyzed_result_ptr_, std::optional all_ranges_callback_, std::optional read_task_callback_) { const auto number_of_local_replica = 0; + const bool enable_parallel_reading = true; return std::make_unique( prepared_parts, alter_conversions_for_parts, @@ -354,8 +355,8 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica requested_num_streams, max_block_numbers_to_read, log, - (analyzed_merge_tree ? analyzed_merge_tree->analyzed_result_ptr : nullptr), - true, + analyzed_result_ptr_, + enable_parallel_reading, all_ranges_callback_, read_task_callback_, number_of_local_replica); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 99205c1de1d..c9c6030d207 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -127,9 +127,9 @@ public: std::optional number_of_current_replica_ = std::nullopt); std::unique_ptr createLocalParallelReplicasReadingStep( - const ReadFromMergeTree * analyzed_merge_tree, + AnalysisResultPtr analyzed_result_ptr_, std::optional all_ranges_callback_, - std::optional read_task_callback); + std::optional read_task_callback_); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } From 57afc5f0353ce7b7e6251940f2d17c40b1480855 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 25 Jun 2024 20:02:19 -0300 Subject: [PATCH 115/844] serialize no_password --- src/Access/AuthenticationData.cpp | 3 ++- .../Access/InterpreterShowCreateAccessEntityQuery.cpp | 5 +---- src/Parsers/Access/ASTAuthenticationData.cpp | 2 +- src/Parsers/Access/ASTCreateUserQuery.cpp | 11 +++++++++-- src/Parsers/tests/gtest_Parser.cpp | 8 ++++++++ 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index 70355fadfbd..7191c593eea 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -356,7 +356,8 @@ std::shared_ptr AuthenticationData::toAST() const break; } - case AuthenticationType::NO_PASSWORD: [[fallthrough]]; + case AuthenticationType::NO_PASSWORD: + break; case AuthenticationType::MAX: throw Exception(ErrorCodes::LOGICAL_ERROR, "AST: Unexpected authentication type {}", toString(auth_type)); } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 70dd859c70b..e49cf68727c 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -66,10 +66,7 @@ namespace for (const auto & authentication_method : user.authentication_methods) { - if (authentication_method.getType() != AuthenticationType::NO_PASSWORD) - { - query->auth_data.push_back(authentication_method.toAST()); - } + query->auth_data.push_back(authentication_method.toAST()); } if (user.valid_until) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 73fa5a5a692..8dd66808126 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -44,7 +44,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt { if (type && *type == AuthenticationType::NO_PASSWORD) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NOT IDENTIFIED" + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH no_password" << (settings.hilite ? IAST::hilite_none : ""); return; } diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0d5db681a7e..86b91fcb6f4 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -189,8 +189,15 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); - // this is weird. - if (!auth_data.empty()) + if (auth_data.empty()) + { + auto ast = std::make_shared(); + ast->type = AuthenticationType::NO_PASSWORD; + + res->auth_data.push_back(ast); + res->children.push_back(ast); + } + else { for (const auto & authentication_method : auth_data) { diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 3b4cd9af695..b88c8f56029 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -284,6 +284,14 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'", "CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'" }, + { + "CREATE USER user1 IDENTIFIED WITH no_password", + "CREATE USER user1 IDENTIFIED WITH no_password" + }, + { + "CREATE USER user1", + "CREATE USER user1 IDENTIFIED WITH no_password" + }, { "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'", "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'" From 35214a34ee60ca7ee44895d1c0eb57a0e43f0b75 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 25 Jun 2024 22:39:30 -0300 Subject: [PATCH 116/844] update some tests --- tests/queries/0_stateless/01073_grant_and_revoke.reference | 2 +- .../0_stateless/01939_user_with_default_database.reference | 4 ++-- tests/queries/0_stateless/01999_grant_with_replace.reference | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.reference b/tests/queries/0_stateless/01073_grant_and_revoke.reference index b91820914e6..b2c7797ad3c 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.reference +++ b/tests/queries/0_stateless/01073_grant_and_revoke.reference @@ -1,5 +1,5 @@ A -CREATE USER test_user_01073 +CREATE USER test_user_01073 IDENTIFIED WITH no_password B C GRANT INSERT, ALTER DELETE ON *.* TO test_user_01073 diff --git a/tests/queries/0_stateless/01939_user_with_default_database.reference b/tests/queries/0_stateless/01939_user_with_default_database.reference index 8c8ff7e3007..6e5a1d20758 100644 --- a/tests/queries/0_stateless/01939_user_with_default_database.reference +++ b/tests/queries/0_stateless/01939_user_with_default_database.reference @@ -1,4 +1,4 @@ default db_01939 -CREATE USER u_01939 -CREATE USER u_01939 DEFAULT DATABASE NONE +CREATE USER u_01939 IDENTIFIED WITH no_password +CREATE USER u_01939 IDENTIFIED WITH no_password DEFAULT DATABASE NONE diff --git a/tests/queries/0_stateless/01999_grant_with_replace.reference b/tests/queries/0_stateless/01999_grant_with_replace.reference index dc2047ab73c..903f2c301a0 100644 --- a/tests/queries/0_stateless/01999_grant_with_replace.reference +++ b/tests/queries/0_stateless/01999_grant_with_replace.reference @@ -1,4 +1,4 @@ -CREATE USER test_user_01999 +CREATE USER test_user_01999 IDENTIFIED WITH no_password A B GRANT SELECT ON db1.* TO test_user_01999 From 7c6293a747a813742c663d59342c0c0cc9db4956 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 26 Jun 2024 10:12:09 +0000 Subject: [PATCH 117/844] Fix test_parallel_replicas_no_replicas --- tests/integration/test_parallel_replicas_no_replicas/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_parallel_replicas_no_replicas/test.py b/tests/integration/test_parallel_replicas_no_replicas/test.py index 9f716459643..73ab6fde5a3 100644 --- a/tests/integration/test_parallel_replicas_no_replicas/test.py +++ b/tests/integration/test_parallel_replicas_no_replicas/test.py @@ -46,5 +46,6 @@ def test_skip_all_replicas(start_cluster, skip_unavailable_shards): "max_parallel_replicas": 3, "cluster_for_parallel_replicas": cluster_name, "skip_unavailable_shards": skip_unavailable_shards, + "parallel_replicas_local_plan": 0, }, ) From 11e537cb282e1ca7b0d65ccaff332afd616d1fbb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 08:51:30 -0300 Subject: [PATCH 118/844] update some more tests --- .../01075_allowed_client_hosts.reference | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.reference b/tests/queries/0_stateless/01075_allowed_client_hosts.reference index 5fb11bae65e..5d2168d6c3b 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.reference +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.reference @@ -1,17 +1,17 @@ -CREATE USER test_user_01075 -CREATE USER test_user_01075 -CREATE USER test_user_01075 HOST NONE -CREATE USER test_user_01075 HOST LOCAL -CREATE USER test_user_01075 HOST IP \'192.168.23.15\' -CREATE USER test_user_01075 HOST IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' -CREATE USER test_user_01075 HOST LOCAL, IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' -CREATE USER test_user_01075 HOST LOCAL -CREATE USER test_user_01075 HOST NONE -CREATE USER test_user_01075 HOST LIKE \'@.somesite.com\' -CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite\\\\.com\' -CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite\\\\.com\', \'.*\\\\.anothersite\\\\.org\' -CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite2\\\\.com\', \'.*\\\\.anothersite2\\\\.org\' -CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite3\\\\.com\', \'.*\\\\.anothersite3\\\\.org\' -CREATE USER `test_user_01075_x@localhost` HOST LOCAL -CREATE USER test_user_01075_x HOST LOCAL -CREATE USER `test_user_01075_x@192.168.23.15` HOST LOCAL +CREATE USER test_user_01075 IDENTIFIED WITH no_password +CREATE USER test_user_01075 IDENTIFIED WITH no_password +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST NONE +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST IP \'192.168.23.15\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST LOCAL, IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST NONE +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST LIKE \'@.somesite.com\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST REGEXP \'.*\\\\.anothersite\\\\.com\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST REGEXP \'.*\\\\.anothersite\\\\.com\', \'.*\\\\.anothersite\\\\.org\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST REGEXP \'.*\\\\.anothersite2\\\\.com\', \'.*\\\\.anothersite2\\\\.org\' +CREATE USER test_user_01075 IDENTIFIED WITH no_password HOST REGEXP \'.*\\\\.anothersite3\\\\.com\', \'.*\\\\.anothersite3\\\\.org\' +CREATE USER `test_user_01075_x@localhost` IDENTIFIED WITH no_password HOST LOCAL +CREATE USER test_user_01075_x IDENTIFIED WITH no_password HOST LOCAL +CREATE USER `test_user_01075_x@192.168.23.15` IDENTIFIED WITH no_password HOST LOCAL From 4d800a848741a71e6805050367de77e2edf5f646 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 08:54:58 -0300 Subject: [PATCH 119/844] update test_disk_access_storage --- tests/integration/test_disk_access_storage/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index a710295505e..7414ff4d84b 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -46,7 +46,7 @@ def test_create(): def check(): assert ( instance.query("SHOW CREATE USER u1") - == "CREATE USER u1 SETTINGS PROFILE `s1`\n" + == "CREATE USER u1 IDENTIFIED WITH no_password SETTINGS PROFILE `s1`\n" ) assert ( instance.query("SHOW CREATE USER u2") @@ -99,7 +99,7 @@ def test_alter(): def check(): assert ( instance.query("SHOW CREATE USER u1") - == "CREATE USER u1 SETTINGS PROFILE `s1`\n" + == "CREATE USER u1 IDENTIFIED WITH no_password SETTINGS PROFILE `s1`\n" ) assert ( instance.query("SHOW CREATE USER u2") @@ -147,7 +147,7 @@ def test_drop(): instance.query("DROP SETTINGS PROFILE s1") def check(): - assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1\n" + assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password\n" assert ( instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE `s2`\n" From b93c21a0410b5930b359be0e1ae5683d14978881 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 08:58:44 -0300 Subject: [PATCH 120/844] add no-parallel to grant_and_Revoke.sql --- tests/queries/0_stateless/01073_grant_and_revoke.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01073_grant_and_revoke.sql b/tests/queries/0_stateless/01073_grant_and_revoke.sql index 4cffd916e9f..59f599ce140 100644 --- a/tests/queries/0_stateless/01073_grant_and_revoke.sql +++ b/tests/queries/0_stateless/01073_grant_and_revoke.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP USER IF EXISTS test_user_01073; DROP ROLE IF EXISTS test_role_01073; From f15551b47b4cc54eeb1c6ece75c9f5c37f90bb13 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 09:38:55 -0300 Subject: [PATCH 121/844] black --- tests/integration/test_disk_access_storage/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 7414ff4d84b..ae756ef1e27 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -147,7 +147,10 @@ def test_drop(): instance.query("DROP SETTINGS PROFILE s1") def check(): - assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password\n" + assert ( + instance.query("SHOW CREATE USER u1") + == "CREATE USER u1 IDENTIFIED WITH no_password\n" + ) assert ( instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE `s2`\n" From 341071402c9d2c1f4f19d0f98cba8de1d3b25d90 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 11:00:13 -0300 Subject: [PATCH 122/844] fix wrong expected error code & add test --- .../03174_add_identified_with.reference | 15 ++++++++----- .../0_stateless/03174_add_identified_with.sh | 22 ++++++++++++++----- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index c2963686d18..e8e1ee9d294 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -24,14 +24,17 @@ AUTHENTICATION_FAILED Should work 1 Multiple identified with, not allowed -SYNTAX_ERROR +BAD_ARGUMENTS Multiple identified with, not allowed, even if mixed -SYNTAX_ERROR +BAD_ARGUMENTS Identified with must precede all add identified with, not allowed -SYNTAX_ERROR +BAD_ARGUMENTS CREATE Multiple identified with, not allowed -SYNTAX_ERROR +BAD_ARGUMENTS CREATE Multiple identified with, not allowed, even if mixed -SYNTAX_ERROR +BAD_ARGUMENTS CREATE Identified with must precede all add identified with, not allowed -SYNTAX_ERROR +BAD_ARGUMENTS +Create user with no identification +Add identified with +CREATE USER u01_03174 IDENTIFIED WITH no_password ADD IDENTIFIED WITH plaintext_password diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 5b80894d056..0884f31dd73 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -82,19 +82,29 @@ echo "Should work" test_login_pwd ${user} '6' echo "Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" echo "Multiple identified with, not allowed, even if mixed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password '7' ADD IDENTIFIED WITH plaintext_password '8' IDENTIFIED WITH plaintext_password '9'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7' ADD IDENTIFIED WITH plaintext_password by '8' IDENTIFIED WITH plaintext_password by '9'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" echo "Identified with must precede all add identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" ${CLICKHOUSE_CLIENT} --query "DROP USER ${user}" echo "CREATE Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" echo "CREATE Multiple identified with, not allowed, even if mixed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password '7' ADD IDENTIFIED WITH plaintext_password '8' IDENTIFIED WITH plaintext_password '9'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7' ADD IDENTIFIED WITH plaintext_password by '8' IDENTIFIED WITH plaintext_password by '9'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" echo "CREATE Identified with must precede all add identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password '7' IDENTIFIED WITH plaintext_password '8'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +echo "Create user with no identification" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user}" + +echo "Add identified with" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '7'" + +${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 3e77101b164eb3855f377a203e511186e9afb908 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 13:29:13 -0300 Subject: [PATCH 123/844] update some other tests --- tests/integration/test_access_control_on_cluster/test.py | 6 +++--- tests/queries/0_stateless/01075_allowed_client_hosts.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py index b12add7ad3f..3933ab15e0e 100644 --- a/tests/integration/test_access_control_on_cluster/test.py +++ b/tests/integration/test_access_control_on_cluster/test.py @@ -42,9 +42,9 @@ def test_access_control_on_cluster(): ch1.query_with_retry( "CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5 ) - assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" - assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" - assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex\n" + assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" + assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" + assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" ch2.query_with_retry( "GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3 diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.sql b/tests/queries/0_stateless/01075_allowed_client_hosts.sql index 17957c8826b..8c25d45f421 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.sql +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel DROP USER IF EXISTS test_user_01075, test_user_01075_x, test_user_01075_x@localhost, test_user_01075_x@'192.168.23.15'; From a77a0b5eb011e4b4907b903c347f4a85c461b984 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 13:59:57 -0300 Subject: [PATCH 124/844] black --- .../test_access_control_on_cluster/test.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py index 3933ab15e0e..a754292bfff 100644 --- a/tests/integration/test_access_control_on_cluster/test.py +++ b/tests/integration/test_access_control_on_cluster/test.py @@ -42,9 +42,18 @@ def test_access_control_on_cluster(): ch1.query_with_retry( "CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'", retry_count=5 ) - assert ch2.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" - assert ch1.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" - assert ch3.query("SHOW CREATE USER Alex") == "CREATE USER Alex IDENTIFIED WITH no_password\n" + assert ( + ch2.query("SHOW CREATE USER Alex") + == "CREATE USER Alex IDENTIFIED WITH no_password\n" + ) + assert ( + ch1.query("SHOW CREATE USER Alex") + == "CREATE USER Alex IDENTIFIED WITH no_password\n" + ) + assert ( + ch3.query("SHOW CREATE USER Alex") + == "CREATE USER Alex IDENTIFIED WITH no_password\n" + ) ch2.query_with_retry( "GRANT ON CLUSTER 'cluster' SELECT ON *.* TO Alex", retry_count=3 From 938004c090b112f4a76c904bcf26f4f6bed5a867 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 16:40:40 -0300 Subject: [PATCH 125/844] update yet another test --- .../0_stateless/01292_create_user.reference | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index d5841a74a2c..c23723b3ec1 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -1,12 +1,12 @@ -- default -CREATE USER u1_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password -- same as default -CREATE USER u2_01292 -CREATE USER u3_01292 +CREATE USER u2_01292 IDENTIFIED WITH no_password +CREATE USER u3_01292 IDENTIFIED WITH no_password -- rename -CREATE USER u2_01292_renamed +CREATE USER u2_01292_renamed IDENTIFIED WITH no_password -- authentication -CREATE USER u1_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password CREATE USER u2_01292 IDENTIFIED WITH plaintext_password CREATE USER u3_01292 IDENTIFIED WITH sha256_password CREATE USER u4_01292 IDENTIFIED WITH sha256_password @@ -19,92 +19,92 @@ CREATE USER u1_01292 IDENTIFIED WITH sha256_password CREATE USER u2_01292 IDENTIFIED WITH sha256_password CREATE USER u3_01292 IDENTIFIED WITH sha256_password CREATE USER u4_01292 IDENTIFIED WITH plaintext_password -CREATE USER u5_01292 +CREATE USER u5_01292 IDENTIFIED WITH no_password -- host -CREATE USER u1_01292 -CREATE USER u2_01292 HOST NONE -CREATE USER u3_01292 HOST LOCAL -CREATE USER u4_01292 HOST NAME \'myhost.com\' -CREATE USER u5_01292 HOST LOCAL, NAME \'myhost.com\' -CREATE USER u6_01292 HOST LOCAL, NAME \'myhost.com\' -CREATE USER u7_01292 HOST REGEXP \'.*\\\\.myhost\\\\.com\' -CREATE USER u8_01292 -CREATE USER u9_01292 HOST LIKE \'%.myhost.com\' -CREATE USER u10_01292 HOST LIKE \'%.myhost.com\' -CREATE USER u11_01292 HOST LOCAL -CREATE USER u12_01292 HOST IP \'192.168.1.1\' -CREATE USER u13_01292 HOST IP \'192.168.0.0/16\' -CREATE USER u14_01292 HOST LOCAL -CREATE USER u15_01292 HOST IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' -CREATE USER u16_01292 HOST LOCAL, IP \'65:ff0c::/96\' -CREATE USER u1_01292 HOST NONE -CREATE USER u2_01292 HOST NAME \'myhost.com\' -CREATE USER u3_01292 HOST LOCAL, NAME \'myhost.com\' -CREATE USER u4_01292 HOST NONE +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER u2_01292 IDENTIFIED WITH no_password HOST NONE +CREATE USER u3_01292 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER u4_01292 IDENTIFIED WITH no_password HOST NAME \'myhost.com\' +CREATE USER u5_01292 IDENTIFIED WITH no_password HOST LOCAL, NAME \'myhost.com\' +CREATE USER u6_01292 IDENTIFIED WITH no_password HOST LOCAL, NAME \'myhost.com\' +CREATE USER u7_01292 IDENTIFIED WITH no_password HOST REGEXP \'.*\\\\.myhost\\\\.com\' +CREATE USER u8_01292 IDENTIFIED WITH no_password +CREATE USER u9_01292 IDENTIFIED WITH no_password HOST LIKE \'%.myhost.com\' +CREATE USER u10_01292 IDENTIFIED WITH no_password HOST LIKE \'%.myhost.com\' +CREATE USER u11_01292 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER u12_01292 IDENTIFIED WITH no_password HOST IP \'192.168.1.1\' +CREATE USER u13_01292 IDENTIFIED WITH no_password HOST IP \'192.168.0.0/16\' +CREATE USER u14_01292 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER u15_01292 IDENTIFIED WITH no_password HOST IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' +CREATE USER u16_01292 IDENTIFIED WITH no_password HOST LOCAL, IP \'65:ff0c::/96\' +CREATE USER u1_01292 IDENTIFIED WITH no_password HOST NONE +CREATE USER u2_01292 IDENTIFIED WITH no_password HOST NAME \'myhost.com\' +CREATE USER u3_01292 IDENTIFIED WITH no_password HOST LOCAL, NAME \'myhost.com\' +CREATE USER u4_01292 IDENTIFIED WITH no_password HOST NONE -- host after @ -CREATE USER u1_01292 -CREATE USER u1_01292 -CREATE USER `u2_01292@%.myhost.com` HOST LIKE \'%.myhost.com\' -CREATE USER `u2_01292@%.myhost.com` HOST LIKE \'%.myhost.com\' -CREATE USER `u3_01292@192.168.%.%` HOST LIKE \'192.168.%.%\' -CREATE USER `u3_01292@192.168.%.%` HOST LIKE \'192.168.%.%\' -CREATE USER `u4_01292@::1` HOST LOCAL -CREATE USER `u4_01292@::1` HOST LOCAL -CREATE USER `u5_01292@65:ff0c::/96` HOST LIKE \'65:ff0c::/96\' -CREATE USER `u5_01292@65:ff0c::/96` HOST LIKE \'65:ff0c::/96\' -CREATE USER u1_01292 HOST LOCAL -CREATE USER `u2_01292@%.myhost.com` +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER `u2_01292@%.myhost.com` IDENTIFIED WITH no_password HOST LIKE \'%.myhost.com\' +CREATE USER `u2_01292@%.myhost.com` IDENTIFIED WITH no_password HOST LIKE \'%.myhost.com\' +CREATE USER `u3_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' +CREATE USER `u3_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' +CREATE USER `u4_01292@::1` IDENTIFIED WITH no_password HOST LOCAL +CREATE USER `u4_01292@::1` IDENTIFIED WITH no_password HOST LOCAL +CREATE USER `u5_01292@65:ff0c::/96` IDENTIFIED WITH no_password HOST LIKE \'65:ff0c::/96\' +CREATE USER `u5_01292@65:ff0c::/96` IDENTIFIED WITH no_password HOST LIKE \'65:ff0c::/96\' +CREATE USER u1_01292 IDENTIFIED WITH no_password HOST LOCAL +CREATE USER `u2_01292@%.myhost.com` IDENTIFIED WITH no_password -- settings -CREATE USER u1_01292 -CREATE USER u2_01292 SETTINGS PROFILE `default` -CREATE USER u3_01292 SETTINGS max_memory_usage = 5000000 -CREATE USER u4_01292 SETTINGS max_memory_usage MIN 5000000 -CREATE USER u5_01292 SETTINGS max_memory_usage MAX 5000000 -CREATE USER u6_01292 SETTINGS max_memory_usage CONST -CREATE USER u7_01292 SETTINGS max_memory_usage WRITABLE -CREATE USER u8_01292 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST -CREATE USER u9_01292 SETTINGS PROFILE `default`, max_memory_usage = 5000000 WRITABLE -CREATE USER u1_01292 SETTINGS readonly = 1 -CREATE USER u2_01292 SETTINGS readonly = 1 -CREATE USER u3_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER u2_01292 IDENTIFIED WITH no_password SETTINGS PROFILE `default` +CREATE USER u3_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage = 5000000 +CREATE USER u4_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage MIN 5000000 +CREATE USER u5_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage MAX 5000000 +CREATE USER u6_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage CONST +CREATE USER u7_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage WRITABLE +CREATE USER u8_01292 IDENTIFIED WITH no_password SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 CONST +CREATE USER u9_01292 IDENTIFIED WITH no_password SETTINGS PROFILE `default`, max_memory_usage = 5000000 WRITABLE +CREATE USER u1_01292 IDENTIFIED WITH no_password SETTINGS readonly = 1 +CREATE USER u2_01292 IDENTIFIED WITH no_password SETTINGS readonly = 1 +CREATE USER u3_01292 IDENTIFIED WITH no_password -- default role -CREATE USER u1_01292 -CREATE USER u2_01292 DEFAULT ROLE NONE -CREATE USER u3_01292 DEFAULT ROLE r1_01292 -CREATE USER u4_01292 DEFAULT ROLE r1_01292, r2_01292 -CREATE USER u5_01292 DEFAULT ROLE ALL EXCEPT r2_01292 -CREATE USER u6_01292 DEFAULT ROLE ALL EXCEPT r1_01292, r2_01292 -CREATE USER u1_01292 DEFAULT ROLE r1_01292 -CREATE USER u2_01292 DEFAULT ROLE ALL EXCEPT r2_01292 -CREATE USER u3_01292 DEFAULT ROLE r2_01292 -CREATE USER u4_01292 -CREATE USER u5_01292 DEFAULT ROLE ALL EXCEPT r1_01292 -CREATE USER u6_01292 DEFAULT ROLE NONE +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER u2_01292 IDENTIFIED WITH no_password DEFAULT ROLE NONE +CREATE USER u3_01292 IDENTIFIED WITH no_password DEFAULT ROLE r1_01292 +CREATE USER u4_01292 IDENTIFIED WITH no_password DEFAULT ROLE r1_01292, r2_01292 +CREATE USER u5_01292 IDENTIFIED WITH no_password DEFAULT ROLE ALL EXCEPT r2_01292 +CREATE USER u6_01292 IDENTIFIED WITH no_password DEFAULT ROLE ALL EXCEPT r1_01292, r2_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password DEFAULT ROLE r1_01292 +CREATE USER u2_01292 IDENTIFIED WITH no_password DEFAULT ROLE ALL EXCEPT r2_01292 +CREATE USER u3_01292 IDENTIFIED WITH no_password DEFAULT ROLE r2_01292 +CREATE USER u4_01292 IDENTIFIED WITH no_password +CREATE USER u5_01292 IDENTIFIED WITH no_password DEFAULT ROLE ALL EXCEPT r1_01292 +CREATE USER u6_01292 IDENTIFIED WITH no_password DEFAULT ROLE NONE -- complex CREATE USER u1_01292 IDENTIFIED WITH plaintext_password HOST LOCAL SETTINGS readonly = 1 -CREATE USER u1_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE NONE SETTINGS PROFILE `default` +CREATE USER u1_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE NONE SETTINGS PROFILE `default` -- if not exists -CREATE USER u1_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password GRANT r1_01292 TO u1_01292 -- if not exists-part2 -CREATE USER u1_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password GRANT r1_01292, r2_01292 TO u1_01292 -- or replace -CREATE USER u1_01292 -CREATE USER u2_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password +CREATE USER u2_01292 IDENTIFIED WITH no_password -- multiple users in one command -CREATE USER u1_01292 DEFAULT ROLE NONE -CREATE USER u2_01292 DEFAULT ROLE NONE -CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' -CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' -CREATE USER `u5_01292@%.host.com` HOST LIKE \'%.host.com\' -CREATE USER `u6_01292@%.host.com` HOST LIKE \'%.host.com\' -CREATE USER `u7_01292@%.host.com` HOST LIKE \'%.host.com\' -CREATE USER `u8_01292@%.otherhost.com` HOST LIKE \'%.otherhost.com\' -CREATE USER u1_01292 DEFAULT ROLE NONE SETTINGS readonly = 1 -CREATE USER u2_01292 DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1 -CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 -CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 +CREATE USER u1_01292 IDENTIFIED WITH no_password DEFAULT ROLE NONE +CREATE USER u2_01292 IDENTIFIED WITH no_password DEFAULT ROLE NONE +CREATE USER u3_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' +CREATE USER u4_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' +CREATE USER `u5_01292@%.host.com` IDENTIFIED WITH no_password HOST LIKE \'%.host.com\' +CREATE USER `u6_01292@%.host.com` IDENTIFIED WITH no_password HOST LIKE \'%.host.com\' +CREATE USER `u7_01292@%.host.com` IDENTIFIED WITH no_password HOST LIKE \'%.host.com\' +CREATE USER `u8_01292@%.otherhost.com` IDENTIFIED WITH no_password HOST LIKE \'%.otherhost.com\' +CREATE USER u1_01292 IDENTIFIED WITH no_password DEFAULT ROLE NONE SETTINGS readonly = 1 +CREATE USER u2_01292 IDENTIFIED WITH no_password DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1 +CREATE USER u3_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 +CREATE USER u4_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 -- system.users u1_01292 local_directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] u2_01292 local_directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] From c0e1095e66670289cc83d8d104bd508412029a85 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 17:06:38 -0300 Subject: [PATCH 126/844] initial fix for session_log, shall be refactored --- src/Access/Authentication.cpp | 23 +---------------------- src/Access/IAccessStorage.cpp | 27 +++++++++++++++------------ src/Access/IAccessStorage.h | 6 +++++- src/Access/LDAPAccessStorage.cpp | 2 +- src/Interpreters/Session.cpp | 4 +++- src/Interpreters/Session.h | 1 + src/Interpreters/SessionLog.cpp | 11 ++++++----- src/Interpreters/SessionLog.h | 5 +++-- 8 files changed, 35 insertions(+), 44 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 827f2e17e94..7174c66d4b1 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -249,27 +249,6 @@ namespace return false; } #endif - - [[noreturn]] void throwInvalidCredentialsException(const std::vector & authentication_methods) - { - std::string possible_authentication_types; - bool first = true; - - for (const auto & authentication_method : authentication_methods) - { - if (!first) - { - possible_authentication_types += ", "; - } - possible_authentication_types += toString(authentication_method.getType()); - first = false; - } - - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "areCredentialsValid(): Invalid credentials provided, available authentication methods are {}", - possible_authentication_types); - } } bool Authentication::areCredentialsValid( @@ -311,7 +290,7 @@ bool Authentication::areCredentialsValid( if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; - throwInvalidCredentialsException(authentication_methods); + return false; } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index f1725dafdf4..5b29c07e046 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -521,7 +521,7 @@ std::optional IAccessStorage::authenticateImpl( { if (auto user = tryRead(*id)) { - AuthResult auth_result { .user_id = *id }; + AuthResult auth_result { .user_id = *id, .authentication_data = std::nullopt }; if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); @@ -534,12 +534,15 @@ std::optional IAccessStorage::authenticateImpl( if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) throwAuthenticationTypeNotAllowed(auth_type); + + if (areCredentialsValid(user->getName(), user->valid_until, auth_method, credentials, external_authenticators, auth_result.settings)) + { + auth_result.authentication_data = auth_method; + return auth_result; + } } - if (!areCredentialsValid(*user, credentials, external_authenticators, auth_result.settings)) - throwInvalidCredentials(); - - return auth_result; + throwInvalidCredentials(); } } @@ -549,9 +552,10 @@ std::optional IAccessStorage::authenticateImpl( return std::nullopt; } - bool IAccessStorage::areCredentialsValid( - const User & user, + const std::string user_name, + time_t valid_until, + const AuthenticationData & authentication_method, const Credentials & credentials, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings) const @@ -559,21 +563,20 @@ bool IAccessStorage::areCredentialsValid( if (!credentials.isReady()) return false; - if (credentials.getUserName() != user.getName()) + if (credentials.getUserName() != user_name) return false; - if (user.valid_until) + if (valid_until) { const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - if (now > user.valid_until) + if (now > valid_until) return false; } - return Authentication::areCredentialsValid(credentials, user.authentication_methods, external_authenticators, settings); + return Authentication::areCredentialsValid(credentials, {authentication_method}, external_authenticators, settings); } - bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const { return user.allowed_client_hosts.contains(address); diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index e88b1601f32..df2e8739f7a 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -34,6 +35,7 @@ struct AuthResult UUID user_id; /// Session settings received from authentication server (if any) SettingsChanges settings{}; + std::optional authentication_data; }; /// Contains entities, i.e. instances of classes derived from IAccessEntity. @@ -227,7 +229,9 @@ protected: bool allow_no_password, bool allow_plaintext_password) const; virtual bool areCredentialsValid( - const User & user, + const std::string user_name, + time_t valid_until, + const AuthenticationData & authentication_method, const Credentials & credentials, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings) const; diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index 89920878fef..f6435b0d899 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -504,7 +504,7 @@ std::optional LDAPAccessStorage::authenticateImpl( } if (id) - return AuthResult{ .user_id = *id }; + return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::LDAP) }; return std::nullopt; } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 8bacc3ac8fc..f56fcb2172f 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -361,6 +361,7 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So { auto auth_result = global_context->getAccessControl().authenticate(credentials_, address.host(), getClientInfo().getLastForwardedFor()); user_id = auth_result.user_id; + user_authenticated_with = auth_result.authentication_data; settings_from_auth_server = auth_result.settings; LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); @@ -705,7 +706,8 @@ void Session::recordLoginSuccess(ContextPtr login_context) const settings, access->getAccess(), getClientInfo(), - user); + user, + *user_authenticated_with); } notified_session_log_about_login = true; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index f671969b1d4..6281df25de3 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -113,6 +113,7 @@ private: mutable UserPtr user; std::optional user_id; + std::optional user_authenticated_with; ContextMutablePtr session_context; mutable bool query_context_created = false; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 4832450af48..30c3bd570f3 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -208,13 +208,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -// todo arthur fix this method void SessionLog::addLoginSuccess(const UUID & auth_id, const String & session_id, const Settings & settings, const ContextAccessPtr & access, const ClientInfo & client_info, - const UserPtr & login_user) + const UserPtr & login_user, + const AuthenticationData & user_authenticated_with) { SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; @@ -222,10 +222,11 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, if (login_user) { log_entry.user = login_user->getName(); - log_entry.user_identified_with = login_user->authentication_methods.back().getType(); -// log_entry.user_identified_with = login_user->auth_data.getType(); + log_entry.user_identified_with = user_authenticated_with.getType(); } - log_entry.external_auth_server = login_user ? login_user->authentication_methods.back().getLDAPServerName() : ""; + + log_entry.external_auth_server = user_authenticated_with.getLDAPServerName(); + log_entry.session_id = session_id; diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 5bacb9677c0..2037d6c6861 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -22,6 +22,7 @@ class ContextAccess; struct User; using UserPtr = std::shared_ptr; using ContextAccessPtr = std::shared_ptr; +class AuthenticationData; /** A struct which will be inserted as row into session_log table. * @@ -71,14 +72,14 @@ struct SessionLogElement class SessionLog : public SystemLog { using SystemLog::SystemLog; - public: void addLoginSuccess(const UUID & auth_id, const String & session_id, const Settings & settings, const ContextAccessPtr & access, const ClientInfo & client_info, - const UserPtr & login_user); + const UserPtr & login_user, + const AuthenticationData & user_authenticated_with); void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional & user, const Exception & reason); void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info); From 95f584c5d05ddd2f1642456cbedadcabee2f0b23 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 17:25:46 -0300 Subject: [PATCH 127/844] black --- src/Access/Authentication.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 7174c66d4b1..e38bd5abfe5 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -15,7 +15,6 @@ namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; } From 678700f137f0932505a8c6101569b9c6d12b3764 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jun 2024 22:26:46 -0300 Subject: [PATCH 128/844] update some other tests --- .../integration/test_backup_restore_new/test.py | 2 +- .../test_backup_restore_on_cluster/test.py | 2 +- .../test_enabling_access_management/test.py | 4 ++-- tests/integration/test_grant_and_revoke/test.py | 16 ++++++++-------- tests/integration/test_settings_profile/test.py | 4 ++-- tests/integration/test_user_valid_until/test.py | 14 +++++++------- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index d8662fad011..78b906053a7 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1212,7 +1212,7 @@ def test_system_users_required_privileges(): instance.query("GRANT SELECT ON test.* TO u2 WITH GRANT OPTION") instance.query(f"RESTORE ALL FROM {backup_name}", user="u2") - assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 DEFAULT ROLE r1\n" + assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password DEFAULT ROLE r1\n" assert instance.query("SHOW GRANTS FOR u1") == TSV( ["GRANT SELECT ON test.* TO u1", "GRANT r1 TO u1"] ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 700ed6f15f5..3a15bc1d947 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -769,7 +769,7 @@ def test_system_users(): ) assert ( - node1.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS custom_a = 123\n" + node1.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password SETTINGS custom_a = 123\n" ) assert node1.query("SHOW GRANTS FOR u1") == "GRANT SELECT ON default.tbl TO u1\n" diff --git a/tests/integration/test_enabling_access_management/test.py b/tests/integration/test_enabling_access_management/test.py index 0b8c1771a40..450ff267a5a 100644 --- a/tests/integration/test_enabling_access_management/test.py +++ b/tests/integration/test_enabling_access_management/test.py @@ -20,10 +20,10 @@ def started_cluster(): def test_enabling_access_management(): instance.query("CREATE USER Alex", user="default") assert ( - instance.query("SHOW CREATE USER Alex", user="default") == "CREATE USER Alex\n" + instance.query("SHOW CREATE USER Alex", user="default") == "CREATE USER Alex IDENTIFIED WITH no_password\n" ) assert ( - instance.query("SHOW CREATE USER Alex", user="readonly") == "CREATE USER Alex\n" + instance.query("SHOW CREATE USER Alex", user="readonly") == "CREATE USER Alex IDENTIFIED WITH no_password\n" ) assert "Not enough privileges" in instance.query_and_get_error( "SHOW CREATE USER Alex", user="xyz" diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index e533cced1e4..9748675dccf 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -144,7 +144,7 @@ def test_allowed_grantees(): instance.query("ALTER USER A GRANTEES ANY EXCEPT B") assert ( - instance.query("SHOW CREATE USER A") == "CREATE USER A GRANTEES ANY EXCEPT B\n" + instance.query("SHOW CREATE USER A") == "CREATE USER A IDENTIFIED WITH no_password GRANTEES ANY EXCEPT B\n" ) expected_error = "user `B` is not allowed as grantee" assert expected_error in instance.query_and_get_error( @@ -157,7 +157,7 @@ def test_allowed_grantees(): instance.query("REVOKE SELECT ON test.table FROM B", user="A") instance.query("ALTER USER A GRANTEES ANY") - assert instance.query("SHOW CREATE USER A") == "CREATE USER A\n" + assert instance.query("SHOW CREATE USER A") == "CREATE USER A IDENTIFIED WITH no_password\n" instance.query("GRANT SELECT ON test.table TO B", user="A") assert instance.query("SELECT * FROM test.table", user="B") == "1\t5\n2\t10\n" @@ -169,7 +169,7 @@ def test_allowed_grantees(): instance.query("CREATE USER C GRANTEES ANY EXCEPT C") assert ( - instance.query("SHOW CREATE USER C") == "CREATE USER C GRANTEES ANY EXCEPT C\n" + instance.query("SHOW CREATE USER C") == "CREATE USER C IDENTIFIED WITH no_password GRANTEES ANY EXCEPT C\n" ) instance.query("GRANT SELECT ON test.table TO C WITH GRANT OPTION") assert instance.query("SELECT * FROM test.table", user="C") == "1\t5\n2\t10\n" @@ -385,15 +385,15 @@ def test_introspection(): instance.query("GRANT CREATE ON *.* TO B WITH GRANT OPTION") assert instance.query("SHOW USERS") == TSV(["A", "B", "default"]) - assert instance.query("SHOW CREATE USERS A") == TSV(["CREATE USER A"]) - assert instance.query("SHOW CREATE USERS B") == TSV(["CREATE USER B"]) + assert instance.query("SHOW CREATE USERS A") == TSV(["CREATE USER A IDENTIFIED WITH no_password"]) + assert instance.query("SHOW CREATE USERS B") == TSV(["CREATE USER B IDENTIFIED WITH no_password"]) assert instance.query("SHOW CREATE USERS A,B") == TSV( - ["CREATE USER A", "CREATE USER B"] + ["CREATE USER A IDENTIFIED WITH no_password", "CREATE USER B IDENTIFIED WITH no_password"] ) assert instance.query("SHOW CREATE USERS") == TSV( [ - "CREATE USER A", - "CREATE USER B", + "CREATE USER A IDENTIFIED WITH no_password", + "CREATE USER B IDENTIFIED WITH no_password", "CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE `default`", ] ) diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index e5c0a072ff9..bfd93744832 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -128,7 +128,7 @@ def test_smoke(): instance.query("ALTER USER robin SETTINGS PROFILE xyz") assert ( instance.query("SHOW CREATE USER robin") - == "CREATE USER robin SETTINGS PROFILE `xyz`\n" + == "CREATE USER robin IDENTIFIED WITH no_password SETTINGS PROFILE `xyz`\n" ) assert ( instance.query( @@ -152,7 +152,7 @@ def test_smoke(): ] instance.query("ALTER USER robin SETTINGS NONE") - assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin\n" + assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin IDENTIFIED WITH no_password\n" assert ( instance.query( "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py index d6d5bf8b18e..487a9a2744a 100644 --- a/tests/integration/test_user_valid_until/test.py +++ b/tests/integration/test_user_valid_until/test.py @@ -22,7 +22,7 @@ def test_basic(started_cluster): # 1. Without VALID UNTIL node.query("CREATE USER user_basic") - assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n" + assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic IDENTIFIED WITH no_password\n" assert node.query("SELECT 1", user="user_basic") == "1\n" # 2. With valid VALID UNTIL @@ -30,7 +30,7 @@ def test_basic(started_cluster): assert ( node.query("SHOW CREATE USER user_basic") - == "CREATE USER user_basic VALID UNTIL \\'2040-11-06 05:03:20\\'\n" + == "CREATE USER user_basic IDENTIFIED WITH no_password VALID UNTIL \\'2040-11-06 05:03:20\\'\n" ) assert node.query("SELECT 1", user="user_basic") == "1\n" @@ -39,7 +39,7 @@ def test_basic(started_cluster): assert ( node.query("SHOW CREATE USER user_basic") - == "CREATE USER user_basic VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + == "CREATE USER user_basic IDENTIFIED WITH no_password VALID UNTIL \\'2010-11-06 05:03:20\\'\n" ) error = "Authentication failed" @@ -48,7 +48,7 @@ def test_basic(started_cluster): # 4. Reset VALID UNTIL node.query("ALTER USER user_basic VALID UNTIL 'infinity'") - assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n" + assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic IDENTIFIED WITH no_password\n" assert node.query("SELECT 1", user="user_basic") == "1\n" node.query("DROP USER user_basic") @@ -72,15 +72,15 @@ def test_details(started_cluster): assert ( node.query("SHOW CREATE USER user_details_infinity") - == "CREATE USER user_details_infinity\n" + == "CREATE USER user_details_infinity IDENTIFIED WITH no_password\n" ) # 2. Time only is not supported - node.query("CREATE USER user_details_time_only VALID UNTIL '22:03:40'") + node.query("CREATE USER user_details_time_only IDENTIFIED WITH no_password VALID UNTIL '22:03:40'") until_year = datetime.today().strftime("%Y") assert ( node.query("SHOW CREATE USER user_details_time_only") - == f"CREATE USER user_details_time_only VALID UNTIL \\'{until_year}-01-01 22:03:40\\'\n" + == f"CREATE USER user_details_time_only IDENTIFIED WITH no_password VALID UNTIL \\'{until_year}-01-01 22:03:40\\'\n" ) From ec0d426a6f4c6fe95bf4c185f6880bc26943050c Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 08:52:20 -0300 Subject: [PATCH 129/844] try to fix black --- tests/integration/test_backup_restore_new/test.py | 5 ++++- .../test_backup_restore_on_cluster/test.py | 3 ++- .../test_enabling_access_management/test.py | 6 ++++-- tests/integration/test_grant_and_revoke/test.py | 11 ++++++++--- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 78b906053a7..b330a509e05 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1212,7 +1212,10 @@ def test_system_users_required_privileges(): instance.query("GRANT SELECT ON test.* TO u2 WITH GRANT OPTION") instance.query(f"RESTORE ALL FROM {backup_name}", user="u2") - assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password DEFAULT ROLE r1\n" + assert ( + instance.query("SHOW CREATE USER u1") + == "CREATE USER u1 IDENTIFIED WITH no_password DEFAULT ROLE r1\n" + ) assert instance.query("SHOW GRANTS FOR u1") == TSV( ["GRANT SELECT ON test.* TO u1", "GRANT r1 TO u1"] ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 3a15bc1d947..5415820bed6 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -769,7 +769,8 @@ def test_system_users(): ) assert ( - node1.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH no_password SETTINGS custom_a = 123\n" + node1.query("SHOW CREATE USER u1") + == "CREATE USER u1 IDENTIFIED WITH no_password SETTINGS custom_a = 123\n" ) assert node1.query("SHOW GRANTS FOR u1") == "GRANT SELECT ON default.tbl TO u1\n" diff --git a/tests/integration/test_enabling_access_management/test.py b/tests/integration/test_enabling_access_management/test.py index 450ff267a5a..821e598d385 100644 --- a/tests/integration/test_enabling_access_management/test.py +++ b/tests/integration/test_enabling_access_management/test.py @@ -20,10 +20,12 @@ def started_cluster(): def test_enabling_access_management(): instance.query("CREATE USER Alex", user="default") assert ( - instance.query("SHOW CREATE USER Alex", user="default") == "CREATE USER Alex IDENTIFIED WITH no_password\n" + instance.query("SHOW CREATE USER Alex", user="default") + == "CREATE USER Alex IDENTIFIED WITH no_password\n" ) assert ( - instance.query("SHOW CREATE USER Alex", user="readonly") == "CREATE USER Alex IDENTIFIED WITH no_password\n" + instance.query("SHOW CREATE USER Alex", user="readonly") + == "CREATE USER Alex IDENTIFIED WITH no_password\n" ) assert "Not enough privileges" in instance.query_and_get_error( "SHOW CREATE USER Alex", user="xyz" diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 9748675dccf..a1ce37a5d3d 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -144,7 +144,8 @@ def test_allowed_grantees(): instance.query("ALTER USER A GRANTEES ANY EXCEPT B") assert ( - instance.query("SHOW CREATE USER A") == "CREATE USER A IDENTIFIED WITH no_password GRANTEES ANY EXCEPT B\n" + instance.query("SHOW CREATE USER A") + == "CREATE USER A IDENTIFIED WITH no_password GRANTEES ANY EXCEPT B\n" ) expected_error = "user `B` is not allowed as grantee" assert expected_error in instance.query_and_get_error( @@ -157,7 +158,10 @@ def test_allowed_grantees(): instance.query("REVOKE SELECT ON test.table FROM B", user="A") instance.query("ALTER USER A GRANTEES ANY") - assert instance.query("SHOW CREATE USER A") == "CREATE USER A IDENTIFIED WITH no_password\n" + assert ( + instance.query("SHOW CREATE USER A") + == "CREATE USER A IDENTIFIED WITH no_password\n" + ) instance.query("GRANT SELECT ON test.table TO B", user="A") assert instance.query("SELECT * FROM test.table", user="B") == "1\t5\n2\t10\n" @@ -169,7 +173,8 @@ def test_allowed_grantees(): instance.query("CREATE USER C GRANTEES ANY EXCEPT C") assert ( - instance.query("SHOW CREATE USER C") == "CREATE USER C IDENTIFIED WITH no_password GRANTEES ANY EXCEPT C\n" + instance.query("SHOW CREATE USER C") + == "CREATE USER C IDENTIFIED WITH no_password GRANTEES ANY EXCEPT C\n" ) instance.query("GRANT SELECT ON test.table TO C WITH GRANT OPTION") assert instance.query("SELECT * FROM test.table", user="C") == "1\t5\n2\t10\n" From f69a19d01d18848e08ff2e7b81c064b9da0da160 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 09:20:12 -0300 Subject: [PATCH 130/844] try to fix black --- tests/integration/test_grant_and_revoke/test.py | 15 ++++++++++++--- tests/integration/test_settings_profile/test.py | 5 ++++- tests/integration/test_user_valid_until/test.py | 14 +++++++++++--- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index a1ce37a5d3d..9fc3dfc249a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -390,10 +390,19 @@ def test_introspection(): instance.query("GRANT CREATE ON *.* TO B WITH GRANT OPTION") assert instance.query("SHOW USERS") == TSV(["A", "B", "default"]) - assert instance.query("SHOW CREATE USERS A") == TSV(["CREATE USER A IDENTIFIED WITH no_password"]) - assert instance.query("SHOW CREATE USERS B") == TSV(["CREATE USER B IDENTIFIED WITH no_password"]) + assert ( + instance.query("SHOW CREATE USERS A") + == TSV(["CREATE USER A IDENTIFIED WITH no_password"]) + ) + assert ( + instance.query("SHOW CREATE USERS B") + == TSV(["CREATE USER B IDENTIFIED WITH no_password"]) + ) assert instance.query("SHOW CREATE USERS A,B") == TSV( - ["CREATE USER A IDENTIFIED WITH no_password", "CREATE USER B IDENTIFIED WITH no_password"] + [ + "CREATE USER A IDENTIFIED WITH no_password", + "CREATE USER B IDENTIFIED WITH no_password" + ] ) assert instance.query("SHOW CREATE USERS") == TSV( [ diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index bfd93744832..888f9f55b20 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -152,7 +152,10 @@ def test_smoke(): ] instance.query("ALTER USER robin SETTINGS NONE") - assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin IDENTIFIED WITH no_password\n" + assert ( + instance.query("SHOW CREATE USER robin") + == "CREATE USER robin IDENTIFIED WITH no_password\n" + ) assert ( instance.query( "SELECT value FROM system.settings WHERE name = 'max_memory_usage'", diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py index 487a9a2744a..c43c6a2e52a 100644 --- a/tests/integration/test_user_valid_until/test.py +++ b/tests/integration/test_user_valid_until/test.py @@ -22,7 +22,10 @@ def test_basic(started_cluster): # 1. Without VALID UNTIL node.query("CREATE USER user_basic") - assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic IDENTIFIED WITH no_password\n" + assert ( + node.query("SHOW CREATE USER user_basic") + == "CREATE USER user_basic IDENTIFIED WITH no_password\n" + ) assert node.query("SELECT 1", user="user_basic") == "1\n" # 2. With valid VALID UNTIL @@ -48,7 +51,10 @@ def test_basic(started_cluster): # 4. Reset VALID UNTIL node.query("ALTER USER user_basic VALID UNTIL 'infinity'") - assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic IDENTIFIED WITH no_password\n" + assert ( + node.query("SHOW CREATE USER user_basic") + == "CREATE USER user_basic IDENTIFIED WITH no_password\n" + ) assert node.query("SELECT 1", user="user_basic") == "1\n" node.query("DROP USER user_basic") @@ -76,7 +82,9 @@ def test_details(started_cluster): ) # 2. Time only is not supported - node.query("CREATE USER user_details_time_only IDENTIFIED WITH no_password VALID UNTIL '22:03:40'") + node.query( + "CREATE USER user_details_time_only IDENTIFIED WITH no_password VALID UNTIL '22:03:40'" + ) until_year = datetime.today().strftime("%Y") From 21de3e29612a1fc929168a048a9bfbe096462868 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 10:05:37 -0300 Subject: [PATCH 131/844] try to fix black --- tests/integration/test_grant_and_revoke/test.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 9fc3dfc249a..8de78163520 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -390,18 +390,16 @@ def test_introspection(): instance.query("GRANT CREATE ON *.* TO B WITH GRANT OPTION") assert instance.query("SHOW USERS") == TSV(["A", "B", "default"]) - assert ( - instance.query("SHOW CREATE USERS A") - == TSV(["CREATE USER A IDENTIFIED WITH no_password"]) + assert instance.query("SHOW CREATE USERS A") == TSV( + ["CREATE USER A IDENTIFIED WITH no_password"] ) - assert ( - instance.query("SHOW CREATE USERS B") - == TSV(["CREATE USER B IDENTIFIED WITH no_password"]) + assert instance.query("SHOW CREATE USERS B") == TSV( + ["CREATE USER B IDENTIFIED WITH no_password"] ) assert instance.query("SHOW CREATE USERS A,B") == TSV( [ "CREATE USER A IDENTIFIED WITH no_password", - "CREATE USER B IDENTIFIED WITH no_password" + "CREATE USER B IDENTIFIED WITH no_password", ] ) assert instance.query("SHOW CREATE USERS") == TSV( From b219b9938044da68d773c80439e4a47b504ec3a7 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 11:50:16 -0300 Subject: [PATCH 132/844] small refactor --- src/Access/Authentication.cpp | 201 ++++++++++------------------- src/Access/Authentication.h | 2 +- src/Parsers/tests/gtest_Parser.cpp | 1 - 3 files changed, 67 insertions(+), 137 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index e38bd5abfe5..cb67924475b 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -83,176 +83,107 @@ namespace } #endif - std::vector getAuthenticationMethodsOfType( - const std::vector & authentication_methods, - const std::unordered_set & types) - { - std::vector authentication_methods_of_type; - - for (const auto & authentication_method : authentication_methods) - { - if (types.contains(authentication_method.getType())) - { - authentication_methods_of_type.push_back(authentication_method); - } - } - - return authentication_methods_of_type; - } - bool checkKerberosAuthentication( const GSSAcceptorContext * gss_acceptor_context, - const std::vector & authentication_methods, + const AuthenticationData & authentication_method, const ExternalAuthenticators & external_authenticators) { - auto kerberos_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::KERBEROS}); - for (const auto & kerberos_authentication : kerberos_authentication_methods) - { - if (external_authenticators.checkKerberosCredentials(kerberos_authentication.getKerberosRealm(), *gss_acceptor_context)) - { - return true; - } - } - return false; + return authentication_method.getType() == AuthenticationType::KERBEROS + && external_authenticators.checkKerberosCredentials(authentication_method.getKerberosRealm(), *gss_acceptor_context); } bool checkMySQLAuthentication( const MySQLNative41Credentials * mysql_credentials, - const std::vector & authentication_methods) + const AuthenticationData & authentication_method) { - auto mysql_authentication_methods = getAuthenticationMethodsOfType( - authentication_methods, - {AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::DOUBLE_SHA1_PASSWORD}); - - for (const auto & mysql_authentication_method : mysql_authentication_methods) + switch (authentication_method.getType()) { - switch (mysql_authentication_method.getType()) - { - case AuthenticationType::PLAINTEXT_PASSWORD: - if (checkPasswordPlainTextMySQL( - mysql_credentials->getScramble(), mysql_credentials->getScrambledPassword(), mysql_authentication_method.getPasswordHashBinary())) - { - return true; - } - break; - - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - if (checkPasswordDoubleSHA1MySQL( - mysql_credentials->getScramble(), - mysql_credentials->getScrambledPassword(), - mysql_authentication_method.getPasswordHashBinary())) - { - return true; - } - break; - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid MySQL authentication type"); - } + case AuthenticationType::PLAINTEXT_PASSWORD: + return checkPasswordPlainTextMySQL( + mysql_credentials->getScramble(), + mysql_credentials->getScrambledPassword(), + authentication_method.getPasswordHashBinary()); + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + return checkPasswordDoubleSHA1MySQL( + mysql_credentials->getScramble(), + mysql_credentials->getScrambledPassword(), + authentication_method.getPasswordHashBinary()); + default: + return false; } - return false; } bool checkBasicAuthentication( const BasicCredentials * basic_credentials, - const std::vector & authentication_methods, + const AuthenticationData & authentication_method, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings) { - auto basic_credentials_authentication_methods = getAuthenticationMethodsOfType( - authentication_methods, - {AuthenticationType::NO_PASSWORD, AuthenticationType::PLAINTEXT_PASSWORD, AuthenticationType::SHA256_PASSWORD, - AuthenticationType::DOUBLE_SHA1_PASSWORD, AuthenticationType::LDAP, AuthenticationType::BCRYPT_PASSWORD, - AuthenticationType::HTTP}); - - for (const auto & auth_method : basic_credentials_authentication_methods) + switch (authentication_method.getType()) { - switch (auth_method.getType()) + case AuthenticationType::NO_PASSWORD: { - case AuthenticationType::NO_PASSWORD: - { - return true; - } - case AuthenticationType::PLAINTEXT_PASSWORD: - if (checkPasswordPlainText(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) - { - return true; - } - break; - - case AuthenticationType::SHA256_PASSWORD: - if (checkPasswordSHA256(basic_credentials->getPassword(), auth_method.getPasswordHashBinary(), auth_method.getSalt())) - { - return true; - } - break; - case AuthenticationType::DOUBLE_SHA1_PASSWORD: - if (checkPasswordDoubleSHA1(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) - { - return true; - } - break; - case AuthenticationType::LDAP: - if (external_authenticators.checkLDAPCredentials(auth_method.getLDAPServerName(), *basic_credentials)) - { - return true; - } - break; - case AuthenticationType::BCRYPT_PASSWORD: - if (checkPasswordBcrypt(basic_credentials->getPassword(), auth_method.getPasswordHashBinary())) - { - return true; - } - break; - case AuthenticationType::HTTP: - if (auth_method.getHTTPAuthenticationScheme() == HTTPAuthenticationScheme::BASIC) - { - return external_authenticators.checkHTTPBasicCredentials( - auth_method.getHTTPAuthenticationServerName(), *basic_credentials, settings); - } - break; - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid basic authentication type"); + return true; } + case AuthenticationType::PLAINTEXT_PASSWORD: + { + return checkPasswordPlainText(basic_credentials->getPassword(), authentication_method.getPasswordHashBinary()); + } + case AuthenticationType::SHA256_PASSWORD: + { + return checkPasswordSHA256( + basic_credentials->getPassword(), authentication_method.getPasswordHashBinary(), authentication_method.getSalt()); + } + case AuthenticationType::DOUBLE_SHA1_PASSWORD: + { + return checkPasswordDoubleSHA1(basic_credentials->getPassword(), authentication_method.getPasswordHashBinary()); + } + case AuthenticationType::LDAP: + { + return external_authenticators.checkLDAPCredentials(authentication_method.getLDAPServerName(), *basic_credentials); + } + case AuthenticationType::BCRYPT_PASSWORD: + { + return checkPasswordBcrypt(basic_credentials->getPassword(), authentication_method.getPasswordHashBinary()); + } + case AuthenticationType::HTTP: + { + if (authentication_method.getHTTPAuthenticationScheme() == HTTPAuthenticationScheme::BASIC) + { + return external_authenticators.checkHTTPBasicCredentials( + authentication_method.getHTTPAuthenticationServerName(), *basic_credentials, settings); + } + break; + } + default: + break; } + return false; } bool checkSSLCertificateAuthentication( const SSLCertificateCredentials * ssl_certificate_credentials, - const std::vector & authentication_methods) + const AuthenticationData & authentication_method) { - const auto ssl_certificate_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSL_CERTIFICATE}); - for (const auto & auth_method : ssl_certificate_authentication_methods) - { - if (auth_method.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName())) - { - return true; - } - } - return false; + return AuthenticationType::SSL_CERTIFICATE == authentication_method.getType() + && authentication_method.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); } #if USE_SSH bool checkSshAuthentication( const SshCredentials * ssh_credentials, - const std::vector & authentication_methods) + const AuthenticationData & authentication_method) { - const auto ssh_authentication_methods = getAuthenticationMethodsOfType(authentication_methods, {AuthenticationType::SSH_KEY}); - for (const auto & auth_method : ssh_authentication_methods) - { - if (checkSshSignature(auth_method.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal())) - { - return true; - } - } - return false; + return AuthenticationType::SSH_KEY == authentication_method.getType() + && checkSshSignature(authentication_method.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal()); } #endif } bool Authentication::areCredentialsValid( const Credentials & credentials, - const std::vector & authentication_methods, + const AuthenticationData & authentication_method, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings) { @@ -261,28 +192,28 @@ bool Authentication::areCredentialsValid( if (const auto * gss_acceptor_context = typeid_cast(&credentials)) { - return checkKerberosAuthentication(gss_acceptor_context, authentication_methods, external_authenticators); + return checkKerberosAuthentication(gss_acceptor_context, authentication_method, external_authenticators); } if (const auto * mysql_credentials = typeid_cast(&credentials)) { - return checkMySQLAuthentication(mysql_credentials, authentication_methods); + return checkMySQLAuthentication(mysql_credentials, authentication_method); } if (const auto * basic_credentials = typeid_cast(&credentials)) { - return checkBasicAuthentication(basic_credentials, authentication_methods, external_authenticators, settings); + return checkBasicAuthentication(basic_credentials, authentication_method, external_authenticators, settings); } if (const auto * ssl_certificate_credentials = typeid_cast(&credentials)) { - return checkSSLCertificateAuthentication(ssl_certificate_credentials, authentication_methods); + return checkSSLCertificateAuthentication(ssl_certificate_credentials, authentication_method); } #if USE_SSH if (const auto * ssh_credentials = typeid_cast(&credentials)) { - return checkSshAuthentication(ssh_credentials, authentication_methods); + return checkSshAuthentication(ssh_credentials, authentication_method); } #endif diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 13bd487f21a..e895001304d 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -24,7 +24,7 @@ struct Authentication /// returned by the authentication server static bool areCredentialsValid( const Credentials & credentials, - const std::vector & authentication_methods, + const AuthenticationData & authentication_method, const ExternalAuthenticators & external_authenticators, SettingsChanges & settings); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index b88c8f56029..bd5d39773dd 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -87,7 +87,6 @@ TEST_P(ParserTest, parseQuery) { if (input_text.starts_with("ATTACH")) { - // todo arthur auto salt = (dynamic_cast(ast.get())->auth_data.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } From a948334b8b93a8a7814a4396aa217d735149edac Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 11:53:42 -0300 Subject: [PATCH 133/844] minor --- src/Access/IAccessStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 5b29c07e046..e5512a5b7c8 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -574,7 +574,7 @@ bool IAccessStorage::areCredentialsValid( return false; } - return Authentication::areCredentialsValid(credentials, {authentication_method}, external_authenticators, settings); + return Authentication::areCredentialsValid(credentials, authentication_method, external_authenticators, settings); } bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const From 50da0cb7324ab7bfcf056ab5f2219a00c7cbac23 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 12:14:08 -0300 Subject: [PATCH 134/844] remove definition of logical_error from auth.cpp --- src/Access/Authentication.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index cb67924475b..be11bf7ba27 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -13,10 +13,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} namespace { From 43a91947390ebc90d61500aae841b8d3aa58965f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 14:21:29 -0300 Subject: [PATCH 135/844] remove optional from auth_result --- src/Access/IAccessStorage.cpp | 2 +- src/Access/IAccessStorage.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index e5512a5b7c8..5d5a9b9ce18 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -521,7 +521,7 @@ std::optional IAccessStorage::authenticateImpl( { if (auto user = tryRead(*id)) { - AuthResult auth_result { .user_id = *id, .authentication_data = std::nullopt }; + AuthResult auth_result { .user_id = *id }; if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index df2e8739f7a..353d76536fa 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -35,7 +35,7 @@ struct AuthResult UUID user_id; /// Session settings received from authentication server (if any) SettingsChanges settings{}; - std::optional authentication_data; + AuthenticationData authentication_data {}; }; /// Contains entities, i.e. instances of classes derived from IAccessEntity. From e1119587621f0b4cc898e48ddd61341c22f31cdb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 14:33:28 -0300 Subject: [PATCH 136/844] remmovev optional from auth data in session object --- src/Interpreters/Session.cpp | 2 +- src/Interpreters/Session.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index f56fcb2172f..cdca3f5c049 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -707,7 +707,7 @@ void Session::recordLoginSuccess(ContextPtr login_context) const access->getAccess(), getClientInfo(), user, - *user_authenticated_with); + user_authenticated_with); } notified_session_log_about_login = true; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 6281df25de3..ab4bc53b6f1 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -113,7 +113,7 @@ private: mutable UserPtr user; std::optional user_id; - std::optional user_authenticated_with; + AuthenticationData user_authenticated_with; ContextMutablePtr session_context; mutable bool query_context_created = false; From eb8a18304f33659766c5590c0887991463d30693 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 27 Jun 2024 17:08:14 -0300 Subject: [PATCH 137/844] rename somem stuff --- src/Client/ClientBase.cpp | 4 +-- .../Access/InterpreterCreateUserQuery.cpp | 30 +++++++++---------- ...InterpreterShowCreateAccessEntityQuery.cpp | 2 +- src/Parsers/Access/ASTCreateUserQuery.cpp | 22 +++++++------- src/Parsers/Access/ASTCreateUserQuery.h | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 4 +-- src/Parsers/tests/gtest_Parser.cpp | 2 +- src/Parsers/tests/gtest_common.cpp | 2 +- src/Storages/System/StorageSystemUsers.cpp | 1 + 9 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 10d77d853f5..3024ee64aea 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1912,9 +1912,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (const auto * create_user_query = parsed_query->as()) { - if (!create_user_query->attach && !create_user_query->auth_data.empty()) + if (!create_user_query->attach && !create_user_query->authentication_methods.empty()) { - for (const auto & authentication_method : create_user_query->auth_data) + for (const auto & authentication_method : create_user_query->authentication_methods) { auto password = authentication_method->getPassword(); diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index a1f8eaa4cb0..5d0d2c434bd 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -33,7 +33,7 @@ namespace void updateUserFromQueryImpl( User & user, const ASTCreateUserQuery & query, - const std::vector auth_data, + const std::vector authentication_methods, const std::shared_ptr & override_name, const std::optional & override_default_roles, const std::optional & override_settings, @@ -52,13 +52,13 @@ namespace else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (!query.attach && !query.alter && auth_data.empty() && !allow_implicit_no_password) + if (!query.attach && !query.alter && authentication_methods.empty() && !allow_implicit_no_password) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication type NO_PASSWORD must " "be explicitly specified, check the setting allow_implicit_no_password " "in the server configuration"); - if (user.authentication_methods.empty() && auth_data.empty()) + if (user.authentication_methods.empty() && authentication_methods.empty()) { user.authentication_methods.emplace_back(); } @@ -68,7 +68,7 @@ namespace user.authentication_methods.clear(); } - for (const auto & authentication_method : auth_data) + for (const auto & authentication_method : authentication_methods) { user.authentication_methods.emplace_back(authentication_method); } @@ -152,12 +152,12 @@ BlockIO InterpreterCreateUserQuery::execute() bool no_password_allowed = access_control.isNoPasswordAllowed(); bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed(); - std::vector auth_data; - if (!query.auth_data.empty()) + std::vector authentication_methods; + if (!query.authentication_methods.empty()) { - for (const auto & authentication_method_ast : query.auth_data) + for (const auto & authentication_method_ast : query.authentication_methods) { - auth_data.push_back(AuthenticationData::fromAST(*authentication_method_ast, getContext(), !query.attach)); + authentication_methods.push_back(AuthenticationData::fromAST(*authentication_method_ast, getContext(), !query.attach)); } } @@ -224,7 +224,7 @@ BlockIO InterpreterCreateUserQuery::execute() { auto updated_user = typeid_cast>(entity->clone()); updateUserFromQueryImpl( - *updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, + *updated_user, query, authentication_methods, {}, default_roles_from_query, settings_from_query, grantees_from_query, valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); return updated_user; @@ -245,7 +245,7 @@ BlockIO InterpreterCreateUserQuery::execute() { auto new_user = std::make_shared(); updateUserFromQueryImpl( - *new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, + *new_user, query, authentication_methods, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); new_users.emplace_back(std::move(new_user)); @@ -286,18 +286,18 @@ BlockIO InterpreterCreateUserQuery::execute() void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password) { - std::vector auth_data; - if (!query.auth_data.empty()) + std::vector authentication_methods; + if (!query.authentication_methods.empty()) { - for (const auto & authentication_method_ast : query.auth_data) + for (const auto & authentication_method_ast : query.authentication_methods) { - auth_data.emplace_back(AuthenticationData::fromAST(*authentication_method_ast, {}, !query.attach)); + authentication_methods.emplace_back(AuthenticationData::fromAST(*authentication_method_ast, {}, !query.attach)); } } updateUserFromQueryImpl(user, query, - auth_data, + authentication_methods, {}, {}, {}, diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index e49cf68727c..ef6ddf1866d 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -66,7 +66,7 @@ namespace for (const auto & authentication_method : user.authentication_methods) { - query->auth_data.push_back(authentication_method.toAST()); + query->authentication_methods.push_back(authentication_method.toAST()); } if (user.valid_until) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 86b91fcb6f4..c6e78955711 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -18,16 +18,16 @@ namespace << quoteString(new_name); } - void formatAuthenticationData(const std::vector> & auth_data, const IAST::FormatSettings & settings) + void formatAuthenticationData(const std::vector> & authentication_methods, const IAST::FormatSettings & settings) { - auth_data[0]->format(settings); + authentication_methods[0]->format(settings); auto settings_with_additional_authentication_method = settings; settings_with_additional_authentication_method.additional_authentication_method = true; - for (auto i = 1u; i < auth_data.size(); i++) + for (auto i = 1u; i < authentication_methods.size(); i++) { - auth_data[i]->format(settings_with_additional_authentication_method); + authentication_methods[i]->format(settings_with_additional_authentication_method); } } @@ -172,7 +172,7 @@ ASTPtr ASTCreateUserQuery::clone() const { auto res = std::make_shared(*this); res->children.clear(); - res->auth_data.clear(); + res->authentication_methods.clear(); if (names) res->names = std::static_pointer_cast(names->clone()); @@ -189,20 +189,20 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); - if (auth_data.empty()) + if (authentication_methods.empty()) { auto ast = std::make_shared(); ast->type = AuthenticationType::NO_PASSWORD; - res->auth_data.push_back(ast); + res->authentication_methods.push_back(ast); res->children.push_back(ast); } else { - for (const auto & authentication_method : auth_data) + for (const auto & authentication_method : authentication_methods) { auto ast_clone = std::static_pointer_cast(authentication_method->clone()); - res->auth_data.push_back(ast_clone); + res->authentication_methods.push_back(ast_clone); res->children.push_back(ast_clone); } } @@ -243,8 +243,8 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (new_name) formatRenameTo(*new_name, format); - if (!auth_data.empty()) - formatAuthenticationData(auth_data, format); + if (!authentication_methods.empty()) + formatAuthenticationData(authentication_methods, format); if (valid_until) formatValidUntil(*valid_until, format); diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 24af6d00ca4..66a31366993 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -49,7 +49,7 @@ public: std::optional new_name; String storage_name; - std::vector> auth_data; + std::vector> authentication_methods; std::optional hosts; std::optional add_hosts; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 36162086609..13d988e215d 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -609,7 +609,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->cluster = std::move(cluster); query->names = std::move(names); query->new_name = std::move(new_name); - query->auth_data = std::move(auth_data); + query->authentication_methods = std::move(auth_data); query->hosts = std::move(hosts); query->add_hosts = std::move(add_hosts); query->remove_hosts = std::move(remove_hosts); @@ -622,7 +622,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); query->replace_authentication_methods = parsed_identified_with; - for (const auto & authentication_method : query->auth_data) + for (const auto & authentication_method : query->authentication_methods) { query->children.push_back(authentication_method); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index bd5d39773dd..e2ccdc96c95 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -87,7 +87,7 @@ TEST_P(ParserTest, parseQuery) { if (input_text.starts_with("ATTACH")) { - auto salt = (dynamic_cast(ast.get())->auth_data.back())->getSalt().value_or(""); + auto salt = (dynamic_cast(ast.get())->authentication_methods.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index bc780e17ffd..0aded8c98f7 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -64,7 +64,7 @@ TEST_P(ParserKQLTest, parseKQLQuery) if (input_text.starts_with("ATTACH")) { // todo arthur check - auto salt = (dynamic_cast(ast.get())->auth_data.back())->getSalt().value_or(""); + auto salt = (dynamic_cast(ast.get())->authentication_methods.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } else diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index e8557efa9f9..bf2078f7bc1 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -112,6 +112,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auto & column_grantees_except_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_default_database = assert_cast(*res_columns[column_index++]); + // todo arthur check this auto add_row = [&](const String & name, const UUID & id, const String & storage_name, From 372b948d34fe24257e096a6b7d6c9ea03ba0023a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 27 Jun 2024 20:30:05 +0000 Subject: [PATCH 138/844] Fix: progress bar, reading in order --- src/Interpreters/ClusterProxy/executeQuery.cpp | 16 ++++++++-------- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 10 ++++++---- ...2898_parallel_replicas_progress_bar.reference | 6 ++++++ .../02898_parallel_replicas_progress_bar.sql | 10 +++++----- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index da88f16b504..9302baf57ca 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -500,6 +500,14 @@ void executeQueryWithParallelReplicas( /// do not build local plan for distributed queries for now (address it later) if (settings.allow_experimental_analyzer && settings.parallel_replicas_local_plan && !shard_num) { + auto local_plan = createLocalPlanForParallelReplicas( + query_ast, + header, + new_context, + processed_stage, + coordinator, + std::move(analyzed_read_from_merge_tree)); + auto read_from_remote = std::make_unique( query_ast, new_cluster, @@ -518,14 +526,6 @@ void executeQueryWithParallelReplicas( auto remote_plan = std::make_unique(); remote_plan->addStep(std::move(read_from_remote)); - auto local_plan = createLocalPlanForParallelReplicas( - query_ast, - header, - new_context, - processed_stage, - coordinator, - std::move(analyzed_read_from_merge_tree)); - DataStreams input_streams; input_streams.reserve(2); input_streams.emplace_back(local_plan->getCurrentDataStream()); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7cf574ddf5c..e4e251b694e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -586,12 +586,14 @@ Pipe ReadFromMergeTree::readInOrder( context); } - /// Actually it means that parallel reading from replicas enabled - /// and we have to collaborate with initiator. + /// Actually it means that parallel reading from replicas enabled and read snapshot is not local - + /// we can't rely on local snapshot /// In this case we won't set approximate rows, because it will be accounted multiple times. /// Also do not count amount of read rows if we read in order of sorting key, /// because we don't know actual amount of read rows in case when limit is set. - bool set_rows_approx = !is_parallel_reading_from_replicas && !reader_settings.read_in_order; + const UInt64 in_order_limit = query_info.input_order_info ? query_info.input_order_info->limit : 0; + const bool set_total_rows_approx + = !(is_parallel_reading_from_replicas && context->canUseParallelReplicasOnFollower()) && !in_order_limit; Pipes pipes; for (size_t i = 0; i < parts_with_ranges.size(); ++i) @@ -621,7 +623,7 @@ Pipe ReadFromMergeTree::readInOrder( processor->addPartLevelToChunk(isQueryWithFinal()); auto source = std::make_shared(std::move(processor)); - if (set_rows_approx) + if (set_total_rows_approx) source->addTotalRowsApprox(total_rows); pipes.emplace_back(std::move(source)); diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference index 380aac4dbe8..c66597436f3 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.reference @@ -1,2 +1,8 @@ 3000 1000 3999 2499.5 1 +1998 2944475297004403859 +1999 254596732598015005 +2000 6863370867519437063 +2001 17844331710293705251 +2002 1587587338113897332 +1 diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index 42f8091db08..d3bf228e0fb 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -26,12 +26,12 @@ WHERE query_id in (select query_id from system.query_log where current_database AND message LIKE '%Total rows to read: 3000%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; -- reading in order coordinator --- SELECT k, sipHash64(v) FROM t1 order by k limit 5 offset 998 SETTINGS optimize_read_in_order=1, log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b'; +SELECT k, sipHash64(v) FROM t1 order by k limit 5 offset 998 SETTINGS optimize_read_in_order=1, parallel_replicas_local_plan=0, log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b'; --- SYSTEM FLUSH LOGS; --- SELECT count() > 0 FROM system.text_log --- WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b') --- AND message LIKE '%Updated total rows to read: added % rows, total 3000 rows%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SYSTEM FLUSH LOGS; +SELECT count() > 0 FROM system.text_log +WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b') + AND message LIKE '%Updated total rows to read: added % rows, total 3000 rows%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; DROP TABLE t1 SYNC; DROP TABLE t2 SYNC; From e9360221b7083d94bc6c0137f97b84b3d20e57cb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 28 Jun 2024 08:40:32 -0300 Subject: [PATCH 139/844] trigger ci From 1cd253e05fcf8c9043b56cf24537bbb1d22d4f4f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 28 Jun 2024 12:48:38 -0300 Subject: [PATCH 140/844] fix integ test --- tests/integration/test_grant_and_revoke/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 8de78163520..498c60a1d40 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -464,8 +464,8 @@ def test_introspection(): assert expected_error in instance.query_and_get_error("SHOW GRANTS FOR B", user="A") expected_access1 = ( - "CREATE USER A\n" - "CREATE USER B\n" + "CREATE USER A IDENTIFIED WITH no_password\n" + "CREATE USER B IDENTIFIED WITH no_password\n" "CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE `default`" ) expected_access2 = ( From 27c9bb9b106f0eb7ef31f8c6a49ea206ddf4dc63 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 28 Jun 2024 17:06:06 -0300 Subject: [PATCH 141/844] add missing reference qualifier --- src/Access/IAccessStorage.cpp | 2 +- src/Access/IAccessStorage.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 5d5a9b9ce18..10a0341b301 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -553,7 +553,7 @@ std::optional IAccessStorage::authenticateImpl( } bool IAccessStorage::areCredentialsValid( - const std::string user_name, + const std::string & user_name, time_t valid_until, const AuthenticationData & authentication_method, const Credentials & credentials, diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 353d76536fa..74350bdbd63 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -229,7 +229,7 @@ protected: bool allow_no_password, bool allow_plaintext_password) const; virtual bool areCredentialsValid( - const std::string user_name, + const std::string & user_name, time_t valid_until, const AuthenticationData & authentication_method, const Credentials & credentials, From ebcf455f4a88043234fb19770bfc29958789c090 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 28 Jun 2024 22:17:34 +0000 Subject: [PATCH 142/844] Fix: progress bar for reading in order --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 9 +++++---- .../MergeTree/ParallelReplicasReadingCoordinator.cpp | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e4e251b694e..08c6989242e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -592,8 +592,10 @@ Pipe ReadFromMergeTree::readInOrder( /// Also do not count amount of read rows if we read in order of sorting key, /// because we don't know actual amount of read rows in case when limit is set. const UInt64 in_order_limit = query_info.input_order_info ? query_info.input_order_info->limit : 0; - const bool set_total_rows_approx - = !(is_parallel_reading_from_replicas && context->canUseParallelReplicasOnFollower()) && !in_order_limit; + const bool parallel_replicas_remote_plan_for_initiator = is_parallel_reading_from_replicas + && !context->getSettingsRef().parallel_replicas_local_plan && context->canUseParallelReplicasOnInitiator(); + const bool parallel_replicas_follower = is_parallel_reading_from_replicas && context->canUseParallelReplicasOnFollower(); + const bool set_total_rows_approx = !parallel_replicas_follower && !parallel_replicas_remote_plan_for_initiator && !in_order_limit; Pipes pipes; for (size_t i = 0; i < parts_with_ranges.size(); ++i) @@ -1968,8 +1970,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons mode = CoordinationMode::ReverseOrder; break; case ReadFromMergeTree::ReadType::ParallelReplicas: - chassert(false); - UNREACHABLE(); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read type can't be ParallelReplicas on initiator"); } chassert(number_of_current_replica.has_value()); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 601ab3aeb70..f66cfdafa1a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -888,9 +888,8 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa ++stats[announcement.replica_num].number_of_requests; - /// FIXME: this code updating total_rows_to_read but it needs to be done only once since we're taking working set from initiator - /// util I missing something, it seems this code is not necessary if working set is taken from initiator (todo: check it) - if (new_rows_to_read > 0 && progress_callback) + // progress_callback is not set when local plan is used for initiator + if (progress_callback && new_rows_to_read > 0) { Progress progress; progress.total_rows_to_read = new_rows_to_read; @@ -1052,6 +1051,7 @@ void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode) break; } + // progress_callback is not set when local plan is used for initiator if (progress_callback) pimpl->setProgressCallback(std::move(progress_callback)); From 096616bd1f5f105a760243dfaec5f4493bccabeb Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 28 Jun 2024 02:28:02 +0000 Subject: [PATCH 143/844] introduce mutations snapshot --- src/Interpreters/MutationsInterpreter.cpp | 17 ++- src/Interpreters/MutationsInterpreter.h | 6 +- .../optimizeUseAggregateProjection.cpp | 2 +- .../optimizeUseNormalProjection.cpp | 2 +- .../Optimizations/projectionsCommon.cpp | 9 +- src/Processors/QueryPlan/PartsSplitter.cpp | 1 - .../QueryPlan/ReadFromMergeTree.cpp | 24 ++-- src/Processors/QueryPlan/ReadFromMergeTree.h | 10 +- src/Storages/MergeTree/AlterConversions.cpp | 9 +- src/Storages/MergeTree/AlterConversions.h | 3 +- src/Storages/MergeTree/MergeTask.cpp | 14 ++- src/Storages/MergeTree/MergeTask.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 77 ++++++++---- src/Storages/MergeTree/MergeTreeData.h | 46 +++++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 51 ++------ .../MergeTree/MergeTreeDataSelectExecutor.h | 8 +- .../MergeTree/MergeTreeMutationEntry.cpp | 12 +- .../MergeTree/MergeTreeMutationEntry.h | 2 +- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 2 + .../MergeTree/MergeTreePrefetchedReadPool.h | 1 + src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 + src/Storages/MergeTree/MergeTreeReadPool.h | 1 + .../MergeTree/MergeTreeReadPoolBase.cpp | 6 +- .../MergeTree/MergeTreeReadPoolBase.h | 4 + .../MergeTree/MergeTreeReadPoolInOrder.cpp | 2 + .../MergeTree/MergeTreeReadPoolInOrder.h | 1 + .../MergeTreeReadPoolParallelReplicas.cpp | 2 + .../MergeTreeReadPoolParallelReplicas.h | 1 + ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 2 + ...MergeTreeReadPoolParallelReplicasInOrder.h | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 21 +++- .../MergeTree/MergeTreeSequentialSource.h | 2 + src/Storages/MergeTree/MutateTask.cpp | 27 ++-- src/Storages/MergeTree/RangesInDataPart.h | 6 +- .../MergeTree/ReplicatedMergeTreeLogEntry.h | 1 - .../MergeTree/ReplicatedMergeTreeQueue.cpp | 82 ++++++++---- .../MergeTree/ReplicatedMergeTreeQueue.h | 24 +++- .../MergeTree/StorageFromMergeTreeDataPart.h | 11 +- src/Storages/StorageMergeTree.cpp | 117 ++++++++++-------- src/Storages/StorageMergeTree.h | 24 +++- src/Storages/StorageReplicatedMergeTree.cpp | 6 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- 42 files changed, 413 insertions(+), 230 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..fc15a20f992 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -145,6 +145,7 @@ ColumnDependencies getAllColumnDependencies( bool isStorageTouchedByMutations( MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextPtr context) @@ -181,7 +182,7 @@ bool isStorageTouchedByMutations( if (all_commands_can_be_skipped) return false; - auto storage_from_part = std::make_shared(source_part); + auto storage_from_part = std::make_shared(source_part, mutations_snapshot); std::optional interpreter_select_query; BlockIO io; @@ -283,8 +284,13 @@ MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(st { } -MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_) - : data(&storage_), part(std::move(source_part_)) +MutationsInterpreter::Source::Source( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + AlterConversionsPtr alter_conversions_) + : data(&storage_) + , part(std::move(source_part_)) + , alter_conversions(std::move(alter_conversions_)) { } @@ -384,13 +390,14 @@ MutationsInterpreter::MutationsInterpreter( MutationsInterpreter::MutationsInterpreter( MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_, + AlterConversionsPtr alter_conversions_, StorageMetadataPtr metadata_snapshot_, MutationCommands commands_, Names available_columns_, ContextPtr context_, Settings settings_) : MutationsInterpreter( - Source(storage_, std::move(source_part_)), + Source(storage_, std::move(source_part_), std::move(alter_conversions_)), std::move(metadata_snapshot_), std::move(commands_), std::move(available_columns_), std::move(context_), std::move(settings_)) { @@ -1210,7 +1217,7 @@ void MutationsInterpreter::Source::read( createReadFromPartStep( MergeTreeSequentialSourceType::Mutation, plan, *data, storage_snapshot, - part, required_columns, + part, alter_conversions, required_columns, apply_deleted_mask_, filter, context_, getLogger("MutationsInterpreter")); } diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 6aaa233cda3..8ae438efc19 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -6,6 +6,7 @@ #include #include #include +#include "Storages/MergeTree/AlterConversions.h" namespace DB @@ -21,6 +22,7 @@ using QueryPipelineBuilderPtr = std::unique_ptr; bool isStorageTouchedByMutations( MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextPtr context @@ -71,6 +73,7 @@ public: MutationsInterpreter( MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_, + AlterConversionsPtr alter_conversions_, StorageMetadataPtr metadata_snapshot_, MutationCommands commands_, Names available_columns_, @@ -138,7 +141,7 @@ public: bool can_execute_) const; explicit Source(StoragePtr storage_); - Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_); + Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_, AlterConversionsPtr alter_conversions_); private: StoragePtr storage; @@ -146,6 +149,7 @@ public: /// Special case for *MergeTree. MergeTreeData * data = nullptr; MergeTreeData::DataPartPtr part; + AlterConversionsPtr alter_conversions; }; private: diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 70327bc95b4..7e69734a7e5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -764,7 +764,7 @@ std::optional optimizeUseAggregateProjections(QueryPlan::Node & node, Qu projection_reading = reader.readFromParts( /* parts = */ {}, - /* alter_conversions = */ {}, + reading->getMutationsSnapshot()->cloneEmpty(), best_candidate->dag->getRequiredColumnsNames(), proj_snapshot, projection_query_info, diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 0af3869ccf1..43e60318004 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -199,7 +199,7 @@ std::optional optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod auto projection_reading = reader.readFromParts( /*parts=*/ {}, - /*alter_conversions=*/ {}, + reading->getMutationsSnapshot()->cloneEmpty(), required_columns, proj_snapshot, query_info_copy, diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index af1578d6af8..e6939581b9e 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -217,20 +217,15 @@ bool analyzeProjectionCandidate( { MergeTreeData::DataPartsVector projection_parts; MergeTreeData::DataPartsVector normal_parts; - std::vector alter_conversions; + for (const auto & part_with_ranges : parts_with_ranges) { const auto & created_projections = part_with_ranges.data_part->getProjectionParts(); auto it = created_projections.find(candidate.projection->name); if (it != created_projections.end() && !it->second->is_broken) - { projection_parts.push_back(it->second); - } else - { normal_parts.push_back(part_with_ranges.data_part); - alter_conversions.push_back(part_with_ranges.alter_conversions); - } } if (projection_parts.empty()) @@ -255,7 +250,7 @@ bool analyzeProjectionCandidate( if (!normal_parts.empty()) { /// TODO: We can reuse existing analysis_result by filtering out projection parts - auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts), std::move(alter_conversions)); + auto normal_result_ptr = reading.selectRangesToRead(std::move(normal_parts)); if (normal_result_ptr->selected_marks != 0) { diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index ed4b1906635..65f1c89f990 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -229,7 +229,6 @@ public: { ranges_in_data_parts.emplace_back( initial_ranges_in_data_parts[part_index].data_part, - initial_ranges_in_data_parts[part_index].alter_conversions, initial_ranges_in_data_parts[part_index].part_index_in_query, MarkRanges{mark_range}); part_index_to_initial_ranges_in_data_parts_index[it->second] = part_index; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index aba3f6ff2da..e958430ff16 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -262,7 +262,7 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, - std::vector alter_conversions_, + MergeTreeData::MutationsSnapshotPtr mutations_, Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, @@ -279,7 +279,7 @@ ReadFromMergeTree::ReadFromMergeTree( query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_) , reader_settings(getMergeTreeReaderSettings(context_, query_info_)) , prepared_parts(std::move(parts_)) - , alter_conversions_for_parts(std::move(alter_conversions_)) + , mutations_snapshot(std::move(mutations_)) , all_column_names(std::move(all_column_names_)) , data(data_) , actions_settings(ExpressionActionsSettings::fromContext(context_)) @@ -361,6 +361,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto pool = std::make_shared( std::move(extension), std::move(parts_with_range), + mutations_snapshot, shared_virtual_fields, storage_snapshot, prewhere_info, @@ -442,6 +443,7 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + mutations_snapshot, shared_virtual_fields, storage_snapshot, prewhere_info, @@ -455,6 +457,7 @@ Pipe ReadFromMergeTree::readFromPool( { pool = std::make_shared( std::move(parts_with_range), + mutations_snapshot, shared_virtual_fields, storage_snapshot, prewhere_info, @@ -535,6 +538,7 @@ Pipe ReadFromMergeTree::readInOrder( std::move(extension), mode, parts_with_ranges, + mutations_snapshot, shared_virtual_fields, storage_snapshot, prewhere_info, @@ -550,6 +554,7 @@ Pipe ReadFromMergeTree::readInOrder( has_limit_below_one_block, read_type, parts_with_ranges, + mutations_snapshot, shared_virtual_fields, storage_snapshot, prewhere_info, @@ -1016,7 +1021,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( } ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction); - new_parts.emplace_back(part.data_part, part.alter_conversions, part.part_index_in_query, std::move(ranges_to_get_from_part)); + new_parts.emplace_back(part.data_part, part.part_index_in_query, std::move(ranges_to_get_from_part)); } splitted_parts_and_ranges.emplace_back(std::move(new_parts)); @@ -1243,7 +1248,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( RangesInDataParts new_parts; for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) - new_parts.emplace_back(part_it->data_part, part_it->alter_conversions, part_it->part_index_in_query, part_it->ranges); + new_parts.emplace_back(part_it->data_part, part_it->part_index_in_query, part_it->ranges); if (new_parts.empty()) continue; @@ -1356,15 +1361,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(bool find_exact_ranges) const { - return selectRangesToRead(prepared_parts, alter_conversions_for_parts, find_exact_ranges); + return selectRangesToRead(prepared_parts, find_exact_ranges); } -ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( - MergeTreeData::DataPartsVector parts, std::vector alter_conversions, bool find_exact_ranges) const +ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(MergeTreeData::DataPartsVector parts, bool find_exact_ranges) const { return selectRangesToRead( std::move(parts), - std::move(alter_conversions), metadata_for_reading, query_info, context, @@ -1534,7 +1537,6 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info_, ContextPtr context_, @@ -1596,10 +1598,9 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( { MergeTreeDataSelectExecutor::filterPartsByPartition( + parts, indexes->partition_pruner, indexes->minmax_idx_condition, - parts, - alter_conversions, indexes->part_values, metadata_snapshot, data, @@ -1628,7 +1629,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( auto reader_settings = getMergeTreeReaderSettings(context_, query_info_); result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( std::move(parts), - std::move(alter_conversions), metadata_snapshot, context_, indexes->key_condition, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index caa8aa2e1bd..57e19441b82 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -110,7 +110,7 @@ public: ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, - std::vector alter_conversions_, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot_, Names all_column_names_, const MergeTreeData & data_, const SelectQueryInfo & query_info_, @@ -154,7 +154,6 @@ public: static AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, @@ -166,8 +165,7 @@ public: std::optional & indexes, bool find_exact_ranges); - AnalysisResultPtr selectRangesToRead( - MergeTreeData::DataPartsVector parts, std::vector alter_conversions, bool find_exact_ranges = false) const; + AnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts, bool find_exact_ranges = false) const; AnalysisResultPtr selectRangesToRead(bool find_exact_ranges = false) const; @@ -188,7 +186,7 @@ public: void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); } const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; } - const std::vector & getAlterConvertionsForParts() const { return alter_conversions_for_parts; } + MergeTreeData::MutationsSnapshotPtr getMutationsSnapshot() const { return mutations_snapshot; } const MergeTreeData & getMergeTreeData() const { return data; } size_t getMaxBlockSize() const { return block_size.max_block_size_rows; } @@ -209,7 +207,7 @@ private: MergeTreeReaderSettings reader_settings; MergeTreeData::DataPartsVector prepared_parts; - std::vector alter_conversions_for_parts; + MergeTreeData::MutationsSnapshotPtr mutations_snapshot; Names all_column_names; diff --git a/src/Storages/MergeTree/AlterConversions.cpp b/src/Storages/MergeTree/AlterConversions.cpp index 31f8f17e2c1..82bef500b34 100644 --- a/src/Storages/MergeTree/AlterConversions.cpp +++ b/src/Storages/MergeTree/AlterConversions.cpp @@ -9,9 +9,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -bool AlterConversions::supportsMutationCommandType(MutationCommand::Type t) +bool AlterConversions::isSupportedDataMutation(MutationCommand::Type) { - return t == MutationCommand::Type::RENAME_COLUMN; + return false; +} + +bool AlterConversions::isSupportedMetadataMutation(MutationCommand::Type type) +{ + return type == MutationCommand::Type::RENAME_COLUMN; } void AlterConversions::addMutationCommand(const MutationCommand & command) diff --git a/src/Storages/MergeTree/AlterConversions.h b/src/Storages/MergeTree/AlterConversions.h index 0f857d351dd..68966f88f84 100644 --- a/src/Storages/MergeTree/AlterConversions.h +++ b/src/Storages/MergeTree/AlterConversions.h @@ -35,7 +35,8 @@ public: /// Get column old name before rename (lookup by key in rename_map) std::string getColumnOldName(const std::string & new_name) const; - static bool supportsMutationCommandType(MutationCommand::Type); + static bool isSupportedDataMutation(MutationCommand::Type type); + static bool isSupportedMetadataMutation(MutationCommand::Type type); private: /// Rename map new_name -> old_name. diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 7ab8fa2430a..5dab0cd0c08 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -257,6 +257,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (enabledBlockOffsetColumn(global_ctx)) addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); + auto mutations_snapshot = global_ctx->data->getMutationsSnapshot( + global_ctx->metadata_snapshot->getMetadataVersion(), + /*need_data_mutations=*/ false); + SerializationInfo::Settings info_settings = { .ratio_of_defaults_for_sparse = global_ctx->data->getSettings()->ratio_of_defaults_for_sparse_serialization, @@ -264,10 +268,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() }; SerializationInfoByName infos(global_ctx->storage_columns, info_settings); + global_ctx->alter_conversions.reserve(global_ctx->future_part->parts.size()); for (const auto & part : global_ctx->future_part->parts) { global_ctx->new_data_part->ttl_infos.update(part->ttl_infos); + if (global_ctx->metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(global_ctx->metadata_snapshot)) { LOG_INFO(ctx->log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name); @@ -288,6 +294,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() infos.add(part_infos); } + + global_ctx->alter_conversions.push_back(MergeTreeData::getAlterConversionsForPart(part, mutations_snapshot)); } const auto & local_part_min_ttl = global_ctx->new_data_part->ttl_infos.part_min_ttl; @@ -604,6 +612,7 @@ Pipe MergeTask::VerticalMergeStage::createPipeForReadingOneColumn(const String & *global_ctx->data, global_ctx->storage_snapshot, global_ctx->future_part->parts[part_num], + global_ctx->alter_conversions[part_num], Names{column_name}, /*mark_ranges=*/ {}, global_ctx->input_rows_filtered, @@ -996,13 +1005,14 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->horizontal_stage_progress = std::make_unique( ctx->column_sizes ? ctx->column_sizes->keyColumnsWeight() : 1.0); - for (const auto & part : global_ctx->future_part->parts) + for (size_t i = 0; i < global_ctx->future_part->parts.size(); ++i) { Pipe pipe = createMergeTreeSequentialSource( MergeTreeSequentialSourceType::Merge, *global_ctx->data, global_ctx->storage_snapshot, - part, + global_ctx->future_part->parts[i], + global_ctx->alter_conversions[i], global_ctx->merging_columns.getNames(), /*mark_ranges=*/ {}, global_ctx->input_rows_filtered, diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 56909d1b7a0..c394a47aff0 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -4,6 +4,7 @@ #include #include +#include "Storages/MergeTree/AlterConversions.h" #include #include @@ -154,6 +155,7 @@ private: StorageSnapshotPtr storage_snapshot{nullptr}; StorageMetadataPtr metadata_snapshot{nullptr}; FutureMergedMutatedPartPtr future_part{nullptr}; + std::vector alter_conversions; /// This will be either nullptr or new_data_part, so raw pointer is ok. IMergeTreeDataPart * parent_part{nullptr}; ContextPtr context{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f9cc65871fe..98c308f5fd1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8115,11 +8115,13 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S return true; } -AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartPtr part) const +AlterConversionsPtr MergeTreeData::getAlterConversionsForPart( + const MergeTreeDataPartPtr & part, + const MutationsSnapshotPtr & snapshot) { - auto commands = getAlterMutationCommandsForPart(part); - + auto commands = snapshot->getAlterMutationCommandsForPart(part); auto result = std::make_shared(); + for (const auto & command : commands | std::views::reverse) result->addMutationCommand(command); @@ -8427,9 +8429,9 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & object_columns_copy = object_columns; } - snapshot_data->alter_conversions.reserve(snapshot_data->parts.size()); - for (const auto & part : snapshot_data->parts) - snapshot_data->alter_conversions.push_back(getAlterConversionsForPart(part)); + snapshot_data->mutations_snapshot = getMutationsSnapshot( + metadata_snapshot->getMetadataVersion(), + query_context->getSettingsRef().apply_mutations_on_fly); return std::make_shared(*this, metadata_snapshot, std::move(object_columns_copy), std::move(snapshot_data)); } @@ -8616,28 +8618,59 @@ void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) } } -bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) +static void updateMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + Int64 increment) { + if (data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + + if (metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); + + bool has_data_mutation = false; + bool has_metadata_mutation = false; + for (const auto & command : commands) { - if (AlterConversions::supportsMutationCommandType(command.type)) + if (!has_data_mutation && AlterConversions::isSupportedDataMutation(command.type)) { - if (remove) - { - --alter_conversions_mutations; - if (alter_conversions_mutations < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations); - } - else - { - if (alter_conversions_mutations < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations); - ++alter_conversions_mutations; - } - return true; + data_mutations_to_apply += increment; + has_data_mutation = true; + + if (data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + } + + if (!has_metadata_mutation && AlterConversions::isSupportedMetadataMutation(command.type)) + { + metadata_mutations_to_apply += increment; + has_metadata_mutation = true; + + if (metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); } } - return false; +} + +void incrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & /*lock*/) +{ + return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, 1); +} + +void decrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & /*lock*/) +{ + return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, -1); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6f736a4afd..765b68b7559 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -35,6 +35,7 @@ #include #include #include +#include "Storages/ProjectionsDescription.h" #include #include @@ -445,12 +446,27 @@ public: bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; + struct IMutationsSnapshot + { + /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly + /// (i.e. when reading from the part). Mutations not supported by AlterConversions + /// (supportsMutationCommandType()) can be omitted. + /// + /// @return list of mutations, in *reverse* order (newest to oldest) + virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; + virtual std::shared_ptr cloneEmpty() const = 0; + + virtual ~IMutationsSnapshot() = default; + }; + + using MutationsSnapshotPtr = std::shared_ptr; + /// Snapshot for MergeTree contains the current set of data parts - /// at the moment of the start of query. + /// and mutations required to be applied at the moment of the start of query. struct SnapshotData : public StorageSnapshot::Data { DataPartsVector parts; - std::vector alter_conversions; + MutationsSnapshotPtr mutations_snapshot; }; StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; @@ -934,8 +950,13 @@ public: Disks getDisks() const { return getStoragePolicy()->getDisks(); } + /// TODO: comment + virtual MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const = 0; + /// Return alter conversions for part which must be applied on fly. - AlterConversionsPtr getAlterConversionsForPart(MergeTreeDataPartPtr part) const; + static AlterConversionsPtr getAlterConversionsForPart( + const MergeTreeDataPartPtr & part, + const MutationsSnapshotPtr & snapshot); /// Returns destination disk or volume for the TTL rule according to current storage policy. SpacePtr getDestinationForMoveTTL(const TTLDescription & move_ttl) const; @@ -1448,13 +1469,6 @@ protected: /// mechanisms for parts locking virtual bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const = 0; - /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly - /// (i.e. when reading from the part). Mutations not supported by AlterConversions - /// (supportsMutationCommandType()) can be omitted. - /// - /// @return list of mutations, in *reverse* order (newest to oldest) - virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; - struct PartBackupEntries { String part_name; @@ -1738,6 +1752,16 @@ struct CurrentlySubmergingEmergingTagger /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated -bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove); +void incrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & lock); + +void decrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & lock); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2e287ff3042..0ad7bd47648 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -130,12 +130,10 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( bool enable_parallel_reading) const { const auto & snapshot_data = assert_cast(*storage_snapshot->data); - const auto & parts = snapshot_data.parts; - const auto & alter_conversions = snapshot_data.alter_conversions; auto step = readFromParts( - parts, - alter_conversions, + snapshot_data.parts, + snapshot_data.mutations_snapshot, column_names_to_return, storage_snapshot, query_info, @@ -491,10 +489,9 @@ std::optional> MergeTreeDataSelectExecutor::filterPar } void MergeTreeDataSelectExecutor::filterPartsByPartition( + MergeTreeData::DataPartsVector & parts, const std::optional & partition_pruner, const std::optional & minmax_idx_condition, - MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, @@ -503,8 +500,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( LoggerPtr log, ReadFromMergeTree::IndexStats & index_stats) { - chassert(alter_conversions.empty() || parts.size() == alter_conversions.size()); - const Settings & settings = context->getSettingsRef(); DataTypes minmax_columns_types; @@ -528,7 +523,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( if (query_context->getSettingsRef().allow_experimental_query_deduplication) selectPartsToReadWithUUIDFilter( parts, - alter_conversions, part_values, data.getPinnedPartUUIDs(), minmax_idx_condition, @@ -541,7 +535,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( else selectPartsToRead( parts, - alter_conversions, part_values, minmax_idx_condition, minmax_columns_types, @@ -580,7 +573,6 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( MergeTreeData::DataPartsVector && parts, - std::vector && alter_conversions, StorageMetadataPtr metadata_snapshot, const ContextPtr & context, const KeyCondition & key_condition, @@ -593,8 +585,6 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd bool use_skip_indexes, bool find_exact_ranges) { - chassert(alter_conversions.empty() || parts.size() == alter_conversions.size()); - RangesInDataParts parts_with_ranges; parts_with_ranges.resize(parts.size()); const Settings & settings = context->getSettingsRef(); @@ -653,11 +643,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd auto process_part = [&](size_t part_index) { auto & part = parts[part_index]; - auto alter_conversions_for_part = !alter_conversions.empty() - ? alter_conversions[part_index] - : std::make_shared(); - RangesInDataPart ranges(part, alter_conversions_for_part, part_index); + RangesInDataPart ranges(part, part_index); size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal(); if (metadata_snapshot->hasPrimaryKey() || part_offset_condition) @@ -907,11 +894,11 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar return std::make_shared(); std::optional indexes; - /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for: - /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions. + /// NOTE: We don't need mutations snapshot because the returned analysis_result is only used for: + /// 1. estimate the number of rows to read; + /// 2. projection reading, which doesn't have alter conversions. return ReadFromMergeTree::selectRangesToRead( std::move(parts), - /*alter_conversions=*/{}, metadata_snapshot, query_info, context, @@ -926,7 +913,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const Names & column_names_to_return, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, @@ -948,7 +935,7 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts( return std::make_unique( std::move(parts), - std::move(alter_conversions), + std::move(mutations_snapshot), column_names_to_return, data, query_info, @@ -1546,7 +1533,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( void MergeTreeDataSelectExecutor::selectPartsToRead( MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, @@ -1555,10 +1541,7 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( PartFilterCounters & counters) { MergeTreeData::DataPartsVector prev_parts; - std::vector prev_conversions; - std::swap(prev_parts, parts); - std::swap(prev_conversions, alter_conversions); for (size_t i = 0; i < prev_parts.size(); ++i) { @@ -1600,14 +1583,11 @@ void MergeTreeDataSelectExecutor::selectPartsToRead( counters.num_granules_after_partition_pruner += num_granules; parts.push_back(prev_parts[i]); - if (!prev_conversions.empty()) - alter_conversions.push_back(prev_conversions[i]); } } void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids, const std::optional & minmax_idx_condition, @@ -1620,18 +1600,13 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( { /// process_parts prepare parts that have to be read for the query, /// returns false if duplicated parts' UUID have been met - auto select_parts = [&] ( - MergeTreeData::DataPartsVector & selected_parts, - std::vector & selected_conversions) -> bool + auto select_parts = [&](MergeTreeData::DataPartsVector & selected_parts) -> bool { auto ignored_part_uuids = query_context->getIgnoredPartUUIDs(); std::unordered_set temp_part_uuids; MergeTreeData::DataPartsVector prev_parts; - std::vector prev_conversions; - std::swap(prev_parts, selected_parts); - std::swap(prev_conversions, selected_conversions); for (size_t i = 0; i < prev_parts.size(); ++i) { @@ -1686,8 +1661,6 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( } selected_parts.push_back(prev_parts[i]); - if (!prev_conversions.empty()) - selected_conversions.push_back(prev_conversions[i]); } if (!temp_part_uuids.empty()) @@ -1706,7 +1679,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( }; /// Process parts that have to be read for a query. - auto needs_retry = !select_parts(parts, alter_conversions); + auto needs_retry = !select_parts(parts); /// If any duplicated part UUIDs met during the first step, try to ignore them in second pass. /// This may happen when `prefer_localhost_replica` is set and "distributed" stage runs in the same process with "remote" stage. @@ -1717,7 +1690,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( counters = PartFilterCounters(); /// Second attempt didn't help, throw an exception - if (!select_parts(parts, alter_conversions)) + if (!select_parts(parts)) throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate UUIDs while processing query."); } } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 788355c1e59..0d02456e480 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -40,7 +40,7 @@ public: /// The same as read, but with specified set of parts. QueryPlanStepPtr readFromParts( MergeTreeData::DataPartsVector parts, - std::vector alter_conversions, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info, @@ -120,7 +120,6 @@ private: /// as well as `max_block_number_to_read`. static void selectPartsToRead( MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, const std::optional & minmax_idx_condition, const DataTypes & minmax_columns_types, @@ -131,7 +130,6 @@ private: /// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded. static void selectPartsToReadWithUUIDFilter( MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, MergeTreeData::PinnedPartUUIDsPtr pinned_part_uuids, const std::optional & minmax_idx_condition, @@ -175,10 +173,9 @@ public: /// Filter parts using minmax index and partition key. static void filterPartsByPartition( + MergeTreeData::DataPartsVector & parts, const std::optional & partition_pruner, const std::optional & minmax_idx_condition, - MergeTreeData::DataPartsVector & parts, - std::vector & alter_conversions, const std::optional> & part_values, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData & data, @@ -192,7 +189,6 @@ public: /// If 'check_limits = true' it will throw exception if the amount of data exceed the limits from settings. static RangesInDataParts filterPartsByPrimaryKeyAndSkipIndexes( MergeTreeData::DataPartsVector && parts, - std::vector && alter_conversions, StorageMetadataPtr metadata_snapshot, const ContextPtr & context, const KeyCondition & key_condition, diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 4dbccb91620..6f06b921031 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "Storages/MutationCommands.h" #include @@ -50,7 +51,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, const TransactionID & tid_, const WriteSettings & settings) : create_time(time(nullptr)) - , commands(std::move(commands_)) + , commands(std::make_shared(std::move(commands_))) , disk(std::move(disk_)) , path_prefix(path_prefix_) , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") @@ -63,7 +64,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP *out << "format version: 1\n" << "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n"; *out << "commands: "; - commands.writeText(*out, /* with_pure_metadata_commands = */ false); + commands->writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; if (tid.isPrehistoric()) { @@ -116,7 +117,8 @@ void MergeTreeMutationEntry::writeCSN(CSN csn_) } MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & path_prefix_, const String & file_name_) - : disk(std::move(disk_)) + : commands(std::make_shared()) + , disk(std::move(disk_)) , path_prefix(path_prefix_) , file_name(file_name_) , is_temp(false) @@ -133,7 +135,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & pat create_time_dt.hour(), create_time_dt.minute(), create_time_dt.second()); *buf >> "commands: "; - commands.readText(*buf); + commands->readText(*buf); *buf >> "\n"; if (buf->eof()) @@ -177,7 +179,7 @@ std::shared_ptr MergeTreeMutationEntry::backup() const out << "block number: " << block_number << "\n"; out << "commands: "; - commands.writeText(out, /* with_pure_metadata_commands = */ false); + commands->writeText(out, /* with_pure_metadata_commands = */ false); out << "\n"; return std::make_shared(out.str()); diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 04297f2852a..f41ad2a17f8 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -16,7 +16,7 @@ class IBackupEntry; struct MergeTreeMutationEntry { time_t create_time = 0; - MutationCommands commands; + std::shared_ptr commands; DiskPtr disk; String path_prefix; diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 2c249f7b63b..2aaf06fde7f 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -84,6 +84,7 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get() MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -94,6 +95,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(mutations_snapshot_), std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index a3a57227630..65a7d62ad2d 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -19,6 +19,7 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo public: MergeTreePrefetchedReadPool( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index dc1ba030f45..d45e2e9c578 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -35,6 +35,7 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column MergeTreeReadPool::MergeTreeReadPool( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -45,6 +46,7 @@ MergeTreeReadPool::MergeTreeReadPool( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(mutations_snapshot_), std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index cb0e8a9657f..d7b354a2799 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -26,6 +26,7 @@ public: MergeTreeReadPool( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 0ea19370d45..2935890cba5 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes MergeTreeReadPoolBase::MergeTreeReadPoolBase( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -22,6 +23,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const PoolSettings & pool_settings_, const ContextPtr & context_) : parts_ranges(std::move(parts_)) + , mutations_snapshot(std::move(mutations_snapshot_)) , shared_virtual_fields(std::move(shared_virtual_fields_)) , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) @@ -67,9 +69,9 @@ void MergeTreeReadPoolBase::fillPerPartInfos() } read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; - read_task_info.alter_conversions = part_with_ranges.alter_conversions; + read_task_info.alter_conversions = MergeTreeData::getAlterConversionsForPart(part_with_ranges.data_part, mutations_snapshot); - LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions); + LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, read_task_info.alter_conversions); read_task_info.task_columns = getReadTaskColumns( part_info, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 1b5bfec5898..8286ff52a5c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -9,6 +9,8 @@ namespace DB class MergeTreeReadPoolBase : public IMergeTreeReadPool { public: + using MutationsSnapshotPtr = MergeTreeData::MutationsSnapshotPtr; + struct PoolSettings { size_t threads = 0; @@ -23,6 +25,7 @@ public: MergeTreeReadPoolBase( RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -37,6 +40,7 @@ public: protected: /// Initialized in constructor const RangesInDataParts parts_ranges; + const MutationsSnapshotPtr mutations_snapshot; const VirtualFields shared_virtual_fields; const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 4c0391ffa57..60f127acdae 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -12,6 +12,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -22,6 +23,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(mutations_snapshot_), std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index 9fedf396a6b..a3668acb170 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -11,6 +11,7 @@ public: bool has_limit_below_one_block_, MergeTreeReadType read_type_, RangesInDataParts parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 38035d97f56..0d615fae443 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -23,6 +24,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(mutations_snapshot_), std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index ca159edb91c..d9d628b8be2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -11,6 +11,7 @@ public: MergeTreeReadPoolParallelReplicas( ParallelReadingExtension extension_, RangesInDataParts && parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 01c0a9f91be..b1f9ffc8ea4 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -12,6 +12,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, @@ -22,6 +23,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), + std::move(mutations_snapshot_), std::move(shared_virtual_fields_), storage_snapshot_, prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 4fe3f7a699c..7c549ed3c4a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -12,6 +12,7 @@ public: ParallelReadingExtension extension_, CoordinationMode mode_, RangesInDataParts parts_, + MutationsSnapshotPtr mutations_snapshot_, VirtualFields shared_virtual_fields_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 02f8d6f4f6a..bd4aa066dfd 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -13,6 +13,7 @@ #include #include #include +#include "Storages/MergeTree/AlterConversions.h" #include namespace DB @@ -38,6 +39,7 @@ public: const MergeTreeData & storage_, const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, + AlterConversionsPtr alter_conversions_, Names columns_to_read_, std::optional mark_ranges_, bool apply_deleted_mask, @@ -62,6 +64,9 @@ private: /// Data part will not be removed if the pointer owns it MergeTreeData::DataPartPtr data_part; + /// TODO: comment. + AlterConversionsPtr alter_conversions; + /// Columns we have to read (each Block from read will contain them) Names columns_to_read; @@ -91,6 +96,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const MergeTreeData & storage_, const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, + AlterConversionsPtr alter_conversions_, Names columns_to_read_, std::optional mark_ranges_, bool apply_deleted_mask, @@ -100,6 +106,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( , storage(storage_) , storage_snapshot(storage_snapshot_) , data_part(std::move(data_part_)) + , alter_conversions(std::move(alter_conversions_)) , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) , mark_ranges(std::move(mark_ranges_)) @@ -113,8 +120,6 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( LOG_DEBUG(log, "Reading {} marks from part {}, total {} rows starting from the beginning of the part", data_part->getMarksCount(), data_part->name, data_part->rows_count); - auto alter_conversions = storage.getAlterConversionsForPart(data_part); - /// Note, that we don't check setting collaborate_with_coordinator presence, because this source /// is only used in background merges. addTotalRowsApprox(data_part->rows_count); @@ -300,6 +305,7 @@ Pipe createMergeTreeSequentialSource( const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, + AlterConversionsPtr alter_conversions, Names columns_to_read, std::optional mark_ranges, std::shared_ptr> filtered_rows_count, @@ -316,7 +322,8 @@ Pipe createMergeTreeSequentialSource( columns_to_read.emplace_back(RowExistsColumn::name); auto column_part_source = std::make_shared(type, - storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), + storage, storage_snapshot, data_part, alter_conversions, + columns_to_read, std::move(mark_ranges), /*apply_deleted_mask=*/ false, read_with_direct_io, prefetch); Pipe pipe(std::move(column_part_source)); @@ -347,6 +354,7 @@ public: const MergeTreeData & storage_, const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, + AlterConversionsPtr alter_conversions_, Names columns_to_read_, bool apply_deleted_mask_, ActionsDAGPtr filter_, @@ -357,6 +365,7 @@ public: , storage(storage_) , storage_snapshot(storage_snapshot_) , data_part(std::move(data_part_)) + , alter_conversions(std::move(alter_conversions_)) , columns_to_read(std::move(columns_to_read_)) , apply_deleted_mask(apply_deleted_mask_) , filter(std::move(filter_)) @@ -400,6 +409,7 @@ public: storage, storage_snapshot, data_part, + alter_conversions, columns_to_read, std::move(mark_ranges), /*filtered_rows_count=*/ nullptr, @@ -415,6 +425,7 @@ private: const MergeTreeData & storage; StorageSnapshotPtr storage_snapshot; MergeTreeData::DataPartPtr data_part; + AlterConversionsPtr alter_conversions; Names columns_to_read; bool apply_deleted_mask; ActionsDAGPtr filter; @@ -428,6 +439,7 @@ void createReadFromPartStep( const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, + AlterConversionsPtr alter_conversions, Names columns_to_read, bool apply_deleted_mask, ActionsDAGPtr filter, @@ -435,7 +447,8 @@ void createReadFromPartStep( LoggerPtr log) { auto reading = std::make_unique(type, - storage, storage_snapshot, std::move(data_part), + storage, storage_snapshot, + std::move(data_part), std::move(alter_conversions), std::move(columns_to_read), apply_deleted_mask, filter, std::move(context), log); diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index e6f055f776c..1ecc721d4a8 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -21,6 +21,7 @@ Pipe createMergeTreeSequentialSource( const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, + AlterConversionsPtr alter_conversions, Names columns_to_read, std::optional mark_ranges, std::shared_ptr> filtered_rows_count, @@ -36,6 +37,7 @@ void createReadFromPartStep( const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, MergeTreeData::DataPartPtr data_part, + AlterConversionsPtr alter_conversions, Names columns_to_read, bool apply_deleted_mask, ActionsDAGPtr filter, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..3c4ef44dbd8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -29,6 +29,7 @@ #include #include #include +#include "Storages/MergeTree/AlterConversions.h" #include @@ -104,6 +105,7 @@ static UInt64 getExistingRowsCount(const Block & block) static void splitAndModifyMutationCommands( MergeTreeData::DataPartPtr part, StorageMetadataPtr metadata_snapshot, + AlterConversionsPtr alter_conversions, const MutationCommands & commands, MutationCommands & for_interpreter, MutationCommands & for_file_renames, @@ -169,8 +171,6 @@ static void splitAndModifyMutationCommands( } - auto alter_conversions = part->storage.getAlterConversionsForPart(part); - /// We don't add renames from commands, instead we take them from rename_map. /// It's important because required renames depend not only on part's data version (i.e. mutation version) /// but also on part's metadata version. Why we have such logic only for renames? Because all other types of alter @@ -286,7 +286,6 @@ static void splitAndModifyMutationCommands( } } - auto alter_conversions = part->storage.getAlterConversionsForPart(part); /// We don't add renames from commands, instead we take them from rename_map. /// It's important because required renames depend not only on part's data version (i.e. mutation version) /// but also on part's metadata version. Why we have such logic only for renames? Because all other types of alter @@ -2119,6 +2118,14 @@ bool MutateTask::prepare() ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); + auto mutations_snapshot = ctx->data->getMutationsSnapshot( + ctx->metadata_snapshot->getMetadataVersion(), + /*need_data_mutations=*/ false); + + auto alter_conversions = MergeTreeData::getAlterConversionsForPart( + ctx->source_part, + mutations_snapshot); + auto context_for_reading = Context::createCopy(ctx->context); /// Allow mutations to work when force_index_by_date or force_primary_key is on. @@ -2133,7 +2140,7 @@ bool MutateTask::prepare() ctx->commands_for_part.emplace_back(command); if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) + *ctx->data, ctx->source_part, mutations_snapshot, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); @@ -2192,8 +2199,13 @@ bool MutateTask::prepare() context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); MutationHelpers::splitAndModifyMutationCommands( - ctx->source_part, ctx->metadata_snapshot, - ctx->commands_for_part, ctx->for_interpreter, ctx->for_file_renames, ctx->log); + ctx->source_part, + ctx->metadata_snapshot, + alter_conversions, + ctx->commands_for_part, + ctx->for_interpreter, + ctx->for_file_renames, + ctx->log); ctx->stage_progress = std::make_unique(1.0); @@ -2205,7 +2217,8 @@ bool MutateTask::prepare() settings.apply_deleted_mask = false; ctx->interpreter = std::make_unique( - *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter, + *ctx->data, ctx->source_part, alter_conversions, + ctx->metadata_snapshot, ctx->for_interpreter, ctx->metadata_snapshot->getColumns().getNamesOfPhysical(), context_for_reading, settings); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); diff --git a/src/Storages/MergeTree/RangesInDataPart.h b/src/Storages/MergeTree/RangesInDataPart.h index bf9e4c7dfb2..966637d0812 100644 --- a/src/Storages/MergeTree/RangesInDataPart.h +++ b/src/Storages/MergeTree/RangesInDataPart.h @@ -42,7 +42,6 @@ struct RangesInDataPartsDescription: public std::deque #include #include +#include +#include #include #include @@ -949,7 +951,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper { const auto commands = entry.commands; it = mutations_by_znode.erase(it); - updateAlterConversionsMutations(commands, alter_conversions_mutations, /* remove= */ true); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, lock); } else it = mutations_by_znode.erase(it); @@ -1001,7 +1003,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)) .first->second; - updateAlterConversionsMutations(entry->commands, alter_conversions_mutations, /* remove= */ false); + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); NOEXCEPT_SCOPE({ for (const auto & pair : entry->block_numbers) { @@ -1075,7 +1077,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( } mutations_by_znode.erase(it); - /// updateAlterConversionsMutations() will be called in updateMutations() + /// decrementMutationsCounters() will be called in updateMutations() LOG_DEBUG(log, "Removed mutation {} from local state.", entry->znode_name); } @@ -1899,25 +1901,15 @@ ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zk return ReplicatedMergeTreeMergePredicate(*this, zookeeper, std::move(partition_ids_hint)); } - -MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const +MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const { - int32_t part_metadata_version = part->getMetadataVersion(); - int32_t metadata_version = storage.getInMemoryMetadataPtr()->getMetadataVersion(); - - chassert(alter_conversions_mutations >= 0); - /// NOTE: that just checking part_metadata_version is not enough, since we - /// need to check for non-metadata mutations as well. - if (alter_conversions_mutations == 0 && metadata_version == part_metadata_version) - return {}; - - std::unique_lock lock(state_mutex); - auto in_partition = mutations_by_partition.find(part->info.partition_id); if (in_partition == mutations_by_partition.end()) return {}; Int64 part_data_version = part->info.getDataVersion(); + int32_t part_metadata_version = part->getMetadataVersion(); + MutationCommands result; bool seen_all_data_mutations = false; @@ -1926,20 +1918,22 @@ MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const auto add_to_result = [&](const ReplicatedMergeTreeMutationEntryPtr & entry) { for (const auto & command : entry->commands | std::views::reverse) - if (AlterConversions::supportsMutationCommandType(command.type)) - result.emplace_back(command); + { + if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + result.push_back(command); + else if (AlterConversions::isSupportedMetadataMutation(command.type)) + result.push_back(command); + } }; /// Here we return mutation commands for part which has bigger alter version than part metadata version. /// Please note, we don't use getDataVersion(). It's because these alter commands are used for in-fly conversions /// of part's metadata. - for (const auto & [mutation_version, mutation_status] : in_partition->second | std::views::reverse) + for (const auto & [mutation_version, entry] : in_partition->second | std::views::reverse) { if (seen_all_data_mutations && seen_all_metadata_mutations) break; - auto & entry = mutation_status->entry; - auto alter_version = entry->alter_version; if (alter_version != -1) { @@ -1964,6 +1958,48 @@ MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const return result; } +MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const +{ + auto res = std::make_shared(); + res->metadata_version = metadata_version; + res->need_data_mutations = need_data_mutations; + + std::lock_guard lock(state_mutex); + + bool have_data_mutations = res->need_data_mutations && data_mutations_to_apply > 0; + bool have_metadata_mutations = metadata_mutations_to_apply > 0; + + if (!have_data_mutations && !have_metadata_mutations) + return res; + + for (const auto & [partition_id, mutations] : mutations_by_partition) + { + auto & in_partition = res->mutations_by_partition[partition_id]; + + for (const auto & [version, status] : mutations | std::views::reverse) + { + if (status->is_done) + break; + + bool has_required_command = std::ranges::any_of(status->entry->commands, [&](const auto & command) + { + if (have_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + return true; + + if (have_metadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) + return true; + + return false; + }); + + if (has_required_command) + in_partition.emplace(version, status->entry); + } + } + + return res; +} + MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( const MergeTreeData::DataPartPtr & part, Int64 desired_mutation_version, Strings & mutation_ids) const { @@ -2044,7 +2080,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep mutation.parts_to_do.clear(); } - updateAlterConversionsMutations(mutation.entry->commands, alter_conversions_mutations, /* remove= */ true); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, mutation.entry->commands, lock); } else if (mutation.parts_to_do.size() == 0) { @@ -2101,7 +2137,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } - updateAlterConversionsMutations(entry->commands, alter_conversions_mutations, /* remove= */ true); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 89ef6240558..f9d5487ee3f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -151,8 +152,11 @@ private: /// Mapping from znode path to Mutations Status std::map mutations_by_znode; - /// Unfinished mutations that is required AlterConversions (see getAlterMutationCommandsForPart()) - std::atomic alter_conversions_mutations = 0; + + /// Unfinished mutations that are required for AlterConversions. + Int64 data_mutations_to_apply = 0; + Int64 metadata_mutations_to_apply = 0; + /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; /// Znode ID of the latest mutation that is done. @@ -409,10 +413,24 @@ public: MutationCommands getMutationCommands(const MergeTreeData::DataPartPtr & part, Int64 desired_mutation_version, Strings & mutation_ids) const; + struct MutationsSnapshot : public MergeTreeData::IMutationsSnapshot + { + MutationsSnapshot() = default; + + Int64 metadata_version = -1; + bool need_data_mutations = false; + + using MutationsByPartititon = std::unordered_map>; + MutationsByPartititon mutations_by_partition; + + MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override; + std::shared_ptr cloneEmpty() const override { return std::make_shared(); } + }; + /// Return mutation commands for part which could be not applied to /// it according to part mutation version. Used when we apply alter commands on fly, /// without actual data modification on disk. - MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; + MergeTreeData::MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const; /// Mark finished mutations as done. If the function needs to be called again at some later time /// (because some mutations are probably done but we are not sure yet), returns true. diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index a94508ad41f..f871157c2c9 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -26,10 +26,12 @@ class StorageFromMergeTreeDataPart final : public IStorage { public: /// Used in part mutation. - explicit StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) + explicit StorageFromMergeTreeDataPart( + const MergeTreeData::DataPartPtr & part_, + const MergeTreeData::MutationsSnapshotPtr & mutations_snapshot_) : IStorage(getIDFromPart(part_)) , parts({part_}) - , alter_conversions({part_->storage.getAlterConversionsForPart(part_)}) + , mutations_snapshot(mutations_snapshot_) , storage(part_->storage) , partition_id(part_->info.partition_id) { @@ -71,10 +73,11 @@ public: size_t max_block_size, size_t num_streams) override { + /// TODO: fix query_plan.addStep(MergeTreeDataSelectExecutor(storage) .readFromParts( parts, - alter_conversions, + mutations_snapshot, column_names, storage_snapshot, query_info, @@ -121,7 +124,7 @@ public: private: const MergeTreeData::DataPartsVector parts; - const std::vector alter_conversions; + const MergeTreeData::MutationsSnapshotPtr mutations_snapshot; const MergeTreeData & storage; const String partition_id; const ReadFromMergeTree::AnalysisResultPtr analysis_result_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9255ee00340..636d2ba5d53 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -498,18 +498,11 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context if (txn) txn->addMutation(shared_from_this(), mutation_id); - bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); - bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second; + auto [it, inserted] = current_mutations_by_version.try_emplace(version, std::move(entry)); if (!inserted) - { - if (alter_conversions_mutations_updated) - { - --alter_conversions_mutations; - chassert(alter_conversions_mutations >= 0); - } throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - } + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } background_operations_assignee.trigger(); @@ -545,7 +538,7 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); - updateAlterConversionsMutations(it->second.commands, alter_conversions_mutations, /* remove= */ true); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry.commands, lock); } } else @@ -744,17 +737,15 @@ std::map StorageMergeTree::getUnfinishedMutationC std::map result; - for (const auto & kv : current_mutations_by_version) + for (const auto & [mutation_version, entry] : current_mutations_by_version) { - Int64 mutation_version = kv.first; - const MergeTreeMutationEntry & entry = kv.second; - const PartVersionWithName needle{mutation_version, ""}; + const PartVersionWithName needle{static_cast(mutation_version), ""}; auto versions_it = std::lower_bound( part_versions_with_names.begin(), part_versions_with_names.end(), needle, comparator); size_t parts_to_do = versions_it - part_versions_with_names.begin(); if (parts_to_do > 0) - result.emplace(entry.file_name, entry.commands); + result.emplace(entry.file_name, *entry.commands); } return result; } @@ -787,7 +778,7 @@ std::vector StorageMergeTree::getMutationsStatus() cons std::map block_numbers_map({{"", entry.block_number}}); - for (const MutationCommand & command : entry.commands) + for (const MutationCommand & command : *entry.commands) { WriteBufferFromOwnString buf; formatAST(*command.ast, buf, false, true); @@ -824,20 +815,15 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) auto it = current_mutations_by_version.find(mutation_version); if (it != current_mutations_by_version.end()) { - bool mutation_finished = true; if (std::optional min_version = getMinPartDataVersion()) - mutation_finished = *min_version > static_cast(mutation_version); + { + bool mutation_finished = *min_version > static_cast(mutation_version); + if (!mutation_finished) + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); + } to_kill.emplace(std::move(it->second)); - - if (!mutation_finished) - { - const auto commands = it->second.commands; - current_mutations_by_version.erase(it); - updateAlterConversionsMutations(commands, alter_conversions_mutations, /* remove= */ true); - } - else - current_mutations_by_version.erase(it); + current_mutations_by_version.erase(it); } } @@ -885,6 +871,8 @@ void StorageMergeTree::loadDeduplicationLog() void StorageMergeTree::loadMutations() { + std::lock_guard lock(currently_processing_in_background_mutex); + for (const auto & disk : getDisks()) { for (auto it = disk->iterateDirectory(relative_data_path); it->isValid(); it->next()) @@ -893,7 +881,7 @@ void StorageMergeTree::loadMutations() { MergeTreeMutationEntry entry(disk, relative_data_path, it->name()); UInt64 block_number = entry.block_number; - LOG_DEBUG(log, "Loading mutation: {} entry, commands size: {}", it->name(), entry.commands.size()); + LOG_DEBUG(log, "Loading mutation: {} entry, commands size: {}", it->name(), entry.commands->size()); if (!entry.tid.isPrehistoric() && !entry.csn) { @@ -912,10 +900,11 @@ void StorageMergeTree::loadMutations() } } - auto inserted = current_mutations_by_version.try_emplace(block_number, std::move(entry)).second; + auto [entry_it, inserted] = current_mutations_by_version.try_emplace(block_number, std::move(entry)); if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); - updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); + + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry_it->second.commands, lock); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -1264,7 +1253,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( size_t commands_size = 0; MutationCommands commands_for_size_validation; - for (const auto & command : it->second.commands) + for (const auto & command : *it->second.commands) { if (command.type != MutationCommand::Type::DROP_COLUMN && command.type != MutationCommand::Type::DROP_INDEX @@ -1308,11 +1297,11 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( const auto & single_mutation_commands = it->second.commands; - if (single_mutation_commands.containBarrierCommand()) + if (single_mutation_commands->containBarrierCommand()) { if (commands->empty()) { - commands->insert(commands->end(), single_mutation_commands.begin(), single_mutation_commands.end()); + commands->insert(commands->end(), single_mutation_commands->begin(), single_mutation_commands->end()); last_mutation_to_apply = it; } break; @@ -1320,7 +1309,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( else { current_ast_elements += commands_size; - commands->insert(commands->end(), single_mutation_commands.begin(), single_mutation_commands.end()); + commands->insert(commands->end(), single_mutation_commands->begin(), single_mutation_commands->end()); last_mutation_to_apply = it; } @@ -2431,34 +2420,62 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) } } - -MutationCommands StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MutationCommands StorageMergeTree::MutationsSnapshot::getAlterMutationCommandsForPart(const DataPartPtr & part) const { - /// NOTE: there is no need to check part metadata_version, since - /// ALTER_METADATA cannot be done asynchronously, like in - /// ReplicatedMergeTree. - chassert(alter_conversions_mutations >= 0); - if (alter_conversions_mutations == 0) - return {}; - - std::lock_guard lock(currently_processing_in_background_mutex); - - UInt64 part_data_version = part->info.getDataVersion(); MutationCommands result; + UInt64 part_data_version = part->info.getDataVersion(); - for (const auto & [mutation_version, entry] : current_mutations_by_version | std::views::reverse) + for (const auto & [mutation_version, commands] : mutations_by_version | std::views::reverse) { if (mutation_version <= part_data_version) break; - for (const auto & command : entry.commands | std::views::reverse) - if (AlterConversions::supportsMutationCommandType(command.type)) - result.emplace_back(command); + for (const auto & command : *commands | std::views::reverse) + { + if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + result.push_back(command); + else if (AlterConversions::isSupportedMetadataMutation(command.type)) + result.push_back(command); + } } return result; } +MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const +{ + auto res = std::make_shared(); + res->metadata_version = metadata_version; + res->need_data_mutations = need_data_mutations; + + std::lock_guard lock(currently_processing_in_background_mutex); + + bool have_data_mutations = res->need_data_mutations && data_mutations_to_apply > 0; + bool have_metadata_mutations = metadata_mutations_to_apply > 0; + + if (!have_data_mutations && !have_metadata_mutations) + return res; + + for (const auto & [version, entry] : current_mutations_by_version) + { + bool has_required_command = std::ranges::any_of(*entry.commands, [&](const auto & command) + { + if (have_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + return true; + + if (have_metadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) + return true; + + return false; + }); + + if (has_required_command) + res->mutations_by_version.emplace(version, entry.commands); + } + + return res; +} + void StorageMergeTree::startBackgroundMovesIfNeeded() { if (areBackgroundMovesNeeded()) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 4d819508934..1f2af8b9571 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -17,6 +17,7 @@ #include #include +#include "Storages/MutationCommands.h" namespace DB @@ -147,8 +148,10 @@ private: DataParts currently_merging_mutating_parts; std::map current_mutations_by_version; - /// Unfinished mutations that is required AlterConversions (see getAlterMutationCommandsForPart()) - std::atomic alter_conversions_mutations = 0; + + /// Unfinished mutations that are required for AlterConversions. + Int64 data_mutations_to_apply = 0; + Int64 metadata_mutations_to_apply = 0; std::atomic shutdown_called {false}; std::atomic flush_called {false}; @@ -309,8 +312,21 @@ private: ContextPtr context; }; -protected: - MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + struct MutationsSnapshot : public IMutationsSnapshot + { + MutationsSnapshot() = default; + + Int64 metadata_version = -1; + bool need_data_mutations = false; + + using MutationsByVersion = std::map>; + MutationsByVersion mutations_by_version; + + MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override; + std::shared_ptr cloneEmpty() const override { return std::make_shared(); } + }; + + MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const override; }; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a127384c03c..e2bba1e8068 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9149,13 +9149,11 @@ bool StorageReplicatedMergeTree::canUseAdaptiveGranularity() const (!has_non_adaptive_index_granularity_parts && !other_replicas_fixed_granularity)); } - -MutationCommands StorageReplicatedMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MergeTreeData::MutationsSnapshotPtr StorageReplicatedMergeTree::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const { - return queue.getAlterMutationCommandsForPart(part); + return queue.getMutationsSnapshot(metadata_version, need_data_mutations); } - void StorageReplicatedMergeTree::startBackgroundMovesIfNeeded() { if (areBackgroundMovesNeeded()) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index f96206ce657..3ef367d09ce 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -932,7 +932,7 @@ private: void waitMutationToFinishOnReplicas( const Strings & replicas, const String & mutation_id) const; - MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const override; void startBackgroundMovesIfNeeded() override; From 306d55f636722ce6098ffca743e8f8e7a8d579f5 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 29 Jun 2024 18:29:05 -0300 Subject: [PATCH 144/844] add some docs --- .../en/sql-reference/statements/alter/user.md | 19 ++++++++++++++++++- .../sql-reference/statements/create/user.md | 8 +++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index b5c156f56a9..6fff131bba3 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] + [NOT IDENTIFIED | IDENTIFIED | ADD IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] @@ -62,3 +62,20 @@ Allows the user with `john` account to grant his privileges to the user with `ja ``` sql ALTER USER john GRANTEES jack; ``` + +Adds new authentication methods to the user while keeping the existing ones + +``` sql +ALTER USER user ADD IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3' +``` + +Reset authentication methods and adds the ones specified in the query (effect of leading IDENTIFIED without the ADD keyword) + +``` sql +ALTER USER user IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3' +``` + +Reset authentication methods and keep the most recent added one +``` sql +ALTER USER user RESET AUTHENTICATION METHODS TO NEW +``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index aee98cfcd10..8e58f6d87ac 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] + [NOT IDENTIFIED | IDENTIFIED | ADD IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [IN access_storage_type] @@ -144,6 +144,12 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re The available password types are: `plaintext_password`, `sha256_password`, `double_sha1_password`. +7. Multiple authentication methods can be specified: + + ```sql + CREATE USER user1 IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3'' + ``` + ## User Host User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: From 6f020901a899d30be6f12d1c10fe331393a9b4f3 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sat, 29 Jun 2024 18:39:01 -0300 Subject: [PATCH 145/844] add a few comments --- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 5d0d2c434bd..b5d5c13d9e4 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -58,11 +58,14 @@ namespace "be explicitly specified, check the setting allow_implicit_no_password " "in the server configuration"); + // if user does not have an authentication method and it has not been specified in the query, + // add a default one if (user.authentication_methods.empty() && authentication_methods.empty()) { user.authentication_methods.emplace_back(); } + // a leading IDENTIFIED WITH will drop existing authentication methods in favor of new ones if (replace_authentication_methods) { user.authentication_methods.clear(); @@ -73,6 +76,7 @@ namespace user.authentication_methods.emplace_back(authentication_method); } + // drop existing ones and keep the most recent if (reset_authentication_methods) { auto backup_authentication_method = user.authentication_methods.back(); From 37fbf905dda2cdeb683488b46ff364531d1ac2d5 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 30 Jun 2024 20:33:43 +0000 Subject: [PATCH 146/844] Use only local snapshot for in order coordinator --- .../ParallelReplicasReadingCoordinator.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f66cfdafa1a..b510da13d90 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -836,6 +836,7 @@ public: Parts all_parts_to_read; size_t total_rows_to_read = 0; + bool state_initialized{false}; LoggerPtr log = getLogger(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator")); }; @@ -857,6 +858,11 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa { LOG_TRACE(log, "Received an announcement {}", announcement.describe()); + ++stats[announcement.replica_num].number_of_requests; + + if (state_initialized) + return; + size_t new_rows_to_read = 0; /// To get rid of duplicates @@ -886,7 +892,7 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa std::sort(ranges.begin(), ranges.end()); } - ++stats[announcement.replica_num].number_of_requests; + state_initialized = true; // progress_callback is not set when local plan is used for initiator if (progress_callback && new_rows_to_read > 0) @@ -923,8 +929,15 @@ ParallelReadResponse InOrderCoordinator::handleRequest(ParallelReadRequest if (global_part_it == all_parts_to_read.end()) continue; + if (global_part_it->replicas.empty()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Part {} requested by replica {} is not registered in working set", + part.info.getPartNameV1(), + request.replica_num); + if (!global_part_it->replicas.contains(request.replica_num)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} doesn't exist on replica {} according to the global state", part.info.getPartNameV1(), request.replica_num); + continue; size_t current_mark_size = 0; From 6b3750ff83f0801322ac0870a30339e34e9182aa Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 1 Jul 2024 08:28:54 +0000 Subject: [PATCH 147/844] Clean up --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 3 ++- .../queries/0_stateless/02841_parallel_replicas_summary.sh | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7d798e2a399..8ecb36aab7f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1954,7 +1954,8 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons { auto result = getAnalysisResult(); - if (is_parallel_reading_from_replicas && context->canUseParallelReplicasOnInitiator()) + if (is_parallel_reading_from_replicas && context->canUseParallelReplicasOnInitiator() + && context->getSettingsRef().parallel_replicas_local_plan) { CoordinationMode mode = CoordinationMode::Default; switch (result.read_type) diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh index bff56feacbf..01e2883c547 100755 --- a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh +++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh @@ -27,7 +27,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE replicas_summary (n Int64) ENGINE = Mer query_id_base="02841_summary_$CLICKHOUSE_DATABASE" -# TODO: rethink the test, for now temporary disable allow_experimental_analyzer +# TODO: rethink the test, for now temporary disable parallel_replicas_local_plan echo " SELECT * FROM replicas_summary @@ -38,7 +38,7 @@ echo " allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, interactive_delay=0, - allow_experimental_analyzer=0 + parallel_replicas_local_plan=0 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_0" --data-binary @- -vvv 2>&1 \ | grep "Summary" | grep -cv '"read_rows":"0"' @@ -53,7 +53,7 @@ echo " allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, interactive_delay=99999999999, - allow_experimental_analyzer=0 + parallel_replicas_local_plan=0 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_high" --data-binary @- -vvv 2>&1 \ | grep "Summary" | grep -cv '"read_rows":"0"' From 105d39b09f241f4b48b70425ba07288d32ae107f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 2 Jul 2024 15:06:19 +0000 Subject: [PATCH 148/844] Twick number of threads --- src/Planner/PlannerJoinTree.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 094562a2837..a4a8dd08561 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -958,6 +958,18 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres table_expression_query_info.storage_limits, std::move(reading_step)); query_plan = std::move(query_plan_parallel_replicas); + + if (settings.parallel_replicas_local_plan) + { + const auto old_max_threads = query_plan.getMaxThreads(); + query_plan.setMaxThreads(old_max_threads * 2); + + LOG_TRACE( + getLogger("Planner"), + "Increase max threads from {} to {} to have similar number of threads to remote plan", + old_max_threads, + query_plan.getMaxThreads()); + } } else { From cb884d0ac7461499badc169380b9136941258693 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jul 2024 15:35:52 +0000 Subject: [PATCH 149/844] fix applying of metadata mutations --- src/Storages/MergeTree/AlterConversions.cpp | 1 + src/Storages/MergeTree/MergeTask.cpp | 10 +- src/Storages/MergeTree/MergeTreeData.cpp | 72 ++++++------ src/Storages/MergeTree/MergeTreeData.h | 34 +++--- src/Storages/MergeTree/MutateTask.cpp | 13 ++- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 103 ++++++++++++------ .../MergeTree/ReplicatedMergeTreeQueue.h | 11 +- src/Storages/StorageMergeTree.cpp | 26 ++--- src/Storages/StorageMergeTree.h | 9 +- src/Storages/StorageReplicatedMergeTree.cpp | 4 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- 11 files changed, 158 insertions(+), 127 deletions(-) diff --git a/src/Storages/MergeTree/AlterConversions.cpp b/src/Storages/MergeTree/AlterConversions.cpp index 82bef500b34..a36611e3d87 100644 --- a/src/Storages/MergeTree/AlterConversions.cpp +++ b/src/Storages/MergeTree/AlterConversions.cpp @@ -11,6 +11,7 @@ namespace ErrorCodes bool AlterConversions::isSupportedDataMutation(MutationCommand::Type) { + /// Currently there is no such mutations. See setting 'apply_mutations_on_fly'. return false; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 5dab0cd0c08..08e6f654f15 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -257,9 +257,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (enabledBlockOffsetColumn(global_ctx)) addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type); - auto mutations_snapshot = global_ctx->data->getMutationsSnapshot( - global_ctx->metadata_snapshot->getMetadataVersion(), - /*need_data_mutations=*/ false); + MergeTreeData::IMutationsSnapshot::Params params + { + .metadata_version = global_ctx->metadata_snapshot->getMetadataVersion(), + .min_part_metadata_version = MergeTreeData::getMinMetadataVersion(global_ctx->future_part->parts), + }; + + auto mutations_snapshot = global_ctx->data->getMutationsSnapshot(params); SerializationInfo::Settings info_settings = { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 98c308f5fd1..d06570c3ed8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8418,6 +8418,18 @@ bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr &, return !hasLightweightDeletedMask(); } +Int64 MergeTreeData::getMinMetadataVersion(const DataPartsVector & parts) +{ + Int64 version = -1; + for (const auto & part : parts) + { + Int64 part_version = part->getMetadataVersion(); + if (version == -1 || part_version < version) + version = part_version; + } + return version; +} + StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const { auto snapshot_data = std::make_unique(); @@ -8429,10 +8441,14 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & object_columns_copy = object_columns; } - snapshot_data->mutations_snapshot = getMutationsSnapshot( - metadata_snapshot->getMetadataVersion(), - query_context->getSettingsRef().apply_mutations_on_fly); + IMutationsSnapshot::Params params + { + .metadata_version = metadata_snapshot->getMetadataVersion(), + .min_part_metadata_version = getMinMetadataVersion(snapshot_data->parts), + .need_data_mutations = query_context->getSettingsRef().apply_mutations_on_fly, + }; + snapshot_data->mutations_snapshot = getMutationsSnapshot(params); return std::make_shared(*this, metadata_snapshot, std::move(object_columns_copy), std::move(snapshot_data)); } @@ -8618,59 +8634,33 @@ void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) } } -static void updateMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, - const MutationCommands & commands, - Int64 increment) +static void updateAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, Int64 increment) { - if (data_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); - - if (metadata_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); - - bool has_data_mutation = false; - bool has_metadata_mutation = false; + if (num_alter_conversions < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data alter conversions counter is negative ({})", num_alter_conversions); for (const auto & command : commands) { - if (!has_data_mutation && AlterConversions::isSupportedDataMutation(command.type)) + if (AlterConversions::isSupportedDataMutation(command.type) || AlterConversions::isSupportedMetadataMutation(command.type)) { - data_mutations_to_apply += increment; - has_data_mutation = true; + num_alter_conversions += increment; - if (data_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); - } + if (num_alter_conversions < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_alter_conversions); - if (!has_metadata_mutation && AlterConversions::isSupportedMetadataMutation(command.type)) - { - metadata_mutations_to_apply += increment; - has_metadata_mutation = true; - - if (metadata_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); + return; } } } -void incrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & /*lock*/) +void incrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & /*lock*/) { - return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, 1); + updateAlterConversionsCounter(num_alter_conversions, commands, 1); } -void decrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & /*lock*/) +void decrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & /*lock*/) { - return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, -1); + updateAlterConversionsCounter(num_alter_conversions, commands, -1); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 765b68b7559..faf55292257 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -448,6 +448,21 @@ public: struct IMutationsSnapshot { + struct Params + { + Int64 metadata_version = -1; + Int64 min_part_metadata_version = -1; + bool need_data_mutations = false; + + bool needAnyMutations() const { return need_data_mutations || needMetadataMutations(); } + bool needMetadataMutations() const { return min_part_metadata_version < metadata_version; } + }; + + Params params; + + IMutationsSnapshot() = default; + explicit IMutationsSnapshot(Params params_) : params(std::move(params_)) {} + /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly /// (i.e. when reading from the part). Mutations not supported by AlterConversions /// (supportsMutationCommandType()) can be omitted. @@ -455,7 +470,6 @@ public: /// @return list of mutations, in *reverse* order (newest to oldest) virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; virtual std::shared_ptr cloneEmpty() const = 0; - virtual ~IMutationsSnapshot() = default; }; @@ -951,7 +965,10 @@ public: Disks getDisks() const { return getStoragePolicy()->getDisks(); } /// TODO: comment - virtual MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const = 0; + virtual MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const = 0; + + /// TODO: comment + static Int64 getMinMetadataVersion(const DataPartsVector & parts); /// Return alter conversions for part which must be applied on fly. static AlterConversionsPtr getAlterConversionsForPart( @@ -1752,16 +1769,7 @@ struct CurrentlySubmergingEmergingTagger /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated -void incrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & lock); - -void decrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & lock); +void incrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & lock); +void decrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & lock); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3c4ef44dbd8..4a9138c10da 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2118,13 +2118,14 @@ bool MutateTask::prepare() ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); - auto mutations_snapshot = ctx->data->getMutationsSnapshot( - ctx->metadata_snapshot->getMetadataVersion(), - /*need_data_mutations=*/ false); + MergeTreeData::IMutationsSnapshot::Params params + { + .metadata_version = ctx->metadata_snapshot->getMetadataVersion(), + .min_part_metadata_version = ctx->source_part->getMetadataVersion(), + }; - auto alter_conversions = MergeTreeData::getAlterConversionsForPart( - ctx->source_part, - mutations_snapshot); + auto mutations_snapshot = ctx->data->getMutationsSnapshot(params); + auto alter_conversions = MergeTreeData::getAlterConversionsForPart(ctx->source_part, mutations_snapshot); auto context_for_reading = Context::createCopy(ctx->context); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index fbf949b47f5..807fbeebfc4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -951,7 +951,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper { const auto commands = entry.commands; it = mutations_by_znode.erase(it); - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, lock); + decrementAlterConversionsCounter(num_alter_conversions, commands, state_lock); } else it = mutations_by_znode.erase(it); @@ -1000,10 +1000,9 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { - auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)) - .first->second; + auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)).first->second; + incrementAlterConversionsCounter(num_alter_conversions, entry->commands, lock); - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); NOEXCEPT_SCOPE({ for (const auto & pair : entry->block_numbers) { @@ -1077,7 +1076,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( } mutations_by_znode.erase(it); - /// decrementMutationsCounters() will be called in updateMutations() + /// decrementAlterConversionsCounter() will be called in updateMutations() LOG_DEBUG(log, "Removed mutation {} from local state.", entry->znode_name); } @@ -1901,6 +1900,7 @@ ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zk return ReplicatedMergeTreeMergePredicate(*this, zookeeper, std::move(partition_ids_hint)); } + MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const { auto in_partition = mutations_by_partition.find(part->info.partition_id); @@ -1908,20 +1908,23 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo return {}; Int64 part_data_version = part->info.getDataVersion(); - int32_t part_metadata_version = part->getMetadataVersion(); + Int64 part_metadata_version = part->getMetadataVersion(); MutationCommands result; - bool seen_all_data_mutations = false; - bool seen_all_metadata_mutations = false; + bool seen_all_data_mutations = !params.need_data_mutations; + bool seen_all_metadata_mutations = !params.needMetadataMutations(); + + if (seen_all_data_mutations && seen_all_metadata_mutations) + return {}; auto add_to_result = [&](const ReplicatedMergeTreeMutationEntryPtr & entry) { for (const auto & command : entry->commands | std::views::reverse) { - if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + if (AlterConversions::isSupportedMetadataMutation(command.type)) result.push_back(command); - else if (AlterConversions::isSupportedMetadataMutation(command.type)) + else if (params.need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) result.push_back(command); } }; @@ -1935,9 +1938,10 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo break; auto alter_version = entry->alter_version; - if (alter_version != -1) + + if (!seen_all_metadata_mutations && alter_version != -1) { - if (alter_version > metadata_version) + if (alter_version > params.metadata_version) continue; /// We take commands with bigger metadata version @@ -1946,7 +1950,7 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo else seen_all_metadata_mutations = true; } - else + else if (!seen_all_data_mutations) { if (mutation_version > part_data_version) add_to_result(entry); @@ -1958,42 +1962,71 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo return result; } -MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const +MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapshot(const MutationsSnapshot::Params & params) const { - auto res = std::make_shared(); - res->metadata_version = metadata_version; - res->need_data_mutations = need_data_mutations; + auto res = std::make_shared(params); std::lock_guard lock(state_mutex); - bool have_data_mutations = res->need_data_mutations && data_mutations_to_apply > 0; - bool have_metadata_mutations = metadata_mutations_to_apply > 0; + bool need_data_mutations = res->params.need_data_mutations && num_alter_conversions > 0; + bool need_metatadata_mutations = res->params.needMetadataMutations(); - if (!have_data_mutations && !have_metadata_mutations) + if (!need_data_mutations && !need_metatadata_mutations) return res; + auto is_supported_command = [&](const auto & command) + { + if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + return true; + + if (need_metatadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) + return true; + + return false; + }; + for (const auto & [partition_id, mutations] : mutations_by_partition) { auto & in_partition = res->mutations_by_partition[partition_id]; - for (const auto & [version, status] : mutations | std::views::reverse) + bool seen_all_data_mutations = !need_data_mutations; + bool seen_all_metadata_mutations = !need_metatadata_mutations; + + for (const auto & [mutation_version, status] : mutations | std::views::reverse) { - if (status->is_done) + if (seen_all_data_mutations && seen_all_metadata_mutations) break; - bool has_required_command = std::ranges::any_of(status->entry->commands, [&](const auto & command) + auto alter_version = status->entry->alter_version; + + if (!seen_all_metadata_mutations && alter_version != -1) { - if (have_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) - return true; + if (alter_version > params.metadata_version) + continue; - if (have_metadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) - return true; - - return false; - }); - - if (has_required_command) - in_partition.emplace(version, status->entry); + /// We take commands with bigger metadata version + if (alter_version > params.min_part_metadata_version) + { + if (std::ranges::any_of(status->entry->commands, is_supported_command)) + in_partition.emplace(mutation_version, status->entry); + } + else + { + seen_all_metadata_mutations = true; + } + } + else if (!seen_all_data_mutations) + { + if (!status->is_done) + { + if (std::ranges::any_of(status->entry->commands, is_supported_command)) + in_partition.emplace(mutation_version, status->entry); + } + else + { + seen_all_data_mutations = true; + } + } } } @@ -2080,7 +2113,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep mutation.parts_to_do.clear(); } - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, mutation.entry->commands, lock); + decrementAlterConversionsCounter(num_alter_conversions, mutation.entry->commands, lock); } else if (mutation.parts_to_do.size() == 0) { @@ -2137,7 +2170,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); + decrementAlterConversionsCounter(num_alter_conversions, entry->commands, lock); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index f9d5487ee3f..954e2fd951e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -154,8 +154,7 @@ private: std::map mutations_by_znode; /// Unfinished mutations that are required for AlterConversions. - Int64 data_mutations_to_apply = 0; - Int64 metadata_mutations_to_apply = 0; + Int64 num_alter_conversions; /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; @@ -416,11 +415,11 @@ public: struct MutationsSnapshot : public MergeTreeData::IMutationsSnapshot { MutationsSnapshot() = default; + explicit MutationsSnapshot(Params params_) : IMutationsSnapshot(std::move(params_)) {} - Int64 metadata_version = -1; - bool need_data_mutations = false; - + using Params = MergeTreeData::IMutationsSnapshot::Params; using MutationsByPartititon = std::unordered_map>; + MutationsByPartititon mutations_by_partition; MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override; @@ -430,7 +429,7 @@ public: /// Return mutation commands for part which could be not applied to /// it according to part mutation version. Used when we apply alter commands on fly, /// without actual data modification on disk. - MergeTreeData::MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const; + MergeTreeData::MutationsSnapshotPtr getMutationsSnapshot(const MutationsSnapshot::Params & params) const; /// Mark finished mutations as done. If the function needs to be called again at some later time /// (because some mutations are probably done but we are not sure yet), returns true. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 636d2ba5d53..da90ffddc8d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -502,7 +502,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); + incrementAlterConversionsCounter(num_alter_conversions, *it->second.commands, lock); LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } background_operations_assignee.trigger(); @@ -538,7 +538,7 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry.commands, lock); + decrementAlterConversionsCounter(num_alter_conversions, *entry.commands, lock); } } else @@ -819,7 +819,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { bool mutation_finished = *min_version > static_cast(mutation_version); if (!mutation_finished) - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); + decrementAlterConversionsCounter(num_alter_conversions, *it->second.commands, lock); } to_kill.emplace(std::move(it->second)); @@ -904,7 +904,7 @@ void StorageMergeTree::loadMutations() if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry_it->second.commands, lock); + incrementAlterConversionsCounter(num_alter_conversions, *entry_it->second.commands, lock); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -2432,7 +2432,7 @@ MutationCommands StorageMergeTree::MutationsSnapshot::getAlterMutationCommandsFo for (const auto & command : *commands | std::views::reverse) { - if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + if (params.need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) result.push_back(command); else if (AlterConversions::isSupportedMetadataMutation(command.type)) result.push_back(command); @@ -2442,28 +2442,26 @@ MutationCommands StorageMergeTree::MutationsSnapshot::getAlterMutationCommandsFo return result; } -MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const +MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const IMutationsSnapshot::Params & params) const { - auto res = std::make_shared(); - res->metadata_version = metadata_version; - res->need_data_mutations = need_data_mutations; + auto res = std::make_shared(params); std::lock_guard lock(currently_processing_in_background_mutex); - bool have_data_mutations = res->need_data_mutations && data_mutations_to_apply > 0; - bool have_metadata_mutations = metadata_mutations_to_apply > 0; + bool need_data_mutations = res->params.need_data_mutations && num_alter_conversions > 0; + bool need_metatadata_mutations = res->params.needMetadataMutations(); - if (!have_data_mutations && !have_metadata_mutations) + if (!need_data_mutations && !need_metatadata_mutations) return res; for (const auto & [version, entry] : current_mutations_by_version) { bool has_required_command = std::ranges::any_of(*entry.commands, [&](const auto & command) { - if (have_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) + if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) return true; - if (have_metadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) + if (need_metatadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) return true; return false; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 1f2af8b9571..d05c6739738 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -150,8 +150,7 @@ private: std::map current_mutations_by_version; /// Unfinished mutations that are required for AlterConversions. - Int64 data_mutations_to_apply = 0; - Int64 metadata_mutations_to_apply = 0; + Int64 num_alter_conversions; std::atomic shutdown_called {false}; std::atomic flush_called {false}; @@ -315,9 +314,7 @@ private: struct MutationsSnapshot : public IMutationsSnapshot { MutationsSnapshot() = default; - - Int64 metadata_version = -1; - bool need_data_mutations = false; + explicit MutationsSnapshot(Params params_) : IMutationsSnapshot(std::move(params_)) {} using MutationsByVersion = std::map>; MutationsByVersion mutations_by_version; @@ -326,7 +323,7 @@ private: std::shared_ptr cloneEmpty() const override { return std::make_shared(); } }; - MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const override; + MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const override; }; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e2bba1e8068..baca8cb2695 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9149,9 +9149,9 @@ bool StorageReplicatedMergeTree::canUseAdaptiveGranularity() const (!has_non_adaptive_index_granularity_parts && !other_replicas_fixed_granularity)); } -MergeTreeData::MutationsSnapshotPtr StorageReplicatedMergeTree::getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const +MergeTreeData::MutationsSnapshotPtr StorageReplicatedMergeTree::getMutationsSnapshot(const IMutationsSnapshot::Params & params) const { - return queue.getMutationsSnapshot(metadata_version, need_data_mutations); + return queue.getMutationsSnapshot(params); } void StorageReplicatedMergeTree::startBackgroundMovesIfNeeded() diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 3ef367d09ce..cd12cfd3c02 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -932,7 +932,7 @@ private: void waitMutationToFinishOnReplicas( const Strings & replicas, const String & mutation_id) const; - MutationsSnapshotPtr getMutationsSnapshot(Int64 metadata_version, bool need_data_mutations) const override; + MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const override; void startBackgroundMovesIfNeeded() override; From 4e350ff44a4dbdecb1c044fb10df131f6c31ae3d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jul 2024 18:48:41 +0200 Subject: [PATCH 150/844] don't allow explicit uuids and rmt args in replicated db --- src/Core/Settings.h | 3 +- src/Databases/DatabaseReplicated.cpp | 12 ----- src/Interpreters/InterpreterCreateQuery.cpp | 20 ++++++++ ...tractZooKeeperPathFromReplicatedTableDef.h | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 46 ++++++++++++++----- tests/config/users.d/database_replicated.xml | 1 + .../configs/settings.xml | 1 + .../configs/settings2.xml | 1 + .../02858_explicit_uuid_and_zk_path.reference | 10 ++++ .../02858_explicit_uuid_and_zk_path.sh | 41 +++++++++++++++++ 10 files changed, 112 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference create mode 100755 tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d84e5b149f6..c162c75b6a0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -731,7 +731,8 @@ class IColumn; M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \ - M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \ + M(UInt64, database_replicated_allow_replicated_engine_arguments, 0, "0 - Don't allow to explicitly specify ZooKeeper path and replica name for *MergeTree tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified path and use default one instead.", 0) \ + M(UInt64, database_replicated_allow_explicit_uuid, 0, "0 - Don't allow to explicitly specify UUIDs for tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified UUID and generate a random one instead.", 0) \ M(Bool, cloud_mode, false, "Only available in ClickHouse Cloud", 0) \ M(UInt64, cloud_mode_engine, 1, "Only available in ClickHouse Cloud", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result, one of: 'none', 'throw', 'null_status_on_timeout', 'never_throw', 'none_only_active', 'throw_only_active', 'null_status_on_timeout_only_active'", 0) \ diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 4ca9afc49eb..507d1a9affc 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -774,18 +774,6 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ bool maybe_replica_macros = info.expanded_other; bool enable_functional_tests_helper = getContext()->getConfigRef().has("_functional_tests_helper_database_replicated_replace_args_macros"); - if (!enable_functional_tests_helper) - { - if (query_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments) - LOG_WARNING(log, "It's not recommended to explicitly specify zookeeper_path and replica_name in ReplicatedMergeTree arguments"); - else - throw Exception(ErrorCodes::INCORRECT_QUERY, - "It's not allowed to specify explicit zookeeper_path and replica_name " - "for ReplicatedMergeTree arguments in Replicated database. If you really want to " - "specify them explicitly, enable setting " - "database_replicated_allow_replicated_engine_arguments."); - } - if (maybe_shard_macros && maybe_replica_macros) return; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a990eb651ce..28827a1fe37 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1055,6 +1055,26 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data bool from_path = create.attach_from_path.has_value(); bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + if (database->getEngineName() == "Replicated" && create.uuid != UUIDHelpers::Nil && !is_replicated_database_internal) + { + if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "It's not allowed to explicitly specify UUIDs for tables in Replicated databases, " + "see database_replicated_allow_explicit_uuid"); + } + else if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 0) + { + LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "It's not recommended to explicitly specify UUIDs for tables in Replicated databases"); + } + else // if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 2) + { + UUID old_uuid = create.uuid; + create.generateRandomUUID(/*always_generate_new_uuid*/ true); + LOG_INFO(&Poco::Logger::get("InterpreterCreateQuery"), "Replaced a user-provided UUID ({}) with a random one ({}) " + "to make sure it's unique", old_uuid, create.uuid); + } + } + if (is_replicated_database_internal && !internal) { if (create.uuid == UUIDHelpers::Nil) diff --git a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h index 5ef5e1db62e..212dc048868 100644 --- a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h +++ b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h @@ -14,6 +14,6 @@ using ContextPtr = std::shared_ptr; /// Extracts a zookeeper path from a specified CREATE TABLE query. /// The function checks the table engine and if it is Replicated*MergeTree then it takes the first argument and expands macros in it. /// Returns std::nullopt if the specified CREATE query doesn't describe a Replicated table or its arguments can't be evaluated. -std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & create_query, const ContextPtr & context); +std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & create_query, const ContextPtr & local_context); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d234103e52b..66f7b2114bf 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -185,7 +186,7 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( const String & engine_name, ASTs & engine_args, LoadingStrictnessLevel mode, - const ContextPtr & context, + const ContextPtr & local_context, String & zookeeper_path, String & replica_name, RenamingRestrictions & renaming_restrictions) @@ -202,11 +203,11 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( { /// Allow expressions in engine arguments. /// In new syntax argument can be literal or identifier or array/tuple of identifiers. - evaluateEngineArgs(engine_args, context); + evaluateEngineArgs(engine_args, local_context); } - bool is_on_cluster = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - bool is_replicated_database = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + bool is_on_cluster = local_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_replicated_database = local_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries @@ -226,10 +227,10 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( /// We did unfold it in previous versions to make moving table from Atomic to Ordinary database work correctly, /// but now it's not allowed (and it was the only reason to unfold {uuid} macro). info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + zookeeper_path = local_context->getMacros()->expand(zookeeper_path, info); info.level = 0; - replica_name = context->getMacros()->expand(replica_name, info); + replica_name = local_context->getMacros()->expand(replica_name, info); } ast_zk_path->value = zookeeper_path; @@ -247,11 +248,11 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( } if (!allow_uuid_macro) info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + zookeeper_path = local_context->getMacros()->expand(zookeeper_path, info); info.level = 0; info.table_id.uuid = UUIDHelpers::Nil; - replica_name = context->getMacros()->expand(replica_name, info); + replica_name = local_context->getMacros()->expand(replica_name, info); /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation @@ -268,9 +269,24 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( bool has_arguments = (arg_num + 2 <= arg_cnt); bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + const auto & server_settings = local_context->getServerSettings(); if (has_valid_arguments) { + if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 0) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "It's not allowed to specify explicit zookeeper_path and replica_name " + "for ReplicatedMergeTree arguments in Replicated database. If you really want to " + "specify them explicitly, enable setting " + "database_replicated_allow_replicated_engine_arguments."); + } + else if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 1) + { + LOG_WARNING(&Poco::Logger::get("registerStorageMergeTree"), "It's not recommended to explicitly specify " + "zookeeper_path and replica_name in ReplicatedMergeTree arguments"); + } + /// Get path and name from engine arguments auto * ast_zk_path = engine_args[arg_num]->as(); if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) @@ -284,6 +300,15 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); + + if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 2) + { + LOG_INFO(&Poco::Logger::get("registerStorageMergeTree"), "Replacing user-provided ZooKeeper path and replica name ({}, {}) " + "with default arguments", zookeeper_path, replica_name); + engine_args[arg_num]->as()->value = zookeeper_path = server_settings.default_replica_path; + engine_args[arg_num + 1]->as()->value = replica_name = server_settings.default_replica_name; + } + expand_macro(ast_zk_path, ast_replica_name); } else if (is_extended_storage_def @@ -293,7 +318,6 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( { /// Try use default values if arguments are not specified. /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. - const auto & server_settings = context->getServerSettings(); zookeeper_path = server_settings.default_replica_path; /// TODO maybe use hostname if {replica} is not defined? replica_name = server_settings.default_replica_name; @@ -318,7 +342,7 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( } /// Extracts a zookeeper path from a specified CREATE TABLE query. -std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & query, const ContextPtr & context) +std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & query, const ContextPtr & local_context) { if (!query.storage || !query.storage->engine) return {}; @@ -342,7 +366,7 @@ std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreate try { - extractZooKeeperPathAndReplicaNameFromEngineArgs(query, table_id, engine_name, engine_args, mode, context, + extractZooKeeperPathAndReplicaNameFromEngineArgs(query, table_id, engine_name, engine_args, mode, local_context, zookeeper_path, replica_name, renaming_restrictions); } catch (Exception & e) diff --git a/tests/config/users.d/database_replicated.xml b/tests/config/users.d/database_replicated.xml index c049c3559fc..1c2cf2ac22b 100644 --- a/tests/config/users.d/database_replicated.xml +++ b/tests/config/users.d/database_replicated.xml @@ -6,6 +6,7 @@ 120 1 1 + 3 diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml index c637fe8eead..4b1122b1b5f 100644 --- a/tests/integration/test_replicated_database/configs/settings.xml +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -5,6 +5,7 @@ 1 0 0 + 3 diff --git a/tests/integration/test_replicated_database/configs/settings2.xml b/tests/integration/test_replicated_database/configs/settings2.xml index dad5740a8ae..9a3038871e1 100644 --- a/tests/integration/test_replicated_database/configs/settings2.xml +++ b/tests/integration/test_replicated_database/configs/settings2.xml @@ -5,6 +5,7 @@ 1 0 0 + 3 0 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference new file mode 100644 index 00000000000..1d3a90daccd --- /dev/null +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference @@ -0,0 +1,10 @@ +2 +3 +m1 +m2 +rmt1 +rmt2 +02858000-1000-4000-8000-0000000000 +0 +CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/default/auto_{shard}\', \'1auto_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE rdb_default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh new file mode 100755 index 00000000000..f2c2f47cd0d --- /dev/null +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +db=$CLICKHOUSE_DATABASE +if [[ $($CLICKHOUSE_CLIENT -q "SELECT engine = 'Replicated' FROM system.databases WHERE name='$CLICKHOUSE_DATABASE'") != 1 ]]; then + $CLICKHOUSE_CLIENT -q "CREATE DATABASE rdb_$CLICKHOUSE_DATABASE ENGINE=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', '1', '1')" + db="rdb_$CLICKHOUSE_DATABASE" +fi + +$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=0 -q "CREATE TABLE $db.m0 +UUID '02858000-1000-4000-8000-000000000000' (n int) ENGINE=Memory" 2>&1| grep -Fac "database_replicated_allow_explicit_uuid" + +$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=1 -q "CREATE TABLE $db.m1 +UUID '02858000-1000-4000-8000-0000000000$(($RANDOM % 10))$(($RANDOM % 10))' (n int) ENGINE=Memory" + +$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=2 -q "CREATE TABLE $db.m2 +UUID '02858000-1000-4000-8000-000000000002' (n int) ENGINE=Memory" + + +$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=0 -q "CREATE TABLE $db.rmt0 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" + +$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=1 -q "CREATE TABLE $db.rmt1 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" + +$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=2 -q "CREATE TABLE $db.rmt2 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" + + +$CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER BY name" + +$CLICKHOUSE_CLIENT -q "SELECT substring(toString(uuid) as s, 1, length(s) - 2) FROM system.tables WHERE database='$db' and name='m1'" +$CLICKHOUSE_CLIENT -q "SELECT toString(uuid) LIKE '02858000%' FROM system.tables WHERE database='$db' and name='m2'" + +$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt1" +$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt2" + +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS rdb_$CLICKHOUSE_DATABASE" From aa9591419d3f0cb1575ddd2cf7b82f2cfc397e2b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jul 2024 19:15:06 +0200 Subject: [PATCH 151/844] fix --- .../0_stateless/02858_explicit_uuid_and_zk_path.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh index f2c2f47cd0d..1b5d1a06a9b 100755 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) db=$CLICKHOUSE_DATABASE if [[ $($CLICKHOUSE_CLIENT -q "SELECT engine = 'Replicated' FROM system.databases WHERE name='$CLICKHOUSE_DATABASE'") != 1 ]]; then - $CLICKHOUSE_CLIENT -q "CREATE DATABASE rdb_$CLICKHOUSE_DATABASE ENGINE=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', '1', '1')" + $CLICKHOUSE_CLIENT -q "CREATE DATABASE rdb_$CLICKHOUSE_DATABASE ENGINE=Replicated('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rdb', '1', '1')" db="rdb_$CLICKHOUSE_DATABASE" fi @@ -21,13 +21,13 @@ UUID '02858000-1000-4000-8000-000000000002' (n int) ENGINE=Memory" $CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=0 -q "CREATE TABLE $db.rmt0 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" $CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=1 -q "CREATE TABLE $db.rmt1 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" $CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=2 -q "CREATE TABLE $db.rmt2 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_DATABASE', '1') ORDER BY n" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" $CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER BY name" From 826f6c1ce59893e0e1787a207a182c2eb012c7ae Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jul 2024 17:56:08 +0000 Subject: [PATCH 152/844] fix initialization --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.h | 2 +- src/Storages/StorageMergeTree.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 954e2fd951e..223ca989feb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -154,7 +154,7 @@ private: std::map mutations_by_znode; /// Unfinished mutations that are required for AlterConversions. - Int64 num_alter_conversions; + Int64 num_alter_conversions = 0; /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index d05c6739738..40e5a8e5ea4 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -150,7 +150,7 @@ private: std::map current_mutations_by_version; /// Unfinished mutations that are required for AlterConversions. - Int64 num_alter_conversions; + Int64 num_alter_conversions = 0; std::atomic shutdown_called {false}; std::atomic flush_called {false}; From 613636bb1b324acf7500c8a760abf5aea764418b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jul 2024 20:49:10 +0200 Subject: [PATCH 153/844] fix --- src/Core/SettingsChangesHistory.cpp | 2 ++ src/Databases/DatabaseReplicated.cpp | 3 ++- src/Interpreters/InterpreterCreateQuery.cpp | 6 +++--- .../MergeTree/registerStorageMergeTree.cpp | 2 +- .../01148_zookeeper_path_macros_unfolding.sql | 2 ++ ...atabase_replicated_no_arguments_for_rmt.sh | 2 +- .../02858_explicit_uuid_and_zk_path.reference | 2 +- .../02858_explicit_uuid_and_zk_path.sh | 21 +++++++++++-------- 8 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6af6b4b15aa..6c730940c87 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -63,6 +63,8 @@ static std::initializer_listgetSettingsRef().database_replicated_allow_replicated_engine_arguments == 0) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Explicit zookeeper_path and replica_name are specified in ReplicatedMergeTree arguments. " "If you really want to specify it explicitly, then you should use some macros " "to distinguish different shards and replicas"); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c32a32be5cc..2862a907bec 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1062,15 +1062,15 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data throw Exception(ErrorCodes::BAD_ARGUMENTS, "It's not allowed to explicitly specify UUIDs for tables in Replicated databases, " "see database_replicated_allow_explicit_uuid"); } - else if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 0) + else if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 1) { LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "It's not recommended to explicitly specify UUIDs for tables in Replicated databases"); } - else // if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 2) + else if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 2) { UUID old_uuid = create.uuid; create.generateRandomUUID(/*always_generate_new_uuid*/ true); - LOG_INFO(&Poco::Logger::get("InterpreterCreateQuery"), "Replaced a user-provided UUID ({}) with a random one ({}) " + LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Replaced a user-provided UUID ({}) with a random one ({}) " "to make sure it's unique", old_uuid, create.uuid); } } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 66f7b2114bf..d27c786d422 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -303,7 +303,7 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( if (is_replicated_database && local_context->getSettingsRef().database_replicated_allow_replicated_engine_arguments == 2) { - LOG_INFO(&Poco::Logger::get("registerStorageMergeTree"), "Replacing user-provided ZooKeeper path and replica name ({}, {}) " + LOG_WARNING(&Poco::Logger::get("registerStorageMergeTree"), "Replacing user-provided ZooKeeper path and replica name ({}, {}) " "with default arguments", zookeeper_path, replica_name); engine_args[arg_num]->as()->value = zookeeper_path = server_settings.default_replica_path; engine_args[arg_num + 1]->as()->value = replica_name = server_settings.default_replica_name; diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql index a585ef1c324..c689542e4c3 100644 --- a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -7,6 +7,8 @@ DROP TABLE IF EXISTS rmt1; DROP TABLE IF EXISTS rmt2; DROP TABLE IF EXISTS rmt3; +SET database_replicated_allow_replicated_engine_arguments=1; + CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('/clickhouse/test_01148/{shard}/{database}/{table}', '{replica}') ORDER BY n; SHOW CREATE TABLE rmt; RENAME TABLE rmt TO rmt1; diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index a0f228e6af4..7b600ade9a0 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO us ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 36 }" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference index 1d3a90daccd..1241d9714f9 100644 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference @@ -6,5 +6,5 @@ rmt1 rmt2 02858000-1000-4000-8000-0000000000 0 -CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/default/auto_{shard}\', \'1auto_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/auto_{shard}\', \'1auto_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 CREATE TABLE rdb_default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh index 1b5d1a06a9b..b968c02a07a 100755 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -1,33 +1,36 @@ #!/usr/bin/env bash +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh + db=$CLICKHOUSE_DATABASE if [[ $($CLICKHOUSE_CLIENT -q "SELECT engine = 'Replicated' FROM system.databases WHERE name='$CLICKHOUSE_DATABASE'") != 1 ]]; then $CLICKHOUSE_CLIENT -q "CREATE DATABASE rdb_$CLICKHOUSE_DATABASE ENGINE=Replicated('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rdb', '1', '1')" db="rdb_$CLICKHOUSE_DATABASE" fi -$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=0 -q "CREATE TABLE $db.m0 +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_explicit_uuid=0 -q "CREATE TABLE $db.m0 UUID '02858000-1000-4000-8000-000000000000' (n int) ENGINE=Memory" 2>&1| grep -Fac "database_replicated_allow_explicit_uuid" -$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=1 -q "CREATE TABLE $db.m1 +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_explicit_uuid=1 -q "CREATE TABLE $db.m1 UUID '02858000-1000-4000-8000-0000000000$(($RANDOM % 10))$(($RANDOM % 10))' (n int) ENGINE=Memory" -$CLICKHOUSE_CLIENT --database_replicated_allow_explicit_uuid=2 -q "CREATE TABLE $db.m2 +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_explicit_uuid=2 -q "CREATE TABLE $db.m2 UUID '02858000-1000-4000-8000-000000000002' (n int) ENGINE=Memory" -$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=0 -q "CREATE TABLE $db.rmt0 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=0 -q "CREATE TABLE $db.rmt0 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" -$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=1 -q "CREATE TABLE $db.rmt1 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=1 -q "CREATE TABLE $db.rmt1 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" -$CLICKHOUSE_CLIENT --database_replicated_allow_replicated_engine_arguments=2 -q "CREATE TABLE $db.rmt2 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY n" +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=2 -q "CREATE TABLE $db.rmt2 (n int) +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" $CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER BY name" From 0e6fd2251e9048bbdb8535c3af0971fb0c670350 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Jul 2024 17:15:04 +0200 Subject: [PATCH 154/844] fix --- .../0_stateless/02858_explicit_uuid_and_zk_path.reference | 2 +- .../queries/0_stateless/02858_explicit_uuid_and_zk_path.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference index 1241d9714f9..cb6d0135adf 100644 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference @@ -6,5 +6,5 @@ rmt1 rmt2 02858000-1000-4000-8000-0000000000 0 -CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/auto_{shard}\', \'1auto_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/{shard}\', \'_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 CREATE TABLE rdb_default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh index b968c02a07a..a959446c822 100755 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -24,13 +24,13 @@ UUID '02858000-1000-4000-8000-000000000002' (n int) ENGINE=Memory" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=0 -q "CREATE TABLE $db.rmt0 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt/{shard}', '_{replica}') ORDER BY n" 2>&1| grep -Fac "database_replicated_allow_replicated_engine_arguments" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=1 -q "CREATE TABLE $db.rmt1 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt/{shard}', '_{replica}') ORDER BY n" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_replicated_engine_arguments=2 -q "CREATE TABLE $db.rmt2 (n int) -ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', '1') ORDER BY n" +ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt/{shard}', '_{replica}') ORDER BY n" $CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER BY name" From 5a2b0ea1cc7e53a27046ce7c80bb7284ec504909 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 5 Jul 2024 16:08:10 -0300 Subject: [PATCH 155/844] simplify formatting of astauth --- src/Parsers/Access/ASTAuthenticationData.cpp | 4 +--- src/Parsers/Access/ASTCreateUserQuery.cpp | 12 +++++++----- src/Parsers/IAST.h | 6 +----- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 9c76e73ad90..eb70701d1dc 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -160,9 +160,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt auth_type_name = AuthenticationTypeInfo::get(*type).name; } - const char * identified_string = settings.additional_authentication_method ? " ADD IDENTIFIED" : " IDENTIFIED"; - - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << identified_string << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); if (!auth_type_name.empty()) { diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0a4c4af0bb7..0aad4fa8152 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -21,14 +21,16 @@ namespace void formatAuthenticationData(const std::vector> & authentication_methods, const IAST::FormatSettings & settings) { + /* + * The first authentication method must be formatted in the form of: `IDENTIFIED WITH xyz..` + * The remaining ones shall contain the `ADD`: `ADD IDENTIFIED WITH`. + * */ authentication_methods[0]->format(settings); - auto settings_with_additional_authentication_method = settings; - settings_with_additional_authentication_method.additional_authentication_method = true; - - for (auto i = 1u; i < authentication_methods.size(); i++) + for (std::size_t i = 1; i < authentication_methods.size(); i++) { - authentication_methods[i]->format(settings_with_additional_authentication_method); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " ADD" << (settings.hilite ? IAST::hilite_none : ""); + authentication_methods[i]->format(settings); } } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index a74f321eead..ee70fed0f07 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,7 +201,6 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. LiteralEscapingStyle literal_escaping_style; - bool additional_authentication_method; explicit FormatSettings( WriteBuffer & ostr_, @@ -210,8 +209,7 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, - bool additional_authentication_method_ = false) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -220,7 +218,6 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) - , additional_authentication_method(additional_authentication_method_) { } @@ -233,7 +230,6 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) - , additional_authentication_method(other.additional_authentication_method) { } From e7105a3faa50ba012e6cff1e9e26e62fce6d75ba Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Jul 2024 20:51:30 +0200 Subject: [PATCH 156/844] fix --- src/Backups/RestorerFromBackup.cpp | 6 +++++- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- .../0_stateless/02858_explicit_uuid_and_zk_path.reference | 2 +- .../queries/0_stateless/02858_explicit_uuid_and_zk_path.sh | 4 ++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 454a0468e9f..63ca272db0b 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -903,11 +903,15 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name) table_info.database = DatabaseCatalog::instance().getDatabase(table_name.database); DatabasePtr database = table_info.database; + auto query_context = Context::createCopy(context); + query_context->setSetting("database_replicated_allow_explicit_uuid", 3); + query_context->setSetting("database_replicated_allow_replicated_engine_arguments", 3); + /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some /// database-specific things). database->createTableRestoredFromBackup( create_table_query, - context, + query_context, restore_coordination, std::chrono::duration_cast(create_table_timeout).count()); } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 2862a907bec..89f503f5fb1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1055,7 +1055,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data bool from_path = create.attach_from_path.has_value(); bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - if (database->getEngineName() == "Replicated" && create.uuid != UUIDHelpers::Nil && !is_replicated_database_internal) + if (database->getEngineName() == "Replicated" && create.uuid != UUIDHelpers::Nil && !is_replicated_database_internal && !create.attach) { if (getContext()->getSettingsRef().database_replicated_allow_explicit_uuid == 0) { diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference index cb6d0135adf..6444e10bb48 100644 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference @@ -4,7 +4,7 @@ m1 m2 rmt1 rmt2 -02858000-1000-4000-8000-0000000000 +02858000-1000-4000-8000-000000000 0 CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/{shard}\', \'_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 CREATE TABLE rdb_default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh index a959446c822..b011aed613a 100755 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allo UUID '02858000-1000-4000-8000-000000000000' (n int) ENGINE=Memory" 2>&1| grep -Fac "database_replicated_allow_explicit_uuid" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_explicit_uuid=1 -q "CREATE TABLE $db.m1 -UUID '02858000-1000-4000-8000-0000000000$(($RANDOM % 10))$(($RANDOM % 10))' (n int) ENGINE=Memory" +UUID '02858000-1000-4000-8000-000000000$(($RANDOM % 10))$(($RANDOM % 10))$(($RANDOM % 10))' (n int) ENGINE=Memory" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none --database_replicated_allow_explicit_uuid=2 -q "CREATE TABLE $db.m2 UUID '02858000-1000-4000-8000-000000000002' (n int) ENGINE=Memory" @@ -35,7 +35,7 @@ ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt/{shard}' $CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER BY name" -$CLICKHOUSE_CLIENT -q "SELECT substring(toString(uuid) as s, 1, length(s) - 2) FROM system.tables WHERE database='$db' and name='m1'" +$CLICKHOUSE_CLIENT -q "SELECT substring(toString(uuid) as s, 1, length(s) - 3) FROM system.tables WHERE database='$db' and name='m1'" $CLICKHOUSE_CLIENT -q "SELECT toString(uuid) LIKE '02858000%' FROM system.tables WHERE database='$db' and name='m2'" $CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt1" From 88736a0d74b068f271ee21c69760128f82adbb91 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 7 Jul 2024 21:12:26 +0200 Subject: [PATCH 157/844] Update JSONCompactWithProgressRowOutputFormat to print JSON on each row --- src/Formats/JSONUtils.cpp | 64 +++++++++++++++++++ src/Formats/JSONUtils.h | 10 +++ ...JSONCompactWithProgressRowOutputFormat.cpp | 39 ++++++++++- .../JSONCompactWithProgressRowOutputFormat.h | 4 ++ 4 files changed, 115 insertions(+), 2 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index f0985f4a6b7..16083441f3a 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -483,6 +483,33 @@ namespace JSONUtils writeArrayEnd(out, 1); } + + void writeCompactMetadata(const Names & names, const DataTypes & types, const FormatSettings & settings, WriteBuffer & out) + { + writeCompactArrayStart(out, 0, "meta"); + + for (size_t i = 0; i < names.size(); ++i) + { + writeCompactObjectStart(out); + writeTitle("name", out, 0, ""); + + /// The field names are pre-escaped to be put into JSON string literal. + writeChar('"', out); + writeString(names[i], out); + writeChar('"', out); + + writeFieldCompactDelimiter(out); + writeTitle("type", out, 0, ""); + writeJSONString(types[i]->getName(), out, settings); + writeCompactObjectEnd(out); + + if (i + 1 < names.size()) + writeFieldCompactDelimiter(out); + } + + writeCompactArrayEnd(out); + } + void writeAdditionalInfo( size_t rows, size_t rows_before_limit, @@ -523,6 +550,43 @@ namespace JSONUtils } } + void writeCompactAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out) + { + writeCompactObjectStart(out, 0, "statistics"); + writeTitle("rows", out, 0, ""); + writeIntText(rows, out); + writeFieldCompactDelimiter(out); + + if (applied_limit) + { + writeTitle("rows_before_limit_at_least", out, 0, ""); + writeIntText(rows_before_limit, out); + writeFieldCompactDelimiter(out); + } + + if (write_statistics) + { + writeTitle("elapsed", out, 0, ""); + writeText(watch.elapsedSeconds(), out); + writeFieldCompactDelimiter(out); + + writeTitle("rows_read", out, 0, ""); + writeText(progress.read_rows.load(), out); + writeFieldCompactDelimiter(out); + + writeTitle("bytes_read", out, 0, ""); + writeText(progress.read_bytes.load(), out); + } + writeCompactObjectEnd(out); + } + void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent) { writeTitle("exception", out, indent, " "); diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 7ee111c1285..718b9b2f610 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -99,6 +99,7 @@ namespace JSONUtils WriteBuffer & out); void writeMetadata(const Names & names, const DataTypes & types, const FormatSettings & settings, WriteBuffer & out); + void writeCompactMetadata(const Names & names, const DataTypes & types, const FormatSettings & settings, WriteBuffer & out); void writeAdditionalInfo( size_t rows, @@ -109,6 +110,15 @@ namespace JSONUtils bool write_statistics, WriteBuffer & out); + void writeCompactAdditionalInfo( + size_t rows, + size_t rows_before_limit, + bool applied_limit, + const Stopwatch & watch, + const Progress & progress, + bool write_statistics, + WriteBuffer & out); + void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent = 0); void skipColon(ReadBuffer & in); diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 39532fb76fb..7a19b56d0ca 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -17,6 +17,14 @@ JSONCompactWithProgressRowOutputFormat::JSONCompactWithProgressRowOutputFormat( { } +void JSONCompactWithProgressRowOutputFormat::writePrefix() +{ + JSONUtils::writeCompactObjectStart(*ostr); + JSONUtils::writeCompactMetadata(names, types, settings, *ostr); + JSONUtils::writeCompactObjectEnd(*ostr); + writeCString("}\n", *ostr); +} + void JSONCompactWithProgressRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { JSONUtils::writeFieldFromColumn(column, serialization, row_num, yield_strings, settings, *ostr); @@ -32,16 +40,22 @@ void JSONCompactWithProgressRowOutputFormat::writeRowStartDelimiter() { if (has_progress) writeProgress(); - JSONUtils::writeCompactArrayStart(*ostr, 2); + writeCString("{\"data\":", *ostr); + JSONUtils::writeCompactArrayStart(*ostr); } void JSONCompactWithProgressRowOutputFormat::writeRowEndDelimiter() { JSONUtils::writeCompactArrayEnd(*ostr); + writeCString("}\n", *ostr); field_number = 0; ++row_count; } +void JSONCompactWithProgressRowOutputFormat::writeRowBetweenDelimiter() +{ +} + void JSONCompactWithProgressRowOutputFormat::writeBeforeTotals() { JSONUtils::writeFieldDelimiter(*ostr, 2); @@ -91,7 +105,6 @@ void JSONCompactWithProgressRowOutputFormat::writeSuffix() { if (has_progress) writeProgress(); - JSONRowOutputFormat::writeSuffix(); } void JSONCompactWithProgressRowOutputFormat::writeProgress() @@ -103,6 +116,28 @@ void JSONCompactWithProgressRowOutputFormat::writeProgress() has_progress = false; } +void JSONCompactWithProgressRowOutputFormat::finalizeImpl() +{ + JSONUtils::writeCompactAdditionalInfo( + row_count, + statistics.rows_before_limit, + statistics.applied_limit, + statistics.watch, + statistics.progress, + settings.write_statistics && exception_message.empty(), + *ostr); + + exception_message = "Test exception message."; + if (!exception_message.empty()) + { + writeCString("\n", *ostr); + JSONUtils::writeCompactObjectStart(*ostr); + JSONUtils::writeException(exception_message, *ostr, settings, 0); + JSONUtils::writeCompactObjectEnd(*ostr); + } + ostr->next(); +} + void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory) { factory.registerOutputFormat("JSONCompactWithProgress", []( diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h index 4bc10d41f19..0c7e6b295e8 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h @@ -32,6 +32,7 @@ private: void writeFieldDelimiter() override; void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; + void writeRowBetweenDelimiter() override; bool supportTotals() const override { return true; } bool supportExtremes() const override { return true; } void writeBeforeTotals() override; @@ -40,7 +41,10 @@ private: void writeTotals(const Columns & columns, size_t row_num) override; void writeProgress(); + void writePrefix() override; void writeSuffix() override; + void finalizeImpl() override; + Progress progress; std::vector progress_lines; From 2ae9490a681d3d522dd30e96b9f36b2d638ffd10 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 7 Jul 2024 21:22:02 +0200 Subject: [PATCH 158/844] Fix progress rows and bytes read --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 5 ++--- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 7a19b56d0ca..d72a6556c2b 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -81,11 +81,11 @@ void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * t void JSONCompactWithProgressRowOutputFormat::onProgress(const Progress & value) { - progress.incrementPiecewiseAtomically(value); + statistics.progress.incrementPiecewiseAtomically(value); String progress_line; WriteBufferFromString buf(progress_line); writeCString("{\"progress\":", buf); - progress.writeJSON(buf); + statistics.progress.writeJSON(buf); writeCString("}\n", buf); buf.finalize(); std::lock_guard lock(progress_lines_mutex); @@ -127,7 +127,6 @@ void JSONCompactWithProgressRowOutputFormat::finalizeImpl() settings.write_statistics && exception_message.empty(), *ostr); - exception_message = "Test exception message."; if (!exception_message.empty()) { writeCString("\n", *ostr); diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h index 0c7e6b295e8..669e342c583 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h @@ -46,7 +46,6 @@ private: void finalizeImpl() override; - Progress progress; std::vector progress_lines; std::mutex progress_lines_mutex; /// To not lock mutex and check progress_lines every row, From 2469647609c53ac968d7fba60867c78ecebe522c Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 7 Jul 2024 21:30:36 +0200 Subject: [PATCH 159/844] Fix statistics section --- src/Formats/JSONUtils.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 16083441f3a..0918658cf6f 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -559,6 +559,7 @@ namespace JSONUtils bool write_statistics, WriteBuffer & out) { + writeCompactObjectStart(out); writeCompactObjectStart(out, 0, "statistics"); writeTitle("rows", out, 0, ""); writeIntText(rows, out); @@ -585,6 +586,7 @@ namespace JSONUtils writeText(progress.read_bytes.load(), out); } writeCompactObjectEnd(out); + writeCompactObjectEnd(out); } void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent) From 00e47d64b0c12023d926b383c6dea327e9806fc7 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 8 Jul 2024 09:02:07 +0200 Subject: [PATCH 160/844] Add JSONCompactWithProgressStrings format --- .../Impl/JSONCompactWithProgressRowOutputFormat.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index d72a6556c2b..91608cd5050 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -146,6 +146,14 @@ void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory) { return std::make_shared(buf, sample, format_settings, false); }); + + factory.registerOutputFormat("JSONCompactWithProgressStrings", []( + WriteBuffer & buf, + const Block & sample, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, format_settings, true); + }); } } From 58d34e306cec5b41ac372934e43639c2df24e54e Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 8 Jul 2024 09:08:03 +0200 Subject: [PATCH 161/844] Add documentation on JSONCompactWithProgress --- docs/en/interfaces/formats.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ffdd7e2ca25..6161cddefdd 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -38,6 +38,7 @@ The supported formats are: | [JSONCompact](#jsoncompact) | ✔ | ✔ | | [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | | [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONCompactWithProgress](#jsoncompactwithprogress) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | @@ -925,6 +926,23 @@ Example: Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here) +## JSONCompactWithProgress (#jsoncompactwithprogress) + +In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object. + +Each row is either a metadata object, data object, progress information or statistics object. + +Example: + +```json +{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]}} +{"progress":{"read_rows":"8","read_bytes":"168","written_rows":"0","written_bytes":"0","total_rows_to_read":"2","result_rows":"0","result_bytes":"0","elapsed_ns":"0"}} +{"data":["1", "John Doe"]} +{"data":["2", "Joe Doe"]} +{"statistics": {"rows":2, "rows_before_limit_at_least":8, "elapsed":0.001995, "rows_read":8, "bytes_read":168}} +``` + + ## JSONEachRow {#jsoneachrow} In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object. From 78cb02503f05d1c1c84dbb17fd51700d74f697df Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 8 Jul 2024 09:22:07 +0200 Subject: [PATCH 162/844] Remove extra space --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 91608cd5050..6be2d092305 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -111,7 +111,7 @@ void JSONCompactWithProgressRowOutputFormat::writeProgress() { std::lock_guard lock(progress_lines_mutex); for (const auto & progress_line : progress_lines) - writeString(progress_line, *ostr); + writeString(progress_line, *ostr); progress_lines.clear(); has_progress = false; } From 7fbcaeaae2bddc07898fd4e2eb7aa9331e2d0df5 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 8 Jul 2024 09:29:14 +0200 Subject: [PATCH 163/844] Remove wrong comment --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h index 669e342c583..dc7a70229e7 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h @@ -11,8 +11,6 @@ namespace DB struct FormatSettings; -/** The stream for outputting data in the JSONCompact- formats. - */ class JSONCompactWithProgressRowOutputFormat final : public JSONRowOutputFormat { public: From a3360d0e63c08132ecd73423d2dd9fc576b54589 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Mon, 8 Jul 2024 13:53:24 +0200 Subject: [PATCH 164/844] Fix code style --- ...JSONCompactWithProgressRowOutputFormat.cpp | 29 +++++++------------ .../JSONCompactWithProgressRowOutputFormat.h | 6 +--- 2 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 6be2d092305..9603ce0265d 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -1,6 +1,6 @@ -#include #include #include +#include #include @@ -9,10 +9,7 @@ namespace DB { JSONCompactWithProgressRowOutputFormat::JSONCompactWithProgressRowOutputFormat( - WriteBuffer & out_, - const Block & header, - const FormatSettings & settings_, - bool yield_strings_) + WriteBuffer & out_, const Block & header, const FormatSettings & settings_, bool yield_strings_) : JSONRowOutputFormat(out_, header, settings_, yield_strings_) { } @@ -139,21 +136,15 @@ void JSONCompactWithProgressRowOutputFormat::finalizeImpl() void registerOutputFormatJSONCompactWithProgress(FormatFactory & factory) { - factory.registerOutputFormat("JSONCompactWithProgress", []( - WriteBuffer & buf, - const Block & sample, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, format_settings, false); - }); + factory.registerOutputFormat( + "JSONCompactWithProgress", + [](WriteBuffer & buf, const Block & sample, const FormatSettings & format_settings) + { return std::make_shared(buf, sample, format_settings, false); }); - factory.registerOutputFormat("JSONCompactWithProgressStrings", []( - WriteBuffer & buf, - const Block & sample, - const FormatSettings & format_settings) - { - return std::make_shared(buf, sample, format_settings, true); - }); + factory.registerOutputFormat( + "JSONCompactWithProgressStrings", + [](WriteBuffer & buf, const Block & sample, const FormatSettings & format_settings) + { return std::make_shared(buf, sample, format_settings, true); }); } } diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h index dc7a70229e7..1c21914d8cb 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.h @@ -14,11 +14,7 @@ struct FormatSettings; class JSONCompactWithProgressRowOutputFormat final : public JSONRowOutputFormat { public: - JSONCompactWithProgressRowOutputFormat( - WriteBuffer & out_, - const Block & header, - const FormatSettings & settings_, - bool yield_strings_); + JSONCompactWithProgressRowOutputFormat(WriteBuffer & out_, const Block & header, const FormatSettings & settings_, bool yield_strings_); String getName() const override { return "JSONCompactWithProgressRowOutputFormat"; } From 7e790d80845fcb3ae955a773c22ab7a96a595d4b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 8 Jul 2024 13:25:51 +0000 Subject: [PATCH 165/844] fix rename --- src/Storages/MergeTree/MergeTreeData.cpp | 50 ++++++++++++++----- src/Storages/MergeTree/MergeTreeData.h | 16 ++++-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 16 +++--- .../MergeTree/ReplicatedMergeTreeQueue.h | 3 +- src/Storages/StorageMergeTree.cpp | 16 +++--- src/Storages/StorageMergeTree.h | 3 +- 6 files changed, 69 insertions(+), 35 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6083a441a35..d9529b6870c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8668,33 +8668,59 @@ void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) } } -static void updateAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, Int64 increment) +static void updateMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + Int64 increment) { - if (num_alter_conversions < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data alter conversions counter is negative ({})", num_alter_conversions); + if (data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + + if (metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); + + bool has_data_mutation = false; + bool has_metadata_mutation = false; for (const auto & command : commands) { - if (AlterConversions::isSupportedDataMutation(command.type) || AlterConversions::isSupportedMetadataMutation(command.type)) + if (!has_data_mutation && AlterConversions::isSupportedDataMutation(command.type)) { - num_alter_conversions += increment; + data_mutations_to_apply += increment; + has_data_mutation = true; - if (num_alter_conversions < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_alter_conversions); + if (data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + } - return; + if (!has_metadata_mutation && AlterConversions::isSupportedMetadataMutation(command.type)) + { + metadata_mutations_to_apply += increment; + has_metadata_mutation = true; + + if (metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); } } } -void incrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & /*lock*/) +void incrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & /*lock*/) { - updateAlterConversionsCounter(num_alter_conversions, commands, 1); + return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, 1); } -void decrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & /*lock*/) +void decrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & /*lock*/) { - updateAlterConversionsCounter(num_alter_conversions, commands, -1); + return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, -1); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b0773cb4a20..75a7553d157 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -453,9 +453,6 @@ public: Int64 metadata_version = -1; Int64 min_part_metadata_version = -1; bool need_data_mutations = false; - - bool needAnyMutations() const { return need_data_mutations || needMetadataMutations(); } - bool needMetadataMutations() const { return min_part_metadata_version < metadata_version; } }; Params params; @@ -1776,7 +1773,16 @@ struct CurrentlySubmergingEmergingTagger /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated -void incrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & lock); -void decrementAlterConversionsCounter(Int64 & num_alter_conversions, const MutationCommands & commands, std::lock_guard & lock); +void incrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & lock); + +void decrementMutationsCounters( + Int64 & data_mutations_to_apply, + Int64 & metadata_mutations_to_apply, + const MutationCommands & commands, + std::lock_guard & lock); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 807fbeebfc4..0175e427079 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -951,7 +951,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper { const auto commands = entry.commands; it = mutations_by_znode.erase(it); - decrementAlterConversionsCounter(num_alter_conversions, commands, state_lock); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, state_lock); } else it = mutations_by_znode.erase(it); @@ -1001,7 +1001,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)).first->second; - incrementAlterConversionsCounter(num_alter_conversions, entry->commands, lock); + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); NOEXCEPT_SCOPE({ for (const auto & pair : entry->block_numbers) @@ -1076,7 +1076,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( } mutations_by_znode.erase(it); - /// decrementAlterConversionsCounter() will be called in updateMutations() + /// decrementMutationsCounters() will be called in updateMutations() LOG_DEBUG(log, "Removed mutation {} from local state.", entry->znode_name); } @@ -1913,7 +1913,7 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo MutationCommands result; bool seen_all_data_mutations = !params.need_data_mutations; - bool seen_all_metadata_mutations = !params.needMetadataMutations(); + bool seen_all_metadata_mutations = part_metadata_version >= params.metadata_version; if (seen_all_data_mutations && seen_all_metadata_mutations) return {}; @@ -1968,8 +1968,8 @@ MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapsh std::lock_guard lock(state_mutex); - bool need_data_mutations = res->params.need_data_mutations && num_alter_conversions > 0; - bool need_metatadata_mutations = res->params.needMetadataMutations(); + bool need_data_mutations = params.need_data_mutations && data_mutations_to_apply > 0; + bool need_metatadata_mutations = params.min_part_metadata_version < params.metadata_version; if (!need_data_mutations && !need_metatadata_mutations) return res; @@ -2113,7 +2113,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep mutation.parts_to_do.clear(); } - decrementAlterConversionsCounter(num_alter_conversions, mutation.entry->commands, lock); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, mutation.entry->commands, lock); } else if (mutation.parts_to_do.size() == 0) { @@ -2170,7 +2170,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } - decrementAlterConversionsCounter(num_alter_conversions, entry->commands, lock); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 223ca989feb..a46fdaf3ac4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -154,7 +154,8 @@ private: std::map mutations_by_znode; /// Unfinished mutations that are required for AlterConversions. - Int64 num_alter_conversions = 0; + Int64 data_mutations_to_apply = 0; + Int64 metadata_mutations_to_apply = 0; /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ef2a7bda118..5c15818632c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -502,7 +502,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - incrementAlterConversionsCounter(num_alter_conversions, *it->second.commands, lock); + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } background_operations_assignee.trigger(); @@ -538,7 +538,7 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); - decrementAlterConversionsCounter(num_alter_conversions, *entry.commands, lock); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry.commands, lock); } } else @@ -819,7 +819,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { bool mutation_finished = *min_version > static_cast(mutation_version); if (!mutation_finished) - decrementAlterConversionsCounter(num_alter_conversions, *it->second.commands, lock); + decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); } to_kill.emplace(std::move(it->second)); @@ -904,7 +904,7 @@ void StorageMergeTree::loadMutations() if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); - incrementAlterConversionsCounter(num_alter_conversions, *entry_it->second.commands, lock); + incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry_it->second.commands, lock); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -2449,10 +2449,10 @@ MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const std::lock_guard lock(currently_processing_in_background_mutex); - bool need_data_mutations = res->params.need_data_mutations && num_alter_conversions > 0; - bool need_metatadata_mutations = res->params.needMetadataMutations(); + bool need_data_mutations = res->params.need_data_mutations && data_mutations_to_apply > 0; + bool need_metadata_mutations = metadata_mutations_to_apply > 0; - if (!need_data_mutations && !need_metatadata_mutations) + if (!need_data_mutations && !need_metadata_mutations) return res; for (const auto & [version, entry] : current_mutations_by_version) @@ -2462,7 +2462,7 @@ MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const if (need_data_mutations && AlterConversions::isSupportedDataMutation(command.type)) return true; - if (need_metatadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) + if (need_metadata_mutations && AlterConversions::isSupportedMetadataMutation(command.type)) return true; return false; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 40e5a8e5ea4..d2ade17e309 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -150,7 +150,8 @@ private: std::map current_mutations_by_version; /// Unfinished mutations that are required for AlterConversions. - Int64 num_alter_conversions = 0; + Int64 data_mutations_to_apply = 0; + Int64 metadata_mutations_to_apply = 0; std::atomic shutdown_called {false}; std::atomic flush_called {false}; From 6cb361413eff6f870eb2bd43876d82d1c2c5e882 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 9 Jul 2024 16:45:15 +0200 Subject: [PATCH 166/844] fix --- src/Databases/DatabaseReplicated.cpp | 3 +++ tests/integration/test_disk_over_web_server/test.py | 3 ++- .../00510_materizlized_view_and_deduplication_zookeeper.sql | 2 ++ tests/queries/0_stateless/00609_mv_index_in_in.sql | 1 + tests/queries/0_stateless/00738_lock_for_inner_table.sh | 2 ++ tests/queries/0_stateless/01153_attach_mv_uuid.sql | 1 + .../0_stateless/02858_explicit_uuid_and_zk_path.reference | 4 ++-- tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh | 4 ++-- .../0_stateless/02888_replicated_merge_tree_creation.sh | 1 + 9 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index bc9b2537d1e..25d4a3eb704 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1008,6 +1008,9 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setSetting("allow_create_index_without_type", 1); query_context->setSetting("allow_experimental_s3queue", 1); + query_context->setSetting("database_replicated_allow_explicit_uuid", 3); + query_context->setSetting("database_replicated_allow_replicated_engine_arguments", 3); + auto txn = std::make_shared(current_zookeeper, zookeeper_path, false, ""); query_context->initZooKeeperMetadataTransaction(txn); return query_context; diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 891ee8f00f5..ec0bef23731 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -311,7 +311,8 @@ def test_replicated_database(cluster): SETTINGS storage_policy = 'web'; """.format( uuids[0] - ) + ), + settings={"database_replicated_allow_explicit_uuid": 3}, ) node2 = cluster.instances["node2"] diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..5d2f8192d04 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -6,6 +6,8 @@ DROP TABLE IF EXISTS without_deduplication; DROP TABLE IF EXISTS with_deduplication_mv; DROP TABLE IF EXISTS without_deduplication_mv; +SET database_replicated_allow_explicit_uuid=3; +SET database_replicated_allow_replicated_engine_arguments=3; CREATE TABLE with_deduplication(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00510/with_deduplication', 'r1') ORDER BY x; CREATE TABLE without_deduplication(x UInt32) diff --git a/tests/queries/0_stateless/00609_mv_index_in_in.sql b/tests/queries/0_stateless/00609_mv_index_in_in.sql index bd9f35350c1..89fc85651ad 100644 --- a/tests/queries/0_stateless/00609_mv_index_in_in.sql +++ b/tests/queries/0_stateless/00609_mv_index_in_in.sql @@ -6,6 +6,7 @@ DROP TABLE IF EXISTS test_mv_00609; create table test_00609 (a Int8) engine=Memory; insert into test_00609 values (1); +set database_replicated_allow_explicit_uuid=3; set allow_deprecated_syntax_for_merge_tree=1; create materialized view test_mv_00609 uuid '00000609-1000-4000-8000-000000000001' Engine=MergeTree(date, (a), 8192) populate as select a, toDate('2000-01-01') date from test_00609; diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index b62a639d8f4..9bc84dd1063 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -7,6 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +CLICKHOUSE_CLIENT="${CLICKHOUSE_CLIENT} --database_replicated_allow_explicit_uuid 3" + # there are some issues with Atomic database, let's generate it uniq # otherwise flaky check will not pass. uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase())") diff --git a/tests/queries/0_stateless/01153_attach_mv_uuid.sql b/tests/queries/0_stateless/01153_attach_mv_uuid.sql index 00cce8a1de4..6b167253d8f 100644 --- a/tests/queries/0_stateless/01153_attach_mv_uuid.sql +++ b/tests/queries/0_stateless/01153_attach_mv_uuid.sql @@ -14,6 +14,7 @@ INSERT INTO src VALUES (3), (4); SELECT * FROM mv ORDER BY n; DROP TABLE mv SYNC; +SET database_replicated_allow_explicit_uuid=3; SET show_table_uuid_in_table_create_query_if_not_nil=1; CREATE TABLE ".inner_id.e15f3ab5-6cae-4df3-b879-f40deafd82c2" (n Int32, n2 Int64) ENGINE = MergeTree PARTITION BY n % 10 ORDER BY n; ATTACH MATERIALIZED VIEW mv UUID 'e15f3ab5-6cae-4df3-b879-f40deafd82c2' (n Int32, n2 Int64) ENGINE = MergeTree PARTITION BY n % 10 ORDER BY n AS SELECT n, n * n AS n2 FROM src; diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference index 6444e10bb48..874494fb061 100644 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.reference @@ -6,5 +6,5 @@ rmt1 rmt2 02858000-1000-4000-8000-000000000 0 -CREATE TABLE rdb_default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/{shard}\', \'_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 -CREATE TABLE rdb_default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rmt1\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/test/02858_explicit_uuid_and_zk_path_default/rmt/{shard}\', \'_{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rmt2\n(\n `n` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/{uuid}/{shard}\', \'{replica}\')\nORDER BY n\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh index b011aed613a..81a9cef02ff 100755 --- a/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh +++ b/tests/queries/0_stateless/02858_explicit_uuid_and_zk_path.sh @@ -38,7 +38,7 @@ $CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database='$db' ORDER $CLICKHOUSE_CLIENT -q "SELECT substring(toString(uuid) as s, 1, length(s) - 3) FROM system.tables WHERE database='$db' and name='m1'" $CLICKHOUSE_CLIENT -q "SELECT toString(uuid) LIKE '02858000%' FROM system.tables WHERE database='$db' and name='m2'" -$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt1" -$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt2" +$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt1" | sed "s/$db/default/g" +$CLICKHOUSE_CLIENT -q "SHOW CREATE $db.rmt2" | sed "s/$db/default/g" $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS rdb_$CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh b/tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh index b9603e75d2e..466f0d01a7f 100755 --- a/tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh +++ b/tests/queries/0_stateless/02888_replicated_merge_tree_creation.sh @@ -5,6 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +CLICKHOUSE_CLIENT="${CLICKHOUSE_CLIENT} --database_replicated_allow_explicit_uuid 3 --database_replicated_allow_replicated_engine_arguments 3" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_exception_replicated SYNC" From cd0145113f751228e56500f6d2907433929e9622 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 9 Jul 2024 15:05:24 -0300 Subject: [PATCH 167/844] allow other auth methods to be used if no_password is setup but not allowed --- src/Access/IAccessStorage.cpp | 12 +++--------- src/Access/IAccessStorage.h | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 10a0341b301..8e3465f14aa 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -533,7 +533,9 @@ std::optional IAccessStorage::authenticateImpl( auto auth_type = auth_method.getType(); if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) - throwAuthenticationTypeNotAllowed(auth_type); + { + continue; + } if (areCredentialsValid(user->getName(), user->valid_until, auth_method, credentials, external_authenticators, auth_result.settings)) { @@ -756,14 +758,6 @@ void IAccessStorage::throwAddressNotAllowed(const Poco::Net::IPAddress & address throw Exception(ErrorCodes::IP_ADDRESS_NOT_ALLOWED, "Connections from {} are not allowed", address.toString()); } -void IAccessStorage::throwAuthenticationTypeNotAllowed(AuthenticationType auth_type) -{ - throw Exception( - ErrorCodes::AUTHENTICATION_FAILED, - "Authentication type {} is not allowed, check the setting allow_{} in the server configuration", - toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); -} - void IAccessStorage::throwInvalidCredentials() { throw Exception(ErrorCodes::WRONG_PASSWORD, "Invalid credentials"); diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 74350bdbd63..e193b65e77a 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -252,7 +252,6 @@ protected: [[noreturn]] void throwReadonlyCannotRemove(AccessEntityType type, const String & name) const; [[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address); [[noreturn]] static void throwInvalidCredentials(); - [[noreturn]] static void throwAuthenticationTypeNotAllowed(AuthenticationType auth_type); [[noreturn]] void throwBackupNotAllowed() const; [[noreturn]] void throwRestoreNotAllowed() const; From e36776551e95fbbec3c47b6c7560da9afc6c5b64 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 9 Jul 2024 15:58:57 -0300 Subject: [PATCH 168/844] remove unused extern --- src/Access/IAccessStorage.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8e3465f14aa..b3fb87f0c62 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -30,7 +30,6 @@ namespace ErrorCodes extern const int IP_ADDRESS_NOT_ALLOWED; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int AUTHENTICATION_FAILED; } From acc2249288ae94510b42e5d0d4cdaad68c5a89c1 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 9 Jul 2024 22:05:04 -0300 Subject: [PATCH 169/844] do not allow no_password to co-exist with other auth methods --- .../Access/InterpreterCreateUserQuery.cpp | 13 +++++++++++-- src/Parsers/Access/ParserCreateUserQuery.cpp | 15 ++++++++++++++- .../03174_add_identified_with.reference | 8 +++++++- .../0_stateless/03174_add_identified_with.sh | 15 +++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index b5d5c13d9e4..75b0aed24c1 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -65,8 +65,17 @@ namespace user.authentication_methods.emplace_back(); } - // a leading IDENTIFIED WITH will drop existing authentication methods in favor of new ones - if (replace_authentication_methods) + bool has_no_password_authentication_method = std::find_if( + user.authentication_methods.begin(), + user.authentication_methods.end(), + [](const AuthenticationData & authentication_method) + { + return authentication_method.getType() == AuthenticationType::NO_PASSWORD; + }) != user.authentication_methods.end(); + + // 1. a leading IDENTIFIED WITH will drop existing authentication methods in favor of new ones. + // 2. if the user contains an auth method of type NO_PASSWORD and another one is being added, NO_PASSWORD must be dropped + if (replace_authentication_methods || (has_no_password_authentication_method && !authentication_methods.empty())) { user.authentication_methods.clear(); } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 11f79acd979..96294c380a0 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -596,6 +596,19 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec break; } + bool has_no_password_authentication_method = std::find_if( + auth_data.begin(), + auth_data.end(), + [](const std::shared_ptr & ast_authentication_data) + { + return ast_authentication_data->type == AuthenticationType::NO_PASSWORD; + }) != auth_data.end(); + + if (has_no_password_authentication_method && auth_data.size() > 1) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "NO_PASSWORD Authentication method cannot co-exist with other authentication methods"); + } + if (!alter && !hosts) { String common_host_pattern; @@ -630,7 +643,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->valid_until = std::move(valid_until); query->storage_name = std::move(storage_name); query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); - query->replace_authentication_methods = parsed_identified_with; + query->replace_authentication_methods = parsed_identified_with || has_no_password_authentication_method; for (const auto & authentication_method : query->authentication_methods) { diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index e8e1ee9d294..ae330212941 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -37,4 +37,10 @@ CREATE Identified with must precede all add identified with, not allowed BAD_ARGUMENTS Create user with no identification Add identified with -CREATE USER u01_03174 IDENTIFIED WITH no_password ADD IDENTIFIED WITH plaintext_password +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +Try to provide no_password mixed with other authentication methods, should not be allowed +BAD_ARGUMENTS +Adding no_password, should drop existing auth method +CREATE USER u01_03174 IDENTIFIED WITH no_password +Trying to auth with no pwd, should succeed +1 diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 0884f31dd73..c3c42047bcf 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -13,6 +13,11 @@ ssh_key="-----BEGIN OPENSSH PRIVATE KEY----- Ux7i7d3xPoseFrwnhY4YAAAADWFydGh1ckBhcnRodXI= -----END OPENSSH PRIVATE KEY-----" +function test_login_no_pwd +{ + ${CLICKHOUSE_CLIENT} --user $1 --query "select 1" +} + function test_login_pwd { ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" @@ -107,4 +112,14 @@ ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_p ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" +echo "Try to provide no_password mixed with other authentication methods, should not be allowed" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '8' ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + +echo "Adding no_password, should drop existing auth method" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH no_password" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + +echo "Trying to auth with no pwd, should succeed" +test_login_no_pwd ${user} + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 91e8ef6776402579d79afe8c5eccadf34e2482bb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 10 Jul 2024 09:11:02 -0300 Subject: [PATCH 170/844] do not allow no_pwd to co-exist with other auth methods --- src/Parsers/Access/ParserCreateUserQuery.cpp | 7 +++++++ .../0_stateless/03174_add_identified_with.reference | 5 ++++- tests/queries/0_stateless/03174_add_identified_with.sh | 8 ++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 96294c380a0..3b3e419d615 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -523,6 +523,13 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (parsed_add_new_method) { + if (add_new_auth_method->type == AuthenticationType::NO_PASSWORD) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The authentication method 'no_password' cannot be used with the ADD keyword. " + "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); + + } + auth_data.push_back(add_new_auth_method); continue; } diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index ae330212941..cf5e038b58e 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -40,7 +40,10 @@ Add identified with CREATE USER u01_03174 IDENTIFIED WITH plaintext_password Try to provide no_password mixed with other authentication methods, should not be allowed BAD_ARGUMENTS -Adding no_password, should drop existing auth method +Adding no_password, should fail +BAD_ARGUMENTS +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +Replacing existing authentication methods in favor of no_password, should succeed CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index c3c42047bcf..23e878438ac 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -115,8 +115,12 @@ ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" echo "Try to provide no_password mixed with other authentication methods, should not be allowed" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '8' ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "Adding no_password, should drop existing auth method" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH no_password" +echo "Adding no_password, should fail" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + +echo "Replacing existing authentication methods in favor of no_password, should succeed" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH no_password" ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" echo "Trying to auth with no pwd, should succeed" From 29e0d5c1e333e83da7e15d2c5ddaad57369e44ad Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 11 Jul 2024 17:04:28 -0300 Subject: [PATCH 171/844] use comma separated instead of multiple add id add id --- src/Parsers/Access/ASTAuthenticationData.cpp | 7 +- src/Parsers/Access/ASTCreateUserQuery.cpp | 22 ++- src/Parsers/Access/ParserCreateUserQuery.cpp | 167 +++++++++--------- .../03174_add_identified_with.reference | 12 +- .../0_stateless/03174_add_identified_with.sh | 16 +- 5 files changed, 108 insertions(+), 116 deletions(-) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index eb70701d1dc..75082041161 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -44,7 +44,7 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt { if (type && *type == AuthenticationType::NO_PASSWORD) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH no_password" + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " no_password" << (settings.hilite ? IAST::hilite_none : ""); return; } @@ -160,12 +160,9 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt auth_type_name = AuthenticationTypeInfo::get(*type).name; } - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); - if (!auth_type_name.empty()) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " WITH " << auth_type_name - << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " " << auth_type_name << (settings.hilite ? IAST::hilite_none : ""); } if (!prefix.empty()) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0aad4fa8152..4eebc471d5e 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -21,16 +21,24 @@ namespace void formatAuthenticationData(const std::vector> & authentication_methods, const IAST::FormatSettings & settings) { - /* - * The first authentication method must be formatted in the form of: `IDENTIFIED WITH xyz..` - * The remaining ones shall contain the `ADD`: `ADD IDENTIFIED WITH`. - * */ - authentication_methods[0]->format(settings); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); - for (std::size_t i = 1; i < authentication_methods.size(); i++) + // safe because this method is only called if authentication_methods.size > 1 + // if the first type is present, include the `WITH` keyword + if (authentication_methods[0]->type) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " WITH" << (settings.hilite ? IAST::hilite_none : ""); + } + + for (std::size_t i = 0; i < authentication_methods.size(); i++) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " ADD" << (settings.hilite ? IAST::hilite_none : ""); authentication_methods[i]->format(settings); + + bool is_last = i < authentication_methods.size() - 1; + if (is_last) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : ","); + } } } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 3b3e419d615..2d5e4eaea98 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -48,21 +48,10 @@ namespace }); } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & auth_data) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & auth_data, bool is_type_specifier_mandatory) { return IParserBase::wrapParseImpl(pos, [&] { - if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) - { - auth_data = std::make_shared(); - auth_data->type = AuthenticationType::NO_PASSWORD; - - return true; - } - - if (!ParserKeyword{Keyword::IDENTIFIED}.ignore(pos, expected)) - return false; - std::optional type; bool expect_password = false; @@ -73,51 +62,48 @@ namespace bool expect_public_ssh_key = false; bool expect_http_auth_server = false; - if (ParserKeyword{Keyword::WITH}.ignore(pos, expected)) + for (auto check_type : collections::range(AuthenticationType::MAX)) { - for (auto check_type : collections::range(AuthenticationType::MAX)) + if (ParserKeyword{AuthenticationTypeInfo::get(check_type).keyword}.ignore(pos, expected)) { - if (ParserKeyword{AuthenticationTypeInfo::get(check_type).keyword}.ignore(pos, expected)) - { - type = check_type; + type = check_type; - if (check_type == AuthenticationType::LDAP) - expect_ldap_server_name = true; - else if (check_type == AuthenticationType::KERBEROS) - expect_kerberos_realm = true; - else if (check_type == AuthenticationType::SSL_CERTIFICATE) - expect_ssl_cert_subjects = true; - else if (check_type == AuthenticationType::SSH_KEY) - expect_public_ssh_key = true; - else if (check_type == AuthenticationType::HTTP) - expect_http_auth_server = true; - else if (check_type != AuthenticationType::NO_PASSWORD) - expect_password = true; + if (check_type == AuthenticationType::LDAP) + expect_ldap_server_name = true; + else if (check_type == AuthenticationType::KERBEROS) + expect_kerberos_realm = true; + else if (check_type == AuthenticationType::SSL_CERTIFICATE) + expect_ssl_cert_subjects = true; + else if (check_type == AuthenticationType::SSH_KEY) + expect_public_ssh_key = true; + else if (check_type == AuthenticationType::HTTP) + expect_http_auth_server = true; + else if (check_type != AuthenticationType::NO_PASSWORD) + expect_password = true; - break; - } + break; } + } - if (!type) + if (!type) + { + if (ParserKeyword{Keyword::SHA256_HASH}.ignore(pos, expected)) { - if (ParserKeyword{Keyword::SHA256_HASH}.ignore(pos, expected)) - { - type = AuthenticationType::SHA256_PASSWORD; - expect_hash = true; - } - else if (ParserKeyword{Keyword::DOUBLE_SHA1_HASH}.ignore(pos, expected)) - { - type = AuthenticationType::DOUBLE_SHA1_PASSWORD; - expect_hash = true; - } - else if (ParserKeyword{Keyword::BCRYPT_HASH}.ignore(pos, expected)) - { - type = AuthenticationType::BCRYPT_PASSWORD; - expect_hash = true; - } - else - return false; + type = AuthenticationType::SHA256_PASSWORD; + expect_hash = true; } + else if (ParserKeyword{Keyword::DOUBLE_SHA1_HASH}.ignore(pos, expected)) + { + type = AuthenticationType::DOUBLE_SHA1_PASSWORD; + expect_hash = true; + } + else if (ParserKeyword{Keyword::BCRYPT_HASH}.ignore(pos, expected)) + { + type = AuthenticationType::BCRYPT_PASSWORD; + expect_hash = true; + } + else if (is_type_specifier_mandatory) + return false; } /// If authentication type is not specified, then the default password type is used @@ -224,6 +210,43 @@ namespace } + bool parseIdentifiedWith(IParserBase::Pos & pos, Expected & expected, std::vector> & authentication_methods) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) + { + authentication_methods.emplace_back(std::make_shared()); + authentication_methods.back()->type = AuthenticationType::NO_PASSWORD; + + return true; + } + + if (!ParserKeyword{Keyword::IDENTIFIED}.ignore(pos, expected)) + return false; + + bool is_type_specifier_mandatory = ParserKeyword{Keyword::WITH}.ignore(pos, expected); + + std::shared_ptr ast_authentication_data; + while (parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory)) + { + authentication_methods.push_back(ast_authentication_data); + + if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + { + break; + } + + ParserToken{TokenType::Whitespace}.ignore(pos, expected); + + is_type_specifier_mandatory = false; + } + + return !authentication_methods.empty(); + }); + } + + bool parseHostsWithoutPrefix(IParserBase::Pos & pos, Expected & expected, AllowedClientHosts & hosts) { AllowedClientHosts res_hosts; @@ -417,7 +440,7 @@ namespace }); } - bool parseAddNewAuthenticationMethod(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & auth_data) + bool parseAddIdentifiedWith(IParserBase::Pos & pos, Expected & expected, std::vector> & auth_data) { return IParserBase::wrapParseImpl(pos, [&] { @@ -426,7 +449,7 @@ namespace return false; } - return parseAuthenticationData(pos, expected, auth_data); + return parseIdentifiedWith(pos, expected, auth_data); }); } @@ -497,41 +520,14 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - std::shared_ptr identified_with_auth_data; - bool parse_auth_data = parseAuthenticationData(pos, expected, identified_with_auth_data); - - bool found_multiple_identified_with = parsed_identified_with && parse_auth_data; - - if (found_multiple_identified_with) + if (auth_data.empty()) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one identified with is permitted"); - } + parsed_identified_with = parseIdentifiedWith(pos, expected, auth_data); - if (parse_auth_data) - { - if (parsed_add_new_method) + if (!parsed_identified_with) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Identified with must precede add new auth"); + parsed_add_new_method = parseAddIdentifiedWith(pos, expected, auth_data); } - auth_data.push_back(identified_with_auth_data); - parsed_identified_with = true; - continue; - } - - std::shared_ptr add_new_auth_method; - parsed_add_new_method = parseAddNewAuthenticationMethod(pos, expected, add_new_auth_method); - - if (parsed_add_new_method) - { - if (add_new_auth_method->type == AuthenticationType::NO_PASSWORD) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The authentication method 'no_password' cannot be used with the ADD keyword. " - "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); - - } - - auth_data.push_back(add_new_auth_method); - continue; } if (!reset_authentication_methods_to_new.has_value()) @@ -616,6 +612,13 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec throw Exception(ErrorCodes::BAD_ARGUMENTS, "NO_PASSWORD Authentication method cannot co-exist with other authentication methods"); } + if (has_no_password_authentication_method && parsed_add_new_method) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The authentication method 'no_password' cannot be used with the ADD keyword. " + "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); + + } + if (!alter && !hosts) { String common_host_pattern; diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_add_identified_with.reference index cf5e038b58e..38f55c1c16d 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_add_identified_with.reference @@ -24,17 +24,9 @@ AUTHENTICATION_FAILED Should work 1 Multiple identified with, not allowed -BAD_ARGUMENTS -Multiple identified with, not allowed, even if mixed -BAD_ARGUMENTS -Identified with must precede all add identified with, not allowed -BAD_ARGUMENTS +Syntax error CREATE Multiple identified with, not allowed -BAD_ARGUMENTS -CREATE Multiple identified with, not allowed, even if mixed -BAD_ARGUMENTS -CREATE Identified with must precede all add identified with, not allowed -BAD_ARGUMENTS +Syntax error Create user with no identification Add identified with CREATE USER u01_03174 IDENTIFIED WITH plaintext_password diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_add_identified_with.sh index 23e878438ac..51414c77684 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_add_identified_with.sh @@ -45,7 +45,7 @@ test_login_pwd_expect_error ${user} '1' echo "New password should work" test_login_pwd ${user} '2' -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password BY '3' ADD IDENTIFIED WITH plaintext_password BY '4'" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password BY '3', plaintext_password BY '4'" echo "Two new passwords were added, should both work" test_login_pwd ${user} '3' @@ -87,20 +87,12 @@ echo "Should work" test_login_pwd ${user} '6' echo "Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "Multiple identified with, not allowed, even if mixed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7' ADD IDENTIFIED WITH plaintext_password by '8' IDENTIFIED WITH plaintext_password by '9'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "Identified with must precede all add identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" ${CLICKHOUSE_CLIENT} --query "DROP USER ${user}" echo "CREATE Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "CREATE Multiple identified with, not allowed, even if mixed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7' ADD IDENTIFIED WITH plaintext_password by '8' IDENTIFIED WITH plaintext_password by '9'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "CREATE Identified with must precede all add identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password by '7' IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" @@ -113,7 +105,7 @@ ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_p ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" echo "Try to provide no_password mixed with other authentication methods, should not be allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '8' ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" echo "Adding no_password, should fail" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" From ee62318348044f82af4bd06aea8d3d2430f9dd0a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 11 Jul 2024 17:21:33 -0300 Subject: [PATCH 172/844] rename test and add a few more --- ...multiple_authentication_methods.reference} | 8 ++++++++ ... 03174_multiple_authentication_methods.sh} | 20 +++++++++++++++++++ 2 files changed, 28 insertions(+) rename tests/queries/0_stateless/{03174_add_identified_with.reference => 03174_multiple_authentication_methods.reference} (68%) rename tests/queries/0_stateless/{03174_add_identified_with.sh => 03174_multiple_authentication_methods.sh} (80%) diff --git a/tests/queries/0_stateless/03174_add_identified_with.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference similarity index 68% rename from tests/queries/0_stateless/03174_add_identified_with.reference rename to tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 38f55c1c16d..27d5df20091 100644 --- a/tests/queries/0_stateless/03174_add_identified_with.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -39,3 +39,11 @@ Replacing existing authentication methods in favor of no_password, should succee CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 +Create user with mix both implicit and explicit auth type, starting with with +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +Create user with mix both implicit and explicit auth type, starting with by +CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password +Use WITH without providing authentication type, should fail +Syntax error +Create user with ADD identification, should fail, add is not allowed for create query +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/03174_add_identified_with.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh similarity index 80% rename from tests/queries/0_stateless/03174_add_identified_with.sh rename to tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 51414c77684..5503134d880 100755 --- a/tests/queries/0_stateless/03174_add_identified_with.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -119,3 +119,23 @@ echo "Trying to auth with no pwd, should succeed" test_login_no_pwd ${user} ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +echo "Create user with mix both implicit and explicit auth type, starting with with" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +echo "Create user with mix both implicit and explicit auth type, starting with by" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" +${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +echo "Use WITH without providing authentication type, should fail" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" + +echo "Create user with ADD identification, should fail, add is not allowed for create query" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 300d4ae593ce451efcda316125a4ac570d6c3754 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 11 Jul 2024 17:23:37 -0300 Subject: [PATCH 173/844] add comment back and missing file --- src/Access/Authentication.cpp | 2 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 6d7906a0c85..38ef311acaa 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -120,7 +120,7 @@ namespace { case AuthenticationType::NO_PASSWORD: { - return true; + return true; // N.B. even if the password is not empty! } case AuthenticationType::PLAINTEXT_PASSWORD: { diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 2d5e4eaea98..4da0297dbc7 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -527,6 +527,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parsed_identified_with) { parsed_add_new_method = parseAddIdentifiedWith(pos, expected, auth_data); + + if (parsed_add_new_method && !alter) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Create user query is not allowed to have ADD IDENTIFIED, remove the ADD keyword."); + } } } From 8e0e2cec289296d819811aa0f694566a57f7756e Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 11 Jul 2024 18:06:40 -0300 Subject: [PATCH 174/844] fix hilite test --- .../0_stateless/01316_create_user_syntax_hilite.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference index 48d8b4ee8a1..72e0dd9fb50 100644 --- a/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference +++ b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference @@ -1 +1 @@ -CREATE USER user IDENTIFIED WITH plaintext_password BY 'hello' +CREATE USER user IDENTIFIED WITH plaintext_password BY 'hello' From b428327c6ec3947293b56c545202471446e42e9e Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 12 Jul 2024 09:20:36 -0300 Subject: [PATCH 175/844] trigger ci From 2e9b7e833456fc7312dddd328d351216ab34449f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 12 Jul 2024 10:20:59 -0300 Subject: [PATCH 176/844] Removed stale comment and added log_info about skipped auth methods --- src/Access/IAccessStorage.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index b3fb87f0c62..e8e5d57722d 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -524,15 +524,15 @@ std::optional IAccessStorage::authenticateImpl( if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); - // for now, just throw exception in case a user exists with invalid auth method - // back in the day, it would also throw an exception. There might be a smarter alternative - // like a user scan during startup. + bool skipped_not_allowed_authentication_methods = false; + for (const auto & auth_method : user->authentication_methods) { auto auth_type = auth_method.getType(); if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) { + skipped_not_allowed_authentication_methods = true; continue; } @@ -543,6 +543,12 @@ std::optional IAccessStorage::authenticateImpl( } } + if (skipped_not_allowed_authentication_methods) + { + LOG_INFO(log, "Skipped the check for not allowed authentication methods," + "check allow_no_password and allow_plaintext_password settings in the server configuration"); + } + throwInvalidCredentials(); } } From 04c3661b0b18eb4be82ecf4261ded70fa52d7bcc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 12 Jul 2024 13:42:25 +0000 Subject: [PATCH 177/844] Randomize parallel_replicas_local_plan --- src/Interpreters/ClusterProxy/executeQuery.cpp | 2 -- src/Planner/PlannerJoinTree.cpp | 12 ------------ tests/clickhouse-test | 1 + .../02731_parallel_replicas_join_subquery.sql | 1 + .../0_stateless/02771_parallel_replicas_analyzer.sql | 2 ++ 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 832f04913c7..6039c545085 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -29,8 +29,6 @@ #include #include #include -#include -#include #include #include diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 1d9972ae03f..6d8d58fc8ef 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -998,18 +998,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres table_expression_query_info.storage_limits, std::move(reading_step)); query_plan = std::move(query_plan_parallel_replicas); - - if (settings.parallel_replicas_local_plan) - { - const auto old_max_threads = query_plan.getMaxThreads(); - query_plan.setMaxThreads(old_max_threads * 2); - - LOG_TRACE( - getLogger("Planner"), - "Increase max threads from {} to {} to have similar number of threads to remote plan", - old_max_threads, - query_plan.getMaxThreads()); - } } else { diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 958dde0606f..77e984aa960 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -832,6 +832,7 @@ class SettingsRandomizer: "cross_join_min_bytes_to_compress": lambda: random.choice([0, 1, 100000000]), "min_external_table_block_size_bytes": lambda: random.choice([0, 1, 100000000]), "max_parsing_threads": lambda: random.choice([0, 1, 10]), + "parallel_replicas_local_plan": lambda: random.randint(0, 1), } @staticmethod diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index e91e2a19526..f15e7631753 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -23,6 +23,7 @@ LIMIT 100; SET max_parallel_replicas = 3; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; +SET parallel_replicas_local_plan = 1; SET joined_subquery_requires_alias = 0; SELECT '=============== INNER QUERY (NO PARALLEL) ==============='; diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql index 7e27507ada9..3a19768c0cc 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql @@ -17,6 +17,8 @@ INSERT INTO join_inner_table__fuzz_146_replicated SELECT CAST('833c9e22-c245-4eb5-8745-117a9a1f26b1', 'UUID') AS id, CAST(rowNumberInAllBlocks(), 'String') AS key, * FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) LIMIT 10; +SET parallel_replicas_local_plan = 1; + -- Simple query with analyzer and pure parallel replicas SELECT number FROM join_inner_table__fuzz_146_replicated From 4e44ecf286b85d56ba4c17bc6877d62fff98828b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 12 Jul 2024 13:52:33 +0000 Subject: [PATCH 178/844] Fixate setting in 02967_parallel_replicas_joins_and_analyzer --- .../02967_parallel_replicas_joins_and_analyzer.sql.j2 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 index 54505b147a3..fd03e4c0857 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2 @@ -10,6 +10,8 @@ insert into tab1 select number, number, number from numbers(16); insert into tab2 select number * 2, number * 2 from numbers(8); insert into tab3 select number * 4, number * 4 from numbers(4); +set parallel_replicas_local_plan=1; + {% for use_global_in in [0, 1] -%} -- { echoOn } From e22d6035fa3496108b2c9619dcb3fa54c131bf6e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 12 Jul 2024 14:11:04 +0000 Subject: [PATCH 179/844] Cleanup --- tests/queries/0_stateless/02404_memory_bound_merging.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index 0167c27ae44..112640a2e2c 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -66,7 +66,6 @@ insert into pr_t select number % 1000, number % 1000 from numbers_mt(1e6); set allow_experimental_parallel_reading_from_replicas = 1; set parallel_replicas_for_non_replicated_merge_tree = 1; set max_parallel_replicas = 3; -set use_hedged_requests = 0; set cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; set distributed_aggregation_memory_efficient=1; From ca0b821aaf34f8d5cd6ea71c3b491e841601a13f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 12 Jul 2024 14:26:24 -0300 Subject: [PATCH 180/844] fix a few ut --- src/Parsers/tests/gtest_Parser.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index e2ccdc96c95..98197069eb5 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -292,8 +292,8 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "CREATE USER user1 IDENTIFIED WITH no_password" }, { - "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'", - "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'" + "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123', plaintext_password BY 'def123', sha256_password BY 'ghi123'", + "CREATE USER user1 IDENTIFIED WITH plaintext_password BY 'abc123', plaintext_password BY 'def123', sha256_password BY 'ghi123'" }, { "CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", @@ -304,8 +304,8 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, "ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'" }, { - "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'", - "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123' ADD IDENTIFIED WITH plaintext_password BY 'def123' ADD IDENTIFIED WITH sha256_password BY 'ghi123'" + "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123', plaintext_password BY 'def123', sha256_password BY 'ghi123'", + "ALTER USER user1 IDENTIFIED WITH plaintext_password BY 'abc123', plaintext_password BY 'def123', sha256_password BY 'ghi123'" }, { "ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'", From 3ab676041297e2b7ca50725b985286da9c901979 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 12 Jul 2024 15:21:29 -0300 Subject: [PATCH 181/844] update docs --- docs/en/sql-reference/statements/alter/user.md | 4 ++-- docs/en/sql-reference/statements/create/user.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 44c1a931261..e58dc9fbd57 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -66,13 +66,13 @@ ALTER USER john GRANTEES jack; Adds new authentication methods to the user while keeping the existing ones ``` sql -ALTER USER user ADD IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3' +ALTER USER user ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' ``` Reset authentication methods and adds the ones specified in the query (effect of leading IDENTIFIED without the ADD keyword) ``` sql -ALTER USER user IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3' +ALTER USER user IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' ``` Reset authentication methods and keep the most recent added one diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 269e67ab1eb..486d5eb3192 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] - [NOT IDENTIFIED | IDENTIFIED | ADD IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] [IN access_storage_type] @@ -147,7 +147,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re 7. Multiple authentication methods can be specified: ```sql - CREATE USER user1 IDENTIFIED WITH plaintext_password by '1' ADD IDENTIFIED WITH bcrypt_password by '2' ADD IDENTIFIED WITH plaintext_password by '3'' + CREATE USER user1 IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3'' ``` ## User Host From b952a11f35c93b4df1e97325119c6295791ec748 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 12 Jul 2024 21:20:23 +0000 Subject: [PATCH 182/844] Fix 00177_memory_bound_merging --- tests/queries/1_stateful/00177_memory_bound_merging.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index 74b5e80e059..1110ab9a61d 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -64,7 +64,7 @@ test3() { FROM test.hits WHERE CounterID = 1704509 AND UserID = 4322253409885123546 GROUP BY URL, EventDate - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3 + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, parallel_replicas_local_plan=1 ) WHERE explain LIKE '%Aggr%Transform%' OR explain LIKE '%InOrder%'" } From 32dc3fe8d1c0384221cd1f27adc53bfe45a01e28 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 13 Jul 2024 21:19:14 +0000 Subject: [PATCH 183/844] Simplify code --- src/Interpreters/ClusterProxy/executeQuery.cpp | 4 +--- src/Processors/QueryPlan/ReadFromRemote.cpp | 10 ++-------- .../02898_parallel_replicas_progress_bar.sql | 1 + 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 6039c545085..79f4344e6cc 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -519,9 +519,7 @@ void executeQueryWithParallelReplicas( "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard"); } - auto replica_count = new_cluster->getShardsInfo().begin()->getAllNodeCount(); - if (settings.max_parallel_replicas < replica_count) - replica_count = settings.max_parallel_replicas; + const auto replica_count = std::min(settings.max_parallel_replicas.value, new_cluster->getShardsInfo().begin()->getAllNodeCount()); auto coordinator = std::make_shared(replica_count, settings.parallel_replicas_mark_segment_size); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 26c57c4a760..3da22265c5c 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -458,7 +458,6 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder pools_to_use.reserve(shuffled_pool.size()); if (exclude_local_replica) { - std::vector local_addr_possitions; for (auto & pool : shuffled_pool) { const auto & hostname = pool.pool->getHost(); @@ -466,17 +465,12 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder begin(shard.local_addresses), end(shard.local_addresses), [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); - if (it != shard.local_addresses.end()) + if (it == shard.local_addresses.end()) { - pool.pool.reset(); + pools_to_use.push_back(pool.pool); } } } - for (const auto & pool : shuffled_pool) - { - if (pool.pool) - pools_to_use.push_back(pool.pool); - } LOG_DEBUG( getLogger("ReadFromParallelRemoteReplicasStep"), diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index d3bf228e0fb..9348ea1dc32 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -26,6 +26,7 @@ WHERE query_id in (select query_id from system.query_log where current_database AND message LIKE '%Total rows to read: 3000%' SETTINGS allow_experimental_parallel_reading_from_replicas=0; -- reading in order coordinator +-- disable parallel_replicas_local_plan since the test relay on traces which only present in case of no local plan SELECT k, sipHash64(v) FROM t1 order by k limit 5 offset 998 SETTINGS optimize_read_in_order=1, parallel_replicas_local_plan=0, log_comment='02898_inorder_190aed82-2423-413b-ad4c-24dcca50f65b'; SYSTEM FLUSH LOGS; From 0c986cca7c91a462ec77fc6a198e65fdb0ef4b10 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 14 Jul 2024 16:37:00 +0000 Subject: [PATCH 184/844] Fix in-order coordinator + register all replicas for parts in working set --- .../MergeTree/ParallelReplicasReadingCoordinator.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 14c706c77ef..ff309f17893 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -863,9 +863,6 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa ++stats[announcement.replica_num].number_of_requests; - if (state_initialized) - return; - size_t new_rows_to_read = 0; /// To get rid of duplicates @@ -874,13 +871,16 @@ void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRa auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(), [&part] (const Part & other) { return other.description.info == part.info; }); - /// We have the same part - add the info about presence on current replica to it + /// We have the same part - add the info about presence on the corresponding replica to it if (the_same_it != all_parts_to_read.end()) { the_same_it->replicas.insert(announcement.replica_num); continue; } + if (state_initialized) + continue; + auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(), [&part] (const Part & other) { return other.description.info.contains(part.info) || part.info.contains(other.description.info); }); From 9f20e33d50810f20ea17355512abed0a04a495a2 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 14 Jul 2024 17:52:51 +0000 Subject: [PATCH 185/844] Fix --- src/Processors/QueryPlan/ReadFromRemote.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 3da22265c5c..712f714376b 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -456,9 +456,9 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder std::vector pools_to_use; pools_to_use.reserve(shuffled_pool.size()); - if (exclude_local_replica) + for (const auto & pool : shuffled_pool) { - for (auto & pool : shuffled_pool) + if (exclude_local_replica) { const auto & hostname = pool.pool->getHost(); auto it = std::find_if( @@ -466,9 +466,11 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder end(shard.local_addresses), [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); if (it == shard.local_addresses.end()) - { pools_to_use.push_back(pool.pool); - } + } + else + { + pools_to_use.push_back(pool.pool); } } From f7befaf68e19d60fe3678c15b7367b815ce6da17 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 14 Jul 2024 18:46:14 +0000 Subject: [PATCH 186/844] Polish + comments --- src/Processors/QueryPlan/ReadFromRemote.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 712f714376b..c05bda62648 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -474,18 +474,17 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder } } + pools_to_use.resize(std::min(pools_to_use.size(), all_replicas_count)); + // if local plan is used for local replica, we should exclude one remote replica + if (exclude_local_replica && !pools_to_use.empty()) + pools_to_use.resize(all_replicas_count - 1); + LOG_DEBUG( getLogger("ReadFromParallelRemoteReplicasStep"), "Number of pools to use is {}. Originally {}", pools_to_use.size(), shuffled_pool.size()); - if (pools_to_use.size() > all_replicas_count) - pools_to_use.resize(all_replicas_count); - - if (exclude_local_replica && !pools_to_use.empty()) - pools_to_use.resize(all_replicas_count - 1); - if (pools_to_use.empty()) return; @@ -497,7 +496,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", pool_addresses); } - /// local replicas has number 0 + /// when using local plan for local replica, local replica has 0 number size_t offset = (exclude_local_replica ? 1 : 0); for (size_t i = 0 + offset; i < all_replicas_count; ++i) { From 261ff133b82a61885a54e10ac890be2b8eb44831 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 15 Jul 2024 12:03:12 +0000 Subject: [PATCH 187/844] Fix 02784_parallel_replicas_automatic_decision_join --- .../02784_parallel_replicas_automatic_decision.sh | 1 - ...parallel_replicas_automatic_decision_join.reference | 10 +++++----- .../02784_parallel_replicas_automatic_decision_join.sh | 10 +++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh index dd9b5c3f6d9..e5658f31a34 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh @@ -6,7 +6,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_CLIENT_TRACE=${CLICKHOUSE_CLIENT/"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"/"--send_logs_level=trace"} function were_parallel_replicas_used () { - # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it $CLICKHOUSE_CLIENT --query " SELECT initial_query_id, diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference index 521e3e2edbc..595b35db610 100644 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference @@ -4,8 +4,8 @@ 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 10 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_300k_pure 3 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_300k_pure 33 estimated parallel replicas -02784_automatic_parallel_replicas_join-default_simple_join_0_pure Distinct parallel subqueries: 2 Used parallel replicas: true -02784_automatic_parallel_replicas_join-default_simple_join_10M_pure Distinct parallel subqueries: 0 Used parallel replicas: false -02784_automatic_parallel_replicas_join-default_simple_join_5M_pure Distinct parallel subqueries: 1 Used parallel replicas: true -02784_automatic_parallel_replicas_join-default_simple_join_1M_pure Distinct parallel subqueries: 1 Used parallel replicas: true -02784_automatic_parallel_replicas_join-default_simple_join_300k_pure Distinct parallel subqueries: 2 Used parallel replicas: true +02784_automatic_parallel_replicas_join-default_simple_join_0_pure Used parallel replicas: true +02784_automatic_parallel_replicas_join-default_simple_join_10M_pure Used parallel replicas: false +02784_automatic_parallel_replicas_join-default_simple_join_5M_pure Used parallel replicas: true +02784_automatic_parallel_replicas_join-default_simple_join_1M_pure Used parallel replicas: true +02784_automatic_parallel_replicas_join-default_simple_join_300k_pure Used parallel replicas: true diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index 801cd22b79f..238d63e6980 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -9,17 +9,17 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_CLIENT_TRACE=${CLICKHOUSE_CLIENT/"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"/"--send_logs_level=trace"} function were_parallel_replicas_used () { - # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it $CLICKHOUSE_CLIENT --query " SELECT initial_query_id, - concat('Distinct parallel subqueries: ' , countDistinctIf(query, initial_query_id != query_id)::String) as subqueries_parallelized, - concat('Used parallel replicas: ', (countIf(initial_query_id != query_id) != 0)::bool::String) as used + concat('Used parallel replicas: ', (ProfileEvents['ParallelReplicasUsedCount'] > 0)::bool::String) as used FROM system.query_log WHERE event_date >= yesterday() AND initial_query_id LIKE '$1%' - GROUP BY initial_query_id - ORDER BY min(event_time_microseconds) ASC + AND query_id = initial_query_id + AND type = 'QueryFinish' + AND current_database = '$CLICKHOUSE_DATABASE' + ORDER BY event_time_microseconds ASC FORMAT TSV" } From 02a2f509163e5df370440b4eed792dcbd995942c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 15 Jul 2024 18:37:19 +0000 Subject: [PATCH 188/844] Fix clang warning --- src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 79f4344e6cc..4a637db3887 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -519,7 +519,7 @@ void executeQueryWithParallelReplicas( "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard"); } - const auto replica_count = std::min(settings.max_parallel_replicas.value, new_cluster->getShardsInfo().begin()->getAllNodeCount()); + const auto replica_count = std::min(settings.max_parallel_replicas.value, new_cluster->getShardsInfo().begin()->getAllNodeCount()); auto coordinator = std::make_shared(replica_count, settings.parallel_replicas_mark_segment_size); From 006f20858adaba4b9126aa18cfc02a1fd813d967 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 15 Jul 2024 17:01:02 -0300 Subject: [PATCH 189/844] remove dupl inc --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..2602afd8b78 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include From 860b18c251a3492682d410fbcc54576e87f4ca25 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 15 Jul 2024 20:40:55 +0000 Subject: [PATCH 190/844] Update test for in-(reverse)-order coordinator --- .../ParallelReplicasReadingCoordinator.cpp | 2 +- src/Storages/MergeTree/RequestResponse.cpp | 4 +--- ...02950_parallel_replicas_used_count.reference | 10 ++++++++-- .../02950_parallel_replicas_used_count.sql | 17 ++++++++++++----- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index ff309f17893..e1c0d87837a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -918,7 +918,7 @@ ParallelReadResponse InOrderCoordinator::handleRequest(ParallelReadRequest "Replica {} decided to read in {} mode, not in {}. This is a bug", request.replica_num, magic_enum::enum_name(request.mode), magic_enum::enum_name(mode)); - LOG_TRACE(log, "Got request from replica {}, data {}", request.replica_num, request.describe()); + LOG_TRACE(log, "Got read request: {}", request.describe()); ParallelReadResponse response; response.description = request.description; diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 2ce0e20dcd2..33cd935c88b 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -44,9 +44,7 @@ void ParallelReadRequest::serialize(WriteBuffer & out) const String ParallelReadRequest::describe() const { - String result; - result += fmt::format("replica_num: {} \n", replica_num); - result += fmt::format("min_num_of_marks: {} \n", min_number_of_marks); + String result = fmt::format("replica_num {}, min_num_of_marks {}, ", replica_num, min_number_of_marks); result += description.describe(); return result; } diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference index 21b7b527b7a..cc8284bfeac 100644 --- a/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference @@ -1,8 +1,14 @@ -100 4950 +10000 49995000 1 89 90 91 92 93 -1 +3 +93 +92 +91 +90 +89 +3 diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql index 22f55acd365..69f1dc47ded 100644 --- a/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql @@ -2,11 +2,12 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (k UInt64, v String) ENGINE = MergeTree -ORDER BY k; +ORDER BY k +SETTINGS index_granularity=1; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(10_000); -SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; -- default coordinator SELECT count(), sum(k) @@ -14,12 +15,18 @@ FROM test SETTINGS log_comment = '02950_parallel_replicas_used_replicas_count'; SYSTEM FLUSH LOGS; -SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SELECT ProfileEvents['ParallelReplicasUsedCount'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; -- In order coordinator -SELECT k FROM test order by k limit 5 offset 89 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_2'; +SELECT k FROM test order by k limit 5 offset 89 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_2', merge_tree_min_rows_for_concurrent_read=1, max_threads=1; SYSTEM FLUSH LOGS; SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_2' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +-- In reverse order coordinator +SELECT k FROM test order by k desc limit 5 offset 9906 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_3', merge_tree_min_rows_for_concurrent_read=1, max_threads=1; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_3' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; + DROP TABLE test; From 69c1e68359f08ee648eba819f445cbde9b8c82e0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 07:48:40 +0000 Subject: [PATCH 191/844] Fix clang-tidy warning: remove include duplicate --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..2602afd8b78 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include From 27db36cd4fbc2b0d74e982a3992778c62ccc2482 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 08:46:24 +0000 Subject: [PATCH 192/844] Separate converting actions in separte source file --- .../QueryPlan/ConvertingActions.cpp | 32 +++++++++++++++++++ src/Processors/QueryPlan/ConvertingActions.h | 9 ++++++ .../QueryPlan/DistributedCreateLocalPlan.cpp | 27 +--------------- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 5 +-- 4 files changed, 45 insertions(+), 28 deletions(-) create mode 100644 src/Processors/QueryPlan/ConvertingActions.cpp create mode 100644 src/Processors/QueryPlan/ConvertingActions.h diff --git a/src/Processors/QueryPlan/ConvertingActions.cpp b/src/Processors/QueryPlan/ConvertingActions.cpp new file mode 100644 index 00000000000..ff106ff08c1 --- /dev/null +++ b/src/Processors/QueryPlan/ConvertingActions.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +namespace DB +{ + +void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects) +{ + if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) + return; + + auto mode = has_missing_objects ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name; + + auto get_converting_dag = [mode](const Block & block_, const Block & header_) + { + /// Convert header structure to expected. + /// Also we ignore constants from result and replace it with constants from header. + /// It is needed for functions like `now64()` or `randConstant()` because their values may be different. + return ActionsDAG::makeConvertingActions( + block_.getColumnsWithTypeAndName(), + header_.getColumnsWithTypeAndName(), + mode, + true); + }; + + auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); + auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + plan.addStep(std::move(converting)); +} + +} diff --git a/src/Processors/QueryPlan/ConvertingActions.h b/src/Processors/QueryPlan/ConvertingActions.h new file mode 100644 index 00000000000..6bdf9b8af9a --- /dev/null +++ b/src/Processors/QueryPlan/ConvertingActions.h @@ -0,0 +1,9 @@ +#pragma once + +namespace DB +{ +class QueryPlan; +class Block; + +void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects); +} diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index bad0380cb46..eb699858bdf 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -2,38 +2,13 @@ #include #include -#include #include #include -#include +#include namespace DB { -void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects) -{ - if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header)) - return; - - auto mode = has_missing_objects ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name; - - auto get_converting_dag = [mode](const Block & block_, const Block & header_) - { - /// Convert header structure to expected. - /// Also we ignore constants from result and replace it with constants from header. - /// It is needed for functions like `now64()` or `randConstant()` because their values may be different. - return ActionsDAG::makeConvertingActions( - block_.getColumnsWithTypeAndName(), - header_.getColumnsWithTypeAndName(), - mode, - true); - }; - - auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); - auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); - plan.addStep(std::move(converting)); -} - std::unique_ptr createLocalPlan( const ASTPtr & query_ast, const Block & header, diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index 5f48a12072b..d2e862a3416 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -77,8 +78,8 @@ std::unique_ptr createLocalPlanForParallelReplicas( analyzed_result_ptr = analyzed_merge_tree->getAnalyzedResult(); } - MergeTreeAllRangesCallback all_ranges_cb - = [coordinator](InitialAllRangesAnnouncement announcement) { coordinator->handleInitialAllRangesAnnouncement(announcement); }; + MergeTreeAllRangesCallback all_ranges_cb = [coordinator](InitialAllRangesAnnouncement announcement) + { coordinator->handleInitialAllRangesAnnouncement(std::move(announcement)); }; MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; From 4caf9b6e6fa53adb1e7208c992469175a9344c2f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 08:57:15 +0000 Subject: [PATCH 193/844] Better diagnostic message formatting --- src/Processors/QueryPlan/ReadFromRemote.cpp | 22 ++++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 3c6c651ae02..caa332c2ebc 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -22,7 +22,7 @@ #include #include -#include +#include namespace DB { @@ -391,15 +391,11 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( std::vector replicas; replicas.reserve(cluster->getShardsAddresses().front().size()); - bool first_local = false; for (const auto & addr : cluster->getShardsAddresses().front()) { /// skip first local - if (exclude_local_replica && addr.is_local && !first_local) - { - first_local = true; + if (exclude_local_replica && addr.is_local) continue; - } /// replace hostname with replica name if the hostname started with replica namespace, /// it makes description shorter and more readable @@ -409,7 +405,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( replicas.push_back(fmt::format("{}", addr.host_name)); } - auto description = fmt::format("Query: {} Replicas: ", formattedAST(query_ast)) + boost::algorithm::join(replicas, ", "); + auto description = fmt::format("Query: {} Replicas: {}", formattedAST(query_ast), fmt::join(replicas, ", ")); setStepDescription(std::move(description)); } @@ -489,13 +485,11 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder if (pools_to_use.empty()) return; - { - String pool_addresses; - for (const auto & pool : pools_to_use) - pool_addresses += pool->getAddress() + ";"; - - LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", pool_addresses); - } + std::vector addresses; + addresses.reserve(pools_to_use.size()); + for (const auto & pool : pools_to_use) + addresses.emplace_back(pool->getAddress()); + LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", fmt::join(addresses, ", ")); /// when using local plan for local replica, local replica has 0 number size_t offset = (exclude_local_replica ? 1 : 0); From 08dc1c8c37d287ddf3cf3a785b6c8334e773840a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 19:59:16 +0000 Subject: [PATCH 194/844] Fallback to local execution in case of cluster(shard) has only one node --- .../ClusterProxy/executeQuery.cpp | 39 ++++++++++++++++++- src/Interpreters/ClusterProxy/executeQuery.h | 2 + src/Planner/PlannerJoinTree.cpp | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 4a637db3887..7207efd9ef6 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -488,12 +488,12 @@ void executeQueryWithParallelReplicas( shard_num = column->getUInt(0); } - const auto shard_count = not_optimized_cluster->getShardCount(); ClusterPtr new_cluster = not_optimized_cluster; /// if got valid shard_num from query initiator, then parallel replicas scope is the specified shard /// shards are numbered in order of appearance in the cluster config if (shard_num > 0) { + const auto shard_count = not_optimized_cluster->getShardCount(); if (shard_num > shard_count) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -702,6 +702,43 @@ void executeQueryWithParallelReplicasCustomKey( context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr); executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); } + +bool canUseParallelReplicasOnInitiator(const ContextPtr & context) +{ + if (!context->canUseParallelReplicasOnInitiator()) + return false; + + auto cluster = context->getClusterForParallelReplicas(); + if (cluster->getShardCount() == 1) + return cluster->getShardsInfo()[0].getAllNodeCount() > 1; + + /// parallel replicas with distributed table + auto scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; + UInt64 shard_num = 0; /// shard_num is 1-based, so 0 - no shard specified + const auto it = scalars.find("_shard_num"); + if (it != scalars.end()) + { + const Block & block = it->second; + const auto & column = block.safeGetByPosition(0).column; + shard_num = column->getUInt(0); + } + if (shard_num > 0) + { + const auto shard_count = cluster->getShardCount(); + if (shard_num > shard_count) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Shard number is greater than shard count: shard_num={} shard_count={} cluster={}", + shard_num, + shard_count, + cluster->getName()); + + return cluster->getShardsInfo().at(shard_num - 1).getAllNodeCount() > 1; + } + + return false; +} + } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 2c63b7b89f0..2a21f3e8255 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -58,6 +58,8 @@ using AdditionalShardFilterGenerator = std::function; AdditionalShardFilterGenerator getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns); +bool canUseParallelReplicasOnInitiator(const ContextPtr & context); + /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 4c21f0d00cc..c1eb2b48045 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -920,7 +920,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres query_plan = std::move(query_plan_parallel_replicas); } } - else if (query_context->canUseParallelReplicasOnInitiator()) + else if (ClusterProxy::canUseParallelReplicasOnInitiator(query_context)) { // (1) find read step QueryPlan::Node * node = query_plan.getRootNode(); From fc693cc982cce7b4aaf71933bd18aa751ed3b7a0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 16 Jul 2024 21:05:27 +0000 Subject: [PATCH 195/844] Fix 02982_parallel_replicas_unexpected_cluster --- src/Interpreters/ClusterProxy/executeQuery.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 7207efd9ef6..c0d156a8894 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -736,6 +736,12 @@ bool canUseParallelReplicasOnInitiator(const ContextPtr & context) return cluster->getShardsInfo().at(shard_num - 1).getAllNodeCount() > 1; } + if (cluster->getShardCount() > 1) + throw DB::Exception( + ErrorCodes::UNEXPECTED_CLUSTER, + "`cluster_for_parallel_replicas` setting refers to cluster with {} shards. Expected a cluster with one shard", + cluster->getShardCount()); + return false; } From a3d4fd9246b2e1953ed04d75840a9f3399d37ba4 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 17 Jul 2024 10:01:26 +0000 Subject: [PATCH 196/844] Fix 02950_parallel_replicas_used_count --- .../0_stateless/02950_parallel_replicas_used_count.reference | 4 ++-- .../0_stateless/02950_parallel_replicas_used_count.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference index cc8284bfeac..c1265b7ca14 100644 --- a/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.reference @@ -5,10 +5,10 @@ 91 92 93 -3 +1 93 92 91 90 89 -3 +1 diff --git a/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql index 69f1dc47ded..4396ca60e0e 100644 --- a/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql +++ b/tests/queries/0_stateless/02950_parallel_replicas_used_count.sql @@ -21,12 +21,12 @@ SELECT ProfileEvents['ParallelReplicasUsedCount'] > 0 FROM system.query_log WHER SELECT k FROM test order by k limit 5 offset 89 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_2', merge_tree_min_rows_for_concurrent_read=1, max_threads=1; SYSTEM FLUSH LOGS; -SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_2' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SELECT ProfileEvents['ParallelReplicasUsedCount'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_2' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; -- In reverse order coordinator SELECT k FROM test order by k desc limit 5 offset 9906 SETTINGS optimize_read_in_order=1, log_comment='02950_parallel_replicas_used_replicas_count_3', merge_tree_min_rows_for_concurrent_read=1, max_threads=1; SYSTEM FLUSH LOGS; -SELECT ProfileEvents['ParallelReplicasUsedCount'] FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_3' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SELECT ProfileEvents['ParallelReplicasUsedCount'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND query_id IN (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '02950_parallel_replicas_used_replicas_count_3' AND type = 'QueryFinish' AND initial_query_id = query_id) SETTINGS allow_experimental_parallel_reading_from_replicas=0; DROP TABLE test; From baf3408b16b3634c59d6635afbb53b849cf55d0e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 17 Jul 2024 11:45:00 +0000 Subject: [PATCH 197/844] Fix: fallback to local plan w/o PR in case of empty table --- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 +++++++- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 11 ++++++----- src/Processors/QueryPlan/ParallelReplicasLocalPlan.h | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index c0d156a8894..a8cf3022a61 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -528,7 +528,7 @@ void executeQueryWithParallelReplicas( /// do not build local plan for distributed queries for now (address it later) if (settings.allow_experimental_analyzer && settings.parallel_replicas_local_plan && !shard_num) { - auto local_plan = createLocalPlanForParallelReplicas( + auto [local_plan, with_parallel_replicas] = createLocalPlanForParallelReplicas( query_ast, header, new_context, @@ -536,6 +536,12 @@ void executeQueryWithParallelReplicas( coordinator, std::move(analyzed_read_from_merge_tree)); + if (!with_parallel_replicas) + { + query_plan = std::move(*local_plan); + return; + } + auto read_from_remote = std::make_unique( query_ast, new_cluster, diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index d2e862a3416..e9d5ef90e30 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -21,9 +21,7 @@ namespace DB { -void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects); - -std::unique_ptr createLocalPlanForParallelReplicas( +std::pair, bool> createLocalPlanForParallelReplicas( const ASTPtr & query_ast, const Block & header, ContextPtr context, @@ -68,7 +66,9 @@ std::unique_ptr createLocalPlanForParallelReplicas( node = nullptr; } - chassert(reading); + if (!reading) + /// it can happened if merge tree table is empty, - it'll be replaced with ReadFromPreparedSource + return {std::move(query_plan), false}; ReadFromMergeTree::AnalysisResultPtr analyzed_result_ptr; if (analyzed_read_from_merge_tree.get()) @@ -89,7 +89,8 @@ std::unique_ptr createLocalPlanForParallelReplicas( node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); - return query_plan; + + return {std::move(query_plan), true}; } } diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h index 123754458a1..2a49be6347a 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h @@ -8,7 +8,7 @@ namespace DB { -std::unique_ptr createLocalPlanForParallelReplicas( +std::pair, bool> createLocalPlanForParallelReplicas( const ASTPtr & query_ast, const Block & header, ContextPtr context, From 97f4ec2adbf614842e9b16badb69a8d5c642abe0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 17 Jul 2024 16:59:35 +0200 Subject: [PATCH 198/844] Read cgroup memory usage in async metrics thread --- programs/keeper/Keeper.cpp | 26 ++-- programs/server/Server.cpp | 34 ++--- src/Common/AsynchronousMetrics.cpp | 13 +- src/Common/AsynchronousMetrics.h | 5 +- src/Common/CgroupsMemoryUsageObserver.cpp | 127 ++++-------------- src/Common/CgroupsMemoryUsageObserver.h | 19 +-- src/Common/MemoryTracker.cpp | 38 ++---- src/Common/MemoryTracker.h | 4 +- .../KeeperAsynchronousMetrics.cpp | 2 +- src/Interpreters/Context.cpp | 13 ++ src/Interpreters/Context.h | 4 + .../ServerAsynchronousMetrics.cpp | 2 +- 12 files changed, 112 insertions(+), 175 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 44c2daa33ad..3f6020ad48c 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -399,6 +399,18 @@ try registerDisks(/*global_skip_access_check=*/false); + auto cgroups_memory_observer_wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 15); + try + { + auto cgroups_reader = createCgroupsReader(); + global_context->setCgroupsReader(createCgroupsReader()); + } + catch (...) + { + if (cgroups_memory_observer_wait_time != 0) + tryLogCurrentException(log, "Failed to create cgroups reader"); + } + /// This object will periodically calculate some metrics. KeeperAsynchronousMetrics async_metrics( global_context, @@ -622,21 +634,19 @@ try main_config_reloader->start(); std::optional cgroups_memory_usage_observer; - try + if (cgroups_memory_observer_wait_time != 0) { - auto wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 15); - if (wait_time != 0) + auto cgroups_reader = global_context->getCgroupsReader(); + if (cgroups_reader) { - cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time)); + cgroups_memory_usage_observer.emplace(std::chrono::seconds(cgroups_memory_observer_wait_time), global_context->getCgroupsReader()); /// Not calling cgroups_memory_usage_observer->setLimits() here (as for the normal ClickHouse server) because Keeper controls /// its memory usage by other means (via setting 'max_memory_usage_soft_limit'). cgroups_memory_usage_observer->setOnMemoryAmountAvailableChangedFn([&]() { main_config_reloader->reload(); }); cgroups_memory_usage_observer->startThread(); } - } - catch (Exception &) - { - tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); + else + LOG_ERROR(log, "Disabling cgroup memory observer because of an error during initialization of cgroups reader"); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 053ddaf8d8b..c52b1e037ec 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -897,6 +897,17 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); + try + { + auto cgroups_reader = createCgroupsReader(); + global_context->setCgroupsReader(createCgroupsReader()); + } + catch (...) + { + if (server_settings.cgroups_memory_usage_observer_wait_time != 0) + tryLogCurrentException(log, "Failed to create cgroups reader"); + } + /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( global_context, @@ -1456,15 +1467,13 @@ try } std::optional cgroups_memory_usage_observer; - try + if (auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time; wait_time != 0) { - auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time; - if (wait_time != 0) - cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time)); - } - catch (Exception &) - { - tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); + auto cgroups_reader = global_context->getCgroupsReader(); + if (cgroups_reader) + cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time), std::move(cgroups_reader)); + else + LOG_ERROR(log, "Disabling cgroup memory observer because of an error during initialization of cgroups reader"); } std::string cert_path = config().getString("openSSL.server.certificateFile", ""); @@ -1532,15 +1541,6 @@ try total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); - if (cgroups_memory_usage_observer) - { - double hard_limit_ratio = new_server_settings.cgroup_memory_watcher_hard_limit_ratio; - double soft_limit_ratio = new_server_settings.cgroup_memory_watcher_soft_limit_ratio; - cgroups_memory_usage_observer->setMemoryUsageLimits( - static_cast(max_server_memory_usage * hard_limit_ratio), - static_cast(max_server_memory_usage * soft_limit_ratio)); - } - size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit; size_t default_merges_mutations_server_memory_usage = static_cast(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio); diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 6309f6079f6..0953ad88697 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -57,10 +57,12 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + std::shared_ptr cgroups_reader_) : update_period(update_period_seconds) , log(getLogger("AsynchronousMetrics")) , protocol_server_metrics_func(protocol_server_metrics_func_) + , cgroups_reader(std::move(cgroups_reader_)) { #if defined(OS_LINUX) openFileIfExists("/proc/meminfo", meminfo); @@ -669,6 +671,13 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize(); #endif + if (cgroups_reader != nullptr) + { + rss = cgroups_reader->readMemoryUsage(); + new_values["CgroupsMemoryUsage"] = { rss, + "The amount of physical memory used by the server process, reported by cgroups." }; + } + Int64 difference = rss - amount; /// Log only if difference is high. This is for convenience. The threshold is arbitrary. @@ -681,7 +690,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) ReadableSize(rss), ReadableSize(difference)); - MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas); + MemoryTracker::setRSS(rss, /*has_free_memory_in_allocator_arenas_=*/free_memory_in_allocator_arenas > 0); } } diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 10a972d2458..0b110f41fc3 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -68,7 +69,8 @@ public: AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + std::shared_ptr cgroups_reader_); virtual ~AsynchronousMetrics(); @@ -91,6 +93,7 @@ private: virtual void logImpl(AsynchronousMetricValues &) {} ProtocolServerMetricsFunc protocol_server_metrics_func; + std::shared_ptr cgroups_reader; std::unique_ptr thread; diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 02bde0d80b7..b12845df098 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -17,13 +17,6 @@ #include #include -#include "config.h" -#if USE_JEMALLOC -# include -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) -#endif - using namespace DB; namespace fs = std::filesystem; @@ -155,15 +148,21 @@ std::optional getCgroupsV1Path() return {default_cgroups_mount / "memory"}; } -std::pair getCgroupsPath() +enum class CgroupsVersion : uint8_t +{ + V1, + V2 +}; + +std::pair getCgroupsPath() { auto v2_path = getCgroupsV2Path(); if (v2_path.has_value()) - return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2}; + return {*v2_path, CgroupsVersion::V2}; auto v1_path = getCgroupsV1Path(); if (v1_path.has_value()) - return {*v1_path, CgroupsMemoryUsageObserver::CgroupsVersion::V1}; + return {*v1_path, CgroupsVersion::V1}; throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); } @@ -173,22 +172,29 @@ std::pair getCgroupsPat namespace DB { -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) +std::shared_ptr createCgroupsReader() { const auto [cgroup_path, version] = getCgroupsPath(); + LOG_INFO( + getLogger("CgroupsReader"), + "Will create cgroup reader from '{}' (cgroups version: {})", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2"); if (version == CgroupsVersion::V2) - cgroup_reader = std::make_unique(cgroup_path); + return std::make_shared(cgroup_path); else - cgroup_reader = std::make_unique(cgroup_path); + { + chassert(version == CgroupsVersion::V1); + return std::make_shared(cgroup_path); + } - LOG_INFO( - log, - "Will read the current memory usage from '{}' (cgroups version: {}), wait time is {} sec", - cgroup_path, - (version == CgroupsVersion::V1) ? "v1" : "v2", - wait_time.count()); +} + +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_, std::shared_ptr cgroups_reader_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_), cgroups_reader(std::move(cgroups_reader_)) +{ + cgroups_reader = createCgroupsReader(); } CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() @@ -196,58 +202,6 @@ CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() stopThread(); } -void CgroupsMemoryUsageObserver::setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_) -{ - std::lock_guard limit_lock(limit_mutex); - - if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) - return; - - hard_limit = hard_limit_; - soft_limit = soft_limit_; - - on_hard_limit = [this, hard_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded hard memory limit ({})", ReadableSize(hard_limit_)); - - /// Update current usage in memory tracker. Also reset free_memory_in_allocator_arenas to zero though we don't know if they are - /// really zero. Trying to avoid OOM ... - MemoryTracker::setRSS(hard_limit_, 0); - } - else - { - LOG_INFO(log, "Dropped below hard memory limit ({})", ReadableSize(hard_limit_)); - } - }; - - on_soft_limit = [this, soft_limit_](bool up) - { - if (up) - { - LOG_WARNING(log, "Exceeded soft memory limit ({})", ReadableSize(soft_limit_)); - -# if USE_JEMALLOC - LOG_INFO(log, "Purging jemalloc arenas"); - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); -# endif - /// Reset current usage in memory tracker. Expect zero for free_memory_in_allocator_arenas as we just purged them. - uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); - MemoryTracker::setRSS(memory_usage, 0); - - LOG_INFO(log, "Purged jemalloc arenas. Current memory usage is {}", ReadableSize(memory_usage)); - } - else - { - LOG_INFO(log, "Dropped below soft memory limit ({})", ReadableSize(soft_limit_)); - } - }; - - LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); -} - void CgroupsMemoryUsageObserver::setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_) { std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); @@ -301,35 +255,6 @@ void CgroupsMemoryUsageObserver::runThread() std::lock_guard memory_amount_available_changed_lock(memory_amount_available_changed_mutex); on_memory_amount_available_changed(); } - - std::lock_guard limit_lock(limit_mutex); - if (soft_limit > 0 && hard_limit > 0) - { - uint64_t memory_usage = cgroup_reader->readMemoryUsage(); - LOG_TRACE(log, "Read current memory usage {} bytes ({}) from cgroups", memory_usage, ReadableSize(memory_usage)); - if (memory_usage > hard_limit) - { - if (last_memory_usage <= hard_limit) - on_hard_limit(true); - } - else - { - if (last_memory_usage > hard_limit) - on_hard_limit(false); - } - - if (memory_usage > soft_limit) - { - if (last_memory_usage <= soft_limit) - on_soft_limit(true); - } - else - { - if (last_memory_usage > soft_limit) - on_soft_limit(false); - } - last_memory_usage = memory_usage; - } } catch (...) { diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index b848a2bff3c..078307a6fa0 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -16,6 +16,8 @@ struct ICgroupsReader virtual uint64_t readMemoryUsage() = 0; }; +std::shared_ptr createCgroupsReader(); + /// Does two things: /// 1. Periodically reads the memory usage of the process from Linux cgroups. /// You can specify soft or hard memory limits: @@ -35,19 +37,11 @@ struct ICgroupsReader class CgroupsMemoryUsageObserver { public: - using OnMemoryLimitFn = std::function; using OnMemoryAmountAvailableChangedFn = std::function; - enum class CgroupsVersion : uint8_t - { - V1, - V2 - }; - - explicit CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_); + explicit CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_, std::shared_ptr cgroups_reader_); ~CgroupsMemoryUsageObserver(); - void setMemoryUsageLimits(uint64_t hard_limit_, uint64_t soft_limit_); void setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_); void startThread(); @@ -58,22 +52,17 @@ private: const std::chrono::seconds wait_time; std::mutex limit_mutex; - size_t hard_limit TSA_GUARDED_BY(limit_mutex) = 0; - size_t soft_limit TSA_GUARDED_BY(limit_mutex) = 0; - OnMemoryLimitFn on_hard_limit TSA_GUARDED_BY(limit_mutex); - OnMemoryLimitFn on_soft_limit TSA_GUARDED_BY(limit_mutex); std::mutex memory_amount_available_changed_mutex; OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed TSA_GUARDED_BY(memory_amount_available_changed_mutex); - uint64_t last_memory_usage = 0; /// how much memory does the process use uint64_t last_available_memory_amount; /// how much memory can the process use void stopThread(); void runThread(); - std::unique_ptr cgroup_reader; + std::shared_ptr cgroups_reader; std::mutex thread_mutex; std::condition_variable cond; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 28cfa98666a..eaf34d87ec5 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -26,7 +26,6 @@ #endif #include -#include #include #include #include @@ -123,7 +122,7 @@ static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; MemoryTracker total_memory_tracker(nullptr, VariableContext::Global); MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User, false); -std::atomic MemoryTracker::free_memory_in_allocator_arenas; +std::atomic MemoryTracker::has_free_memory_in_allocator_arenas; MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {} MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {} @@ -204,7 +203,7 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused] LOG_TEST(getLogger("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, " "it may lead to OOM. Stack trace: {}", size, StackTrace().toString()); #else - return; /// Avoid trash logging in release builds + /// Avoid trash logging in release builds #endif } @@ -294,33 +293,18 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } } - Int64 limit_to_check = current_hard_limit; - #if USE_JEMALLOC - if (level == VariableContext::Global && allow_use_jemalloc_memory.load(std::memory_order_relaxed)) + if (level == VariableContext::Global && will_be > soft_limit.load(std::memory_order_relaxed) + && has_free_memory_in_allocator_arenas.exchange(false)) { - /// Jemalloc arenas may keep some extra memory. - /// This memory was substucted from RSS to decrease memory drift. - /// In case memory is close to limit, try to pugre the arenas. - /// This is needed to avoid OOM, because some allocations are directly done with mmap. - Int64 current_free_memory_in_allocator_arenas = free_memory_in_allocator_arenas.load(std::memory_order_relaxed); - - if (current_free_memory_in_allocator_arenas > 0 && current_hard_limit && current_free_memory_in_allocator_arenas + will_be > current_hard_limit) - { - if (free_memory_in_allocator_arenas.exchange(-current_free_memory_in_allocator_arenas) > 0) - { - Stopwatch watch; - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); - } - } - - limit_to_check += abs(current_free_memory_in_allocator_arenas); + Stopwatch watch; + mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); } #endif - if (unlikely(current_hard_limit && will_be > limit_to_check)) + if (unlikely(current_hard_limit && will_be > current_hard_limit)) { if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded) { @@ -526,11 +510,11 @@ void MemoryTracker::reset() } -void MemoryTracker::setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_) +void MemoryTracker::setRSS(Int64 rss_, bool has_free_memory_in_allocator_arenas_) { Int64 new_amount = rss_; total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); - free_memory_in_allocator_arenas.store(free_memory_in_allocator_arenas_, std::memory_order_relaxed); + has_free_memory_in_allocator_arenas.store(has_free_memory_in_allocator_arenas_, std::memory_order_relaxed); auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index fd32b631774..48d02fd1fc6 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -59,7 +59,7 @@ private: std::atomic profiler_limit {0}; std::atomic_bool allow_use_jemalloc_memory {true}; - static std::atomic free_memory_in_allocator_arenas; + static std::atomic has_free_memory_in_allocator_arenas; Int64 profiler_step = 0; @@ -252,7 +252,7 @@ public: /// Reset current counter to an RSS value. /// Jemalloc may have pre-allocated arenas, they are accounted in RSS. /// We can free this arenas in case of exception to avoid OOM. - static void setRSS(Int64 rss_, Int64 free_memory_in_allocator_arenas_); + static void setRSS(Int64 rss_, bool has_free_memory_in_allocator_arenas_); /// Prints info about peak memory consumption into log. void logPeakMemoryUsage(); diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 86166ffe31b..3e404b7152b 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -115,7 +115,7 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) - : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, context_->getCgroupsReader()), context(std::move(context_)) { } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2602afd8b78..771b8e9e558 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -405,6 +406,8 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr cluster_discovery TSA_GUARDED_BY(clusters_mutex); size_t clusters_version TSA_GUARDED_BY(clusters_mutex) = 0; + std::shared_ptr cgroups_reader; + /// No lock required for async_insert_queue modified only during initialization std::shared_ptr async_insert_queue; @@ -5635,6 +5638,16 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } +void Context::setCgroupsReader(std::shared_ptr cgroups_reader_) +{ + shared->cgroups_reader = std::move(cgroups_reader_); +} + +std::shared_ptr Context::getCgroupsReader() const +{ + return shared->cgroups_reader; +} + uint64_t HTTPContext::getMaxHstsAge() const { return context->getSettingsRef().hsts_max_age; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 284cac50769..f183a72e8e2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -150,6 +150,7 @@ class ServerType; template class MergeTreeBackgroundExecutor; class AsyncLoader; +struct ICgroupsReader; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map>; @@ -1344,6 +1345,9 @@ public: const ServerSettings & getServerSettings() const; + void setCgroupsReader(std::shared_ptr cgroups_reader_); + std::shared_ptr getCgroupsReader() const; + private: std::shared_ptr getSettingsConstraintsAndCurrentProfilesWithLock() const; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 872a9f864df..6ee0168bede 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -57,7 +57,7 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : WithContext(global_context_) - , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) + , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, getContext()->getCgroupsReader()) , heavy_metric_update_period(heavy_metrics_update_period_seconds) { /// sanity check From 21009577d867d493a6ceda48987c060498774f94 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 18 Jul 2024 09:01:08 +0200 Subject: [PATCH 199/844] Dedicated memory background thread --- programs/server/Server.cpp | 39 ++++++++++++++++++++++++--- src/CMakeLists.txt | 2 +- src/Common/AsynchronousMetrics.cpp | 43 ------------------------------ src/Common/Jemalloc.cpp | 16 ----------- src/Common/Jemalloc.h | 29 ++++++++++++++++++++ src/Common/MemoryTracker.cpp | 33 +++++++++-------------- src/Common/MemoryTracker.h | 1 - 7 files changed, 78 insertions(+), 85 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c52b1e037ec..ca46338d1c1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -447,9 +447,12 @@ void checkForUsersNotInMainConfig( } } +namespace +{ + /// Unused in other builds #if defined(OS_LINUX) -static String readLine(const String & path) +String readLine(const String & path) { ReadBufferFromFile in(path); String contents; @@ -457,7 +460,7 @@ static String readLine(const String & path) return contents; } -static int readNumber(const String & path) +int readNumber(const String & path) { ReadBufferFromFile in(path); int result; @@ -467,7 +470,7 @@ static int readNumber(const String & path) #endif -static void sanityChecks(Server & server) +void sanityChecks(Server & server) { std::string data_path = getCanonicalPath(server.config().getString("path", DBMS_DEFAULT_PATH)); std::string logs_path = server.config().getString("logger.log", ""); @@ -588,6 +591,31 @@ static void sanityChecks(Server & server) } } +[[noreturn]] void backgroundMemoryThread() +{ + std::mutex mutex; + std::condition_variable cv; + + std::unique_lock lock(mutex); + while (true) + { + cv.wait_for(lock, std::chrono::microseconds(200)); + uint64_t epoch = 0; + mallctl("epoch", nullptr, nullptr, &epoch, sizeof(epoch)); + auto maybe_resident = getJemallocValue("stats.resident"); + if (!maybe_resident.has_value()) + continue; + + Int64 resident = *maybe_resident; + //LOG_INFO(getLogger("JEmalloc"), "Resident {}", ReadableSize(resident)); + MemoryTracker::setRSS(resident, false); + if (resident > total_memory_tracker.getHardLimit()) + purgeJemallocArenas(); + } +} + +} + void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) { try @@ -877,6 +905,11 @@ try total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); } + ThreadFromGlobalPool background_memory_thread([] + { + backgroundMemoryThread(); + }); + Poco::ThreadPool server_pool( /* minCapacity */3, /* maxCapacity */server_settings.max_connections, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d985595154c..bfa41eacea1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -174,7 +174,7 @@ add_library (clickhouse_new_delete STATIC Common/new_delete.cpp) target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io) if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc) - target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) + target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 0953ad88697..b3e53c29d4a 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -649,49 +649,6 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) "The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes." " It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call." " This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring."}; - - /// We must update the value of total_memory_tracker periodically. - /// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount. - /// See https://github.com/ClickHouse/ClickHouse/issues/10293 - { - Int64 amount = total_memory_tracker.get(); - Int64 peak = total_memory_tracker.getPeak(); - Int64 rss = data.resident; - Int64 free_memory_in_allocator_arenas = 0; - -#if USE_JEMALLOC - /// According to jemalloc man, pdirty is: - /// - /// Number of pages within unused extents that are potentially - /// dirty, and for which madvise() or similar has not been called. - /// - /// So they will be subtracted from RSS to make accounting more - /// accurate, since those pages are not really RSS but a memory - /// that can be used at anytime via jemalloc. - free_memory_in_allocator_arenas = je_malloc_pdirty * getPageSize(); -#endif - - if (cgroups_reader != nullptr) - { - rss = cgroups_reader->readMemoryUsage(); - new_values["CgroupsMemoryUsage"] = { rss, - "The amount of physical memory used by the server process, reported by cgroups." }; - } - - Int64 difference = rss - amount; - - /// Log only if difference is high. This is for convenience. The threshold is arbitrary. - if (difference >= 1048576 || difference <= -1048576) - LOG_TRACE(log, - "MemoryTracking: was {}, peak {}, free memory in arenas {}, will set to {} (RSS), difference: {}", - ReadableSize(amount), - ReadableSize(peak), - ReadableSize(free_memory_in_allocator_arenas), - ReadableSize(rss), - ReadableSize(difference)); - - MemoryTracker::setRSS(rss, /*has_free_memory_in_allocator_arenas_=*/free_memory_in_allocator_arenas > 0); - } } { diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index d7cc246db6a..d8ff9268cca 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) @@ -26,7 +25,6 @@ namespace ErrorCodes void purgeJemallocArenas() { - LOG_TRACE(getLogger("SystemJemalloc"), "Purging unused memory"); Stopwatch watch; mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); @@ -46,20 +44,6 @@ void checkJemallocProfilingEnabled() "set: MALLOC_CONF=background_thread:true,prof:true"); } -template -void setJemallocValue(const char * name, T value) -{ - T old_value; - size_t old_value_size = sizeof(T); - if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) - { - LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); - return; - } - - LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); -} - void setJemallocProfileActive(bool value) { checkJemallocProfilingEnabled(); diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 499a906fd3d..0c533711f78 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -5,6 +5,8 @@ #if USE_JEMALLOC #include +#include +#include namespace DB { @@ -21,6 +23,33 @@ void setJemallocBackgroundThreads(bool enabled); void setJemallocMaxBackgroundThreads(size_t max_threads); +template +void setJemallocValue(const char * name, T value) +{ + T old_value; + size_t old_value_size = sizeof(T); + if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return; + } + + LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); +} + +template +std::optional getJemallocValue(const char * name) +{ + T value; + size_t value_size = sizeof(T); + if (mallctl(name, &value, &value_size, nullptr, 0)) + { + LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); + return std::nullopt; + } + return value; +} + } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index eaf34d87ec5..a541eeeb25f 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -20,9 +20,6 @@ #if USE_JEMALLOC # include -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - #endif #include @@ -126,11 +123,11 @@ std::atomic MemoryTracker::has_free_memory_in_allocator_arenas; MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {} MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {} + MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_, bool log_peak_memory_usage_in_destructor_) - : parent(parent_) - , log_peak_memory_usage_in_destructor(log_peak_memory_usage_in_destructor_) - , level(level_) -{} + : parent(parent_), log_peak_memory_usage_in_destructor(log_peak_memory_usage_in_destructor_), level(level_) +{ +} MemoryTracker::~MemoryTracker() { @@ -200,8 +197,12 @@ void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused] return; MemoryTrackerBlockerInThread blocker(VariableContext::Global); - LOG_TEST(getLogger("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, " - "it may lead to OOM. Stack trace: {}", size, StackTrace().toString()); + LOG_TEST( + getLogger("MemoryTracker"), + "Too big allocation ({} bytes) without checking memory limits, " + "it may lead to OOM. Stack trace: {}", + size, + StackTrace().toString()); #else /// Avoid trash logging in release builds #endif @@ -293,17 +294,6 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } } -#if USE_JEMALLOC - if (level == VariableContext::Global && will_be > soft_limit.load(std::memory_order_relaxed) - && has_free_memory_in_allocator_arenas.exchange(false)) - { - Stopwatch watch; - mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", nullptr, nullptr, nullptr, 0); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); - ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, watch.elapsedMicroseconds()); - } -#endif - if (unlikely(current_hard_limit && will_be > current_hard_limit)) { if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded) @@ -513,7 +503,8 @@ void MemoryTracker::reset() void MemoryTracker::setRSS(Int64 rss_, bool has_free_memory_in_allocator_arenas_) { Int64 new_amount = rss_; - total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); + if (rss_) + total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); has_free_memory_in_allocator_arenas.store(has_free_memory_in_allocator_arenas_, std::memory_order_relaxed); auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 48d02fd1fc6..257ed7d0629 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include From 9a43183eb39dc056186432e6478e050db5045ecf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 18 Jul 2024 10:31:24 +0200 Subject: [PATCH 200/844] Finish background memory thread --- programs/keeper/Keeper.cpp | 30 ++++------ programs/server/Server.cpp | 59 ++++--------------- src/Common/AsynchronousMetrics.cpp | 19 ++---- src/Common/AsynchronousMetrics.h | 3 +- src/Common/CgroupsMemoryUsageObserver.cpp | 8 +-- src/Common/CgroupsMemoryUsageObserver.h | 24 +++----- src/Common/Jemalloc.h | 42 +++++++++---- src/Common/MemoryTracker.cpp | 10 +--- src/Common/MemoryTracker.h | 13 +--- src/Common/MemoryWorker.cpp | 49 +++++++++++++++ src/Common/MemoryWorker.h | 34 +++++++++++ .../KeeperAsynchronousMetrics.cpp | 2 +- src/Core/ServerSettings.h | 1 + src/Interpreters/Context.cpp | 12 ---- src/Interpreters/Context.h | 3 - .../ServerAsynchronousMetrics.cpp | 2 +- .../System/StorageSystemServerSettings.cpp | 1 - tests/integration/test_memory_limit/test.py | 1 - 18 files changed, 158 insertions(+), 155 deletions(-) create mode 100644 src/Common/MemoryWorker.cpp create mode 100644 src/Common/MemoryWorker.h diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 3f6020ad48c..87f126a0046 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -371,6 +372,8 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); + MemoryWorker memory_worker(config().getUInt64("memory_worker_period_ms", 100)); + static ServerErrorHandler error_handler; Poco::ErrorHandler::set(&error_handler); @@ -399,18 +402,6 @@ try registerDisks(/*global_skip_access_check=*/false); - auto cgroups_memory_observer_wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 15); - try - { - auto cgroups_reader = createCgroupsReader(); - global_context->setCgroupsReader(createCgroupsReader()); - } - catch (...) - { - if (cgroups_memory_observer_wait_time != 0) - tryLogCurrentException(log, "Failed to create cgroups reader"); - } - /// This object will periodically calculate some metrics. KeeperAsynchronousMetrics async_metrics( global_context, @@ -634,21 +625,22 @@ try main_config_reloader->start(); std::optional cgroups_memory_usage_observer; - if (cgroups_memory_observer_wait_time != 0) + try { - auto cgroups_reader = global_context->getCgroupsReader(); - if (cgroups_reader) + auto wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 15); + if (wait_time != 0) { - cgroups_memory_usage_observer.emplace(std::chrono::seconds(cgroups_memory_observer_wait_time), global_context->getCgroupsReader()); + cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time)); /// Not calling cgroups_memory_usage_observer->setLimits() here (as for the normal ClickHouse server) because Keeper controls /// its memory usage by other means (via setting 'max_memory_usage_soft_limit'). cgroups_memory_usage_observer->setOnMemoryAmountAvailableChangedFn([&]() { main_config_reloader->reload(); }); cgroups_memory_usage_observer->startThread(); } - else - LOG_ERROR(log, "Disabling cgroup memory observer because of an error during initialization of cgroups reader"); } - + catch (Exception &) + { + tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); + } LOG_INFO(log, "Ready for connections."); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ca46338d1c1..ae445477c3a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -110,6 +110,8 @@ #include #include +#include + #include "config.h" #include @@ -591,29 +593,6 @@ void sanityChecks(Server & server) } } -[[noreturn]] void backgroundMemoryThread() -{ - std::mutex mutex; - std::condition_variable cv; - - std::unique_lock lock(mutex); - while (true) - { - cv.wait_for(lock, std::chrono::microseconds(200)); - uint64_t epoch = 0; - mallctl("epoch", nullptr, nullptr, &epoch, sizeof(epoch)); - auto maybe_resident = getJemallocValue("stats.resident"); - if (!maybe_resident.has_value()) - continue; - - Int64 resident = *maybe_resident; - //LOG_INFO(getLogger("JEmalloc"), "Resident {}", ReadableSize(resident)); - MemoryTracker::setRSS(resident, false); - if (resident > total_memory_tracker.getHardLimit()) - purgeJemallocArenas(); - } -} - } void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) @@ -905,11 +884,6 @@ try total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); } - ThreadFromGlobalPool background_memory_thread([] - { - backgroundMemoryThread(); - }); - Poco::ThreadPool server_pool( /* minCapacity */3, /* maxCapacity */server_settings.max_connections, @@ -930,16 +904,7 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); - try - { - auto cgroups_reader = createCgroupsReader(); - global_context->setCgroupsReader(createCgroupsReader()); - } - catch (...) - { - if (server_settings.cgroups_memory_usage_observer_wait_time != 0) - tryLogCurrentException(log, "Failed to create cgroups reader"); - } + MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms); /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( @@ -1500,13 +1465,15 @@ try } std::optional cgroups_memory_usage_observer; - if (auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time; wait_time != 0) + try { - auto cgroups_reader = global_context->getCgroupsReader(); - if (cgroups_reader) - cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time), std::move(cgroups_reader)); - else - LOG_ERROR(log, "Disabling cgroup memory observer because of an error during initialization of cgroups reader"); + auto wait_time = server_settings.cgroups_memory_usage_observer_wait_time; + if (wait_time != 0) + cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time)); + } + catch (Exception &) + { + tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); } std::string cert_path = config().getString("openSSL.server.certificateFile", ""); @@ -1602,8 +1569,6 @@ try background_memory_tracker.setDescription("(background)"); background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking); - total_memory_tracker.setAllowUseJemallocMemory(new_server_settings.allow_use_jemalloc_memory); - auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index b3e53c29d4a..a5c9875188b 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -57,12 +58,10 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_, - std::shared_ptr cgroups_reader_) + const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : update_period(update_period_seconds) , log(getLogger("AsynchronousMetrics")) , protocol_server_metrics_func(protocol_server_metrics_func_) - , cgroups_reader(std::move(cgroups_reader_)) { #if defined(OS_LINUX) openFileIfExists("/proc/meminfo", meminfo); @@ -378,23 +377,13 @@ void AsynchronousMetrics::run() namespace { -uint64_t updateJemallocEpoch() -{ - uint64_t value = 0; - size_t size = sizeof(value); - mallctl("epoch", &value, &size, &value, size); - return value; -} - template Value saveJemallocMetricImpl( AsynchronousMetricValues & values, const std::string & jemalloc_full_name, const std::string & clickhouse_full_name) { - Value value{}; - size_t size = sizeof(value); - mallctl(jemalloc_full_name.c_str(), &value, &size, nullptr, 0); + auto value = getJemallocValue(jemalloc_full_name.c_str()); values[clickhouse_full_name] = AsynchronousMetricValue(value, "An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html"); return value; } @@ -604,7 +593,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) // 'epoch' is a special mallctl -- it updates the statistics. Without it, all // the following calls will return stale values. It increments and returns // the current epoch number, which might be useful to log as a sanity check. - auto epoch = updateJemallocEpoch(); + auto epoch = getJemallocValue("epoch"); new_values["jemalloc.epoch"] = { epoch, "An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics." }; // Collect the statistics themselves. diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 0b110f41fc3..bc379d4e92b 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -69,8 +69,7 @@ public: AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_, - std::shared_ptr cgroups_reader_); + const ProtocolServerMetricsFunc & protocol_server_metrics_func_); virtual ~AsynchronousMetrics(); diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index b12845df098..ab7ca69ca04 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -191,11 +191,9 @@ std::shared_ptr createCgroupsReader() } -CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_, std::shared_ptr cgroups_reader_) - : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_), cgroups_reader(std::move(cgroups_reader_)) -{ - cgroups_reader = createCgroupsReader(); -} +CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_), cgroups_reader(createCgroupsReader()) +{} CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() { diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 078307a6fa0..33e0f167a59 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -18,28 +18,20 @@ struct ICgroupsReader std::shared_ptr createCgroupsReader(); -/// Does two things: -/// 1. Periodically reads the memory usage of the process from Linux cgroups. -/// You can specify soft or hard memory limits: -/// - When the soft memory limit is hit, drop jemalloc cache. -/// - When the hard memory limit is hit, update MemoryTracking metric to throw memory exceptions faster. -/// The goal of this is to avoid that the process hits the maximum allowed memory limit at which there is a good -/// chance that the Limux OOM killer terminates it. All of this is done is because internal memory tracking in -/// ClickHouse can unfortunately under-estimate the actually used memory. -/// 2. Periodically reads the the maximum memory available to the process (which can change due to cgroups settings). -/// You can specify a callback to react on changes. The callback typically reloads the configuration, i.e. Server -/// or Keeper configuration file. This reloads settings 'max_server_memory_usage' (Server) and 'max_memory_usage_soft_limit' -/// (Keeper) from which various other internal limits are calculated, including the soft and hard limits for (1.). -/// The goal of this is to provide elasticity when the container is scaled-up/scaled-down. The mechanism (polling -/// cgroups) is quite implicit, unfortunately there is currently no better way to communicate memory threshold changes -/// to the database. +/// Periodically reads the the maximum memory available to the process (which can change due to cgroups settings). +/// You can specify a callback to react on changes. The callback typically reloads the configuration, i.e. Server +/// or Keeper configuration file. This reloads settings 'max_server_memory_usage' (Server) and 'max_memory_usage_soft_limit' +/// (Keeper) from which various other internal limits are calculated, including the soft and hard limits for (1.). +/// The goal of this is to provide elasticity when the container is scaled-up/scaled-down. The mechanism (polling +/// cgroups) is quite implicit, unfortunately there is currently no better way to communicate memory threshold changes +/// to the database. #if defined(OS_LINUX) class CgroupsMemoryUsageObserver { public: using OnMemoryAmountAvailableChangedFn = std::function; - explicit CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_, std::shared_ptr cgroups_reader_); + explicit CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_); ~CgroupsMemoryUsageObserver(); void setOnMemoryAmountAvailableChangedFn(OnMemoryAmountAvailableChangedFn on_memory_amount_available_changed_); diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 0c533711f78..629d039b483 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -28,28 +28,46 @@ void setJemallocValue(const char * name, T value) { T old_value; size_t old_value_size = sizeof(T); - if (mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T))) - { - LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); - return; - } - + mallctl(name, &old_value, &old_value_size, reinterpret_cast(&value), sizeof(T)); LOG_INFO(getLogger("Jemalloc"), "Value for {} set to {} (from {})", name, value, old_value); } template -std::optional getJemallocValue(const char * name) +T getJemallocValue(const char * name) { T value; size_t value_size = sizeof(T); - if (mallctl(name, &value, &value_size, nullptr, 0)) - { - LOG_WARNING(getLogger("Jemalloc"), "mallctl for {} failed", name); - return std::nullopt; - } + mallctl(name, &value, &value_size, nullptr, 0); return value; } +template +struct JemallocMibCache +{ + explicit JemallocMibCache(const char * name) + { + mallctlnametomib(name, mib, &mib_length); + } + + void setValue(T value) + { + mallctlbymib(mib, mib_length, nullptr, nullptr, reinterpret_cast(&value), sizeof(T)); + } + + T getValue() + { + T value; + size_t value_size = sizeof(T); + mallctlbymib(mib, mib_length, &value, &value_size, nullptr, 0); + return value; + } + +private: + static constexpr size_t max_mib_length = 4; + size_t mib[max_mib_length]; + size_t mib_length = max_mib_length; +}; + } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index a541eeeb25f..e237c3a0d33 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -108,8 +108,6 @@ void AllocationTrace::onFreeImpl(void * ptr, size_t size) const namespace ProfileEvents { extern const Event QueryMemoryLimitExceeded; - extern const Event MemoryAllocatorPurge; - extern const Event MemoryAllocatorPurgeTimeMicroseconds; } using namespace std::chrono_literals; @@ -119,8 +117,6 @@ static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; MemoryTracker total_memory_tracker(nullptr, VariableContext::Global); MemoryTracker background_memory_tracker(&total_memory_tracker, VariableContext::User, false); -std::atomic MemoryTracker::has_free_memory_in_allocator_arenas; - MemoryTracker::MemoryTracker(VariableContext level_) : parent(&total_memory_tracker), level(level_) {} MemoryTracker::MemoryTracker(MemoryTracker * parent_, VariableContext level_) : parent(parent_), level(level_) {} @@ -500,12 +496,10 @@ void MemoryTracker::reset() } -void MemoryTracker::setRSS(Int64 rss_, bool has_free_memory_in_allocator_arenas_) +void MemoryTracker::setRSS(Int64 rss_) { Int64 new_amount = rss_; - if (rss_) - total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); - has_free_memory_in_allocator_arenas.store(has_free_memory_in_allocator_arenas_, std::memory_order_relaxed); + total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 257ed7d0629..4085bb321ed 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -56,9 +56,6 @@ private: std::atomic soft_limit {0}; std::atomic hard_limit {0}; std::atomic profiler_limit {0}; - std::atomic_bool allow_use_jemalloc_memory {true}; - - static std::atomic has_free_memory_in_allocator_arenas; Int64 profiler_step = 0; @@ -153,14 +150,6 @@ public: { return soft_limit.load(std::memory_order_relaxed); } - void setAllowUseJemallocMemory(bool value) - { - allow_use_jemalloc_memory.store(value, std::memory_order_relaxed); - } - bool getAllowUseJemallocMmemory() const - { - return allow_use_jemalloc_memory.load(std::memory_order_relaxed); - } /** Set limit if it was not set. * Otherwise, set limit to new value, if new value is greater than previous limit. @@ -251,7 +240,7 @@ public: /// Reset current counter to an RSS value. /// Jemalloc may have pre-allocated arenas, they are accounted in RSS. /// We can free this arenas in case of exception to avoid OOM. - static void setRSS(Int64 rss_, bool has_free_memory_in_allocator_arenas_); + static void setRSS(Int64 rss_); /// Prints info about peak memory consumption into log. void logPeakMemoryUsage(); diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp new file mode 100644 index 00000000000..dce47b83667 --- /dev/null +++ b/src/Common/MemoryWorker.cpp @@ -0,0 +1,49 @@ +#include "Common/ThreadPool.h" +#include + +#include +#include + +namespace DB +{ + +#if USE_JEMALLOC +MemoryWorker::MemoryWorker(uint64_t period_ms_) + : period_ms(period_ms_) +{ + background_thread = ThreadFromGlobalPool([this] { backgroundThread(); }); +} + +MemoryWorker::~MemoryWorker() +{ + { + std::unique_lock lock(mutex); + shutdown = true; + } + cv.notify_all(); + + if (background_thread.joinable()) + background_thread.join(); +} + +void MemoryWorker::backgroundThread() +{ + JemallocMibCache epoch_mib("epoch"); + JemallocMibCache resident_mib("stats.resident"); + std::unique_lock lock(mutex); + while (true) + { + cv.wait_for(lock, period_ms, [this] { return shutdown; }); + if (shutdown) + return; + + epoch_mib.setValue(0); + Int64 resident = resident_mib.getValue(); + MemoryTracker::setRSS(resident); + if (resident > total_memory_tracker.getHardLimit()) + purgeJemallocArenas(); + } +} +#endif + +} diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h new file mode 100644 index 00000000000..8048194d9cd --- /dev/null +++ b/src/Common/MemoryWorker.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include "config.h" + +namespace DB +{ + +#if USE_JEMALLOC +class MemoryWorker +{ +public: + explicit MemoryWorker(uint64_t period_ms_); + + ~MemoryWorker(); +private: + void backgroundThread(); + + ThreadFromGlobalPool background_thread; + + std::mutex mutex; + std::condition_variable cv; + bool shutdown = false; + + std::chrono::milliseconds period_ms; +}; +#else +class MemoryWorker +{ +}; +#endif + +} diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 3e404b7152b..86166ffe31b 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -115,7 +115,7 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) - : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, context_->getCgroupsReader()), context(std::move(context_)) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) { } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 28b32a6e6a5..aaea0388239 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -157,6 +157,7 @@ namespace DB M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ + M(UInt64, memory_worker_period_ms, 100, "Period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage.", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 6a24e049998..f70ccfd77be 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -406,8 +406,6 @@ struct ContextSharedPart : boost::noncopyable std::unique_ptr cluster_discovery TSA_GUARDED_BY(clusters_mutex); size_t clusters_version TSA_GUARDED_BY(clusters_mutex) = 0; - std::shared_ptr cgroups_reader; - /// No lock required for async_insert_queue modified only during initialization std::shared_ptr async_insert_queue; @@ -5631,16 +5629,6 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } -void Context::setCgroupsReader(std::shared_ptr cgroups_reader_) -{ - shared->cgroups_reader = std::move(cgroups_reader_); -} - -std::shared_ptr Context::getCgroupsReader() const -{ - return shared->cgroups_reader; -} - uint64_t HTTPContext::getMaxHstsAge() const { return context->getSettingsRef().hsts_max_age; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 33b742d20ad..0e3c0591c12 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1344,9 +1344,6 @@ public: const ServerSettings & getServerSettings() const; - void setCgroupsReader(std::shared_ptr cgroups_reader_); - std::shared_ptr getCgroupsReader() const; - private: std::shared_ptr getSettingsConstraintsAndCurrentProfilesWithLock() const; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 6ee0168bede..872a9f864df 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -57,7 +57,7 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( unsigned heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : WithContext(global_context_) - , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, getContext()->getCgroupsReader()) + , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) , heavy_metric_update_period(heavy_metrics_update_period_seconds) { /// sanity check diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index d242b6de4ec..ee99c472620 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -63,7 +63,6 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context /// current setting values, one needs to ask the components directly. std::unordered_map> changeable_settings = { {"max_server_memory_usage", {std::to_string(total_memory_tracker.getHardLimit()), ChangeableWithoutRestart::Yes}}, - {"allow_use_jemalloc_memory", {std::to_string(total_memory_tracker.getAllowUseJemallocMmemory()), ChangeableWithoutRestart::Yes}}, {"max_table_size_to_drop", {std::to_string(context->getMaxTableSizeToDrop()), ChangeableWithoutRestart::Yes}}, {"max_partition_size_to_drop", {std::to_string(context->getMaxPartitionSizeToDrop()), ChangeableWithoutRestart::Yes}}, diff --git a/tests/integration/test_memory_limit/test.py b/tests/integration/test_memory_limit/test.py index 6d6745711da..db68a38c1b1 100644 --- a/tests/integration/test_memory_limit/test.py +++ b/tests/integration/test_memory_limit/test.py @@ -13,7 +13,6 @@ node = cluster.add_instance( "configs/async_metrics_no.xml", ], mem_limit="4g", - env_variables={"MALLOC_CONF": "dirty_decay_ms:0"}, ) From 9ec1fd1ab769b2f6c6ad713b4e747a89bde48b78 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 18 Jul 2024 11:29:03 +0200 Subject: [PATCH 201/844] Fix non-jemalloc builds --- src/Common/MemoryWorker.cpp | 2 +- src/Common/MemoryWorker.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index dce47b83667..e5ebbe3b979 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -1,8 +1,8 @@ -#include "Common/ThreadPool.h" #include #include #include +#include namespace DB { diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index 8048194d9cd..5f02fd0b1d0 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -28,6 +28,8 @@ private: #else class MemoryWorker { +public: + explicit MemoryWorker(uint64_t /*period_ms_*/) {} }; #endif From 05c7dc582a48d738bed82bcb24d3a7619fec8bc9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 18 Jul 2024 13:40:03 +0200 Subject: [PATCH 202/844] Add some comments --- src/Common/Jemalloc.h | 3 +++ src/Common/MemoryWorker.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index 629d039b483..dfa265c5e59 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -41,6 +41,9 @@ T getJemallocValue(const char * name) return value; } +/// Each mallctl call consists of string name lookup which can be expensive. +/// This can be avoided by translating name to "Management Information Base" (MIB) +/// and using it in mallctlbymib calls template struct JemallocMibCache { diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index 5f02fd0b1d0..6c0a578aa61 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -8,6 +8,12 @@ namespace DB { #if USE_JEMALLOC +/// Correct MemoryTracker based on stats.resident read from jemalloc. +/// This requires jemalloc built with --enable-stats which we use. +/// The worker spawns a background thread which moves the jemalloc epoch (updates internal stats), +/// and fetches the current stats.resident whose value is sent to global MemoryTracker. +/// Additionally, if the current memory usage is higher than global hard limit, +/// jemalloc's dirty pages are forcefully purged. class MemoryWorker { public: From e47fe15968b84a3f9ea5a911bbad2a942fd27188 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 18 Jul 2024 12:22:03 +0000 Subject: [PATCH 203/844] Simplify condition --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 06455526f1b..e967b575acb 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -591,10 +591,9 @@ Pipe ReadFromMergeTree::readInOrder( /// If parallel replicas enabled, set total rows in progress here only on initiator with local plan /// Otherwise rows will counted multiple times const UInt64 in_order_limit = query_info.input_order_info ? query_info.input_order_info->limit : 0; - const bool parallel_replicas_remote_plan_for_initiator = is_parallel_reading_from_replicas - && !context->getSettingsRef().parallel_replicas_local_plan && context->canUseParallelReplicasOnInitiator(); - const bool parallel_replicas_follower = is_parallel_reading_from_replicas && context->canUseParallelReplicasOnFollower(); - const bool set_total_rows_approx = !parallel_replicas_follower && !parallel_replicas_remote_plan_for_initiator; + const bool parallel_replicas_local_plan_for_initiator = is_parallel_reading_from_replicas + && context->getSettingsRef().parallel_replicas_local_plan && context->canUseParallelReplicasOnInitiator(); + const bool set_total_rows_approx = !is_parallel_reading_from_replicas || parallel_replicas_local_plan_for_initiator; Pipes pipes; for (size_t i = 0; i < parts_with_ranges.size(); ++i) From 7523dafc067c000ffe329808f1f409c2a52d3e9e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 18 Jul 2024 13:09:07 +0000 Subject: [PATCH 204/844] Fix after incorrect merge conflict resolution --- src/Processors/QueryPlan/ReadFromRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 4599343f5c1..fdf35be6460 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -493,7 +493,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder /// when using local plan for local replica, local replica has 0 number size_t offset = (exclude_local_replica ? 1 : 0); - for (size_t i = 0; i < max_replicas_to_use; ++i) + for (size_t i = 0 + offset; i < pools_to_use.size(); ++i) { IConnections::ReplicaInfo replica_info{ /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. From 7ad07657a32415bd1b9df5541452d46e3b1e0c3d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 18 Jul 2024 14:16:44 +0000 Subject: [PATCH 205/844] Fix --- src/Processors/QueryPlan/ReadFromRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index fdf35be6460..79ad436c37e 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -493,7 +493,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder /// when using local plan for local replica, local replica has 0 number size_t offset = (exclude_local_replica ? 1 : 0); - for (size_t i = 0 + offset; i < pools_to_use.size(); ++i) + for (size_t i = 0 + offset; i < max_replicas_to_use; ++i) { IConnections::ReplicaInfo replica_info{ /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. From 7d66f400b25a5bf0f2f43f06c6ed432d1c572fb6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 18 Jul 2024 16:51:49 +0200 Subject: [PATCH 206/844] Better --- src/Common/Jemalloc.h | 5 +++++ src/Common/MemoryTracker.cpp | 13 ++++++++----- src/Common/MemoryTracker.h | 8 ++++---- src/Common/MemoryWorker.cpp | 27 +++++++++++++++++++++++++-- src/Common/ProfileEvents.cpp | 3 +++ 5 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/Common/Jemalloc.h b/src/Common/Jemalloc.h index dfa265c5e59..22a94a44eba 100644 --- a/src/Common/Jemalloc.h +++ b/src/Common/Jemalloc.h @@ -65,6 +65,11 @@ struct JemallocMibCache return value; } + void run() + { + mallctlbymib(mib, mib_length, nullptr, nullptr, nullptr, 0); + } + private: static constexpr size_t max_mib_length = 4; size_t mib[max_mib_length]; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index e237c3a0d33..49a3a6ef7ef 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -242,6 +242,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed * So, we allow over-allocations. */ Int64 will_be = size ? size + amount.fetch_add(size, std::memory_order_relaxed) : amount.load(std::memory_order_relaxed); + Int64 will_be_rss = size + rss.load(std::memory_order_relaxed); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end() && size) @@ -290,7 +291,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } } - if (unlikely(current_hard_limit && will_be > current_hard_limit)) + if (unlikely(current_hard_limit && (will_be > current_hard_limit || will_be_rss > current_hard_limit))) { if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded) { @@ -310,12 +311,13 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed throw DB::Exception( DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED, "Memory limit{}{} exceeded: " - "would use {} (attempt to allocate chunk of {} bytes), maximum: {}." + "would use {} (attempt to allocate chunk of {} bytes), current RSS {}, maximum: {}." "{}{}", description ? " " : "", description ? description : "", formatReadableSizeWithBinarySuffix(will_be), size, + formatReadableSizeWithBinarySuffix(rss.load(std::memory_order_relaxed)), formatReadableSizeWithBinarySuffix(current_hard_limit), overcommit_result == OvercommitResult::NONE ? "" : " OvercommitTracker decision: ", toDescription(overcommit_result)); @@ -496,17 +498,18 @@ void MemoryTracker::reset() } -void MemoryTracker::setRSS(Int64 rss_) +void MemoryTracker::updateValues(Int64 rss_, Int64 allocated_) { - Int64 new_amount = rss_; + Int64 new_amount = allocated_; total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); + total_memory_tracker.rss.store(rss_, std::memory_order_relaxed); auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::set(metric_loaded, new_amount); bool log_memory_usage = true; - total_memory_tracker.updatePeak(rss_, log_memory_usage); + total_memory_tracker.updatePeak(new_amount, log_memory_usage); } diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 4085bb321ed..add8bcb43d2 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -57,6 +57,8 @@ private: std::atomic hard_limit {0}; std::atomic profiler_limit {0}; + std::atomic rss{0}; + Int64 profiler_step = 0; /// To test exception safety of calling code, memory tracker throws an exception on each memory allocation with specified probability. @@ -237,10 +239,8 @@ public: /// Reset the accumulated data. void reset(); - /// Reset current counter to an RSS value. - /// Jemalloc may have pre-allocated arenas, they are accounted in RSS. - /// We can free this arenas in case of exception to avoid OOM. - static void setRSS(Int64 rss_); + /// update values based on external information (e.g. jemalloc's stat) + static void updateValues(Int64 rss_, Int64 allocated_); /// Prints info about peak memory consumption into log. void logPeakMemoryUsage(); diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index e5ebbe3b979..ae488a47b67 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -3,11 +3,23 @@ #include #include #include +#include + +namespace ProfileEvents +{ + extern const Event MemoryAllocatorPurge; + extern const Event MemoryAllocatorPurgeTimeMicroseconds; + extern const Event MemoryWorkerRun; + extern const Event MemoryWorkerRunElapsedMicroseconds; +} namespace DB { #if USE_JEMALLOC +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + MemoryWorker::MemoryWorker(uint64_t period_ms_) : period_ms(period_ms_) { @@ -30,6 +42,8 @@ void MemoryWorker::backgroundThread() { JemallocMibCache epoch_mib("epoch"); JemallocMibCache resident_mib("stats.resident"); + JemallocMibCache allocated_mib("stats.allocated"); + JemallocMibCache purge_mib("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"); std::unique_lock lock(mutex); while (true) { @@ -37,11 +51,20 @@ void MemoryWorker::backgroundThread() if (shutdown) return; + Stopwatch total_watch; epoch_mib.setValue(0); Int64 resident = resident_mib.getValue(); - MemoryTracker::setRSS(resident); if (resident > total_memory_tracker.getHardLimit()) - purgeJemallocArenas(); + { + Stopwatch purge_watch; + purge_mib.run(); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); + ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); + } + + MemoryTracker::updateValues(resident, allocated_mib.getValue()); + ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); + ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); } } #endif diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 871ba7cab8b..d85c21fcded 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -778,6 +778,9 @@ The server successfully detected this situation and will download merged part fr M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan") \ M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)") \ M(GWPAsanFree, "Number of free operations done by GWPAsan") \ + \ + M(MemoryWorkerRun, "Number of runs done by MemoryWorker in background") \ + M(MemoryWorkerRunElapsedMicroseconds, "Total time spent by MemoryWorker for background work") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS From 6db09e884b9fcc516c6c4c0afe1dd3a33ec1ada9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 08:24:29 +0000 Subject: [PATCH 207/844] Simplify code --- src/Processors/QueryPlan/ReadFromRemote.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 79ad436c37e..8a2d773696b 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -491,16 +491,17 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder addresses.emplace_back(pool->getAddress()); LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", fmt::join(addresses, ", ")); - /// when using local plan for local replica, local replica has 0 number - size_t offset = (exclude_local_replica ? 1 : 0); - for (size_t i = 0 + offset; i < max_replicas_to_use; ++i) + /// when using local plan for local replica, 0 is assigned to local replica as replica num, - in this case, starting from 1 here + size_t replica_num = (exclude_local_replica ? 1 : 0); + for (const auto & pool : pools_to_use) { IConnections::ReplicaInfo replica_info{ /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. - .number_of_current_replica = i, + .number_of_current_replica = replica_num, }; + ++replica_num; - addPipeForSingeReplica(pipes, pools_to_use[i - offset], replica_info); + addPipeForSingeReplica(pipes, pool, replica_info); } auto pipe = Pipe::unitePipes(std::move(pipes)); From 2df3e99666e22e8838bc500eda37393772e1a4f1 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 19 Jul 2024 17:04:16 +0000 Subject: [PATCH 208/844] better mutations snapshot --- src/Interpreters/MutationsInterpreter.h | 1 - src/Storages/MergeTree/MergeTask.h | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 41 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 38 ++++++++++------- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 3 -- .../MergeTree/MergeTreeMutationEntry.cpp | 1 - .../MergeTree/MergeTreeSequentialSource.cpp | 3 +- src/Storages/MergeTree/MutateTask.cpp | 1 - .../MergeTree/ReplicatedMergeTreeQueue.cpp | 36 +++++++++------- .../MergeTree/ReplicatedMergeTreeQueue.h | 7 ++-- src/Storages/StorageMergeTree.cpp | 24 +++++++---- src/Storages/StorageMergeTree.h | 7 ++-- 12 files changed, 91 insertions(+), 72 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 8ae438efc19..b3a8dd41736 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -6,7 +6,6 @@ #include #include #include -#include "Storages/MergeTree/AlterConversions.h" namespace DB diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 619e2fc7b2b..180538480fa 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -4,7 +4,6 @@ #include #include -#include "Storages/MergeTree/AlterConversions.h" #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d631a8833d0..cdfa41eef78 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8444,9 +8444,10 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart DB::updateObjectColumns(object_columns, columns, part->getColumns()); } -bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const +bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const { - return !hasLightweightDeletedMask(); + const auto & snapshot_data = assert_cast(*storage_snapshot->data); + return !hasLightweightDeletedMask() && !snapshot_data.mutations_snapshot->hasDataMutations(); } Int64 MergeTreeData::getMinMetadataVersion(const DataPartsVector & parts) @@ -8698,16 +8699,16 @@ void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) } static void updateMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, + Int64 & num_data_mutations_to_apply, + Int64 & num_metadata_mutations_to_apply, const MutationCommands & commands, Int64 increment) { - if (data_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + if (num_data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_data_mutations_to_apply); - if (metadata_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); + if (num_metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", num_metadata_mutations_to_apply); bool has_data_mutation = false; bool has_metadata_mutation = false; @@ -8716,40 +8717,40 @@ static void updateMutationsCounters( { if (!has_data_mutation && AlterConversions::isSupportedDataMutation(command.type)) { - data_mutations_to_apply += increment; + num_data_mutations_to_apply += increment; has_data_mutation = true; - if (data_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", data_mutations_to_apply); + if (num_data_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly data mutations counter is negative ({})", num_data_mutations_to_apply); } if (!has_metadata_mutation && AlterConversions::isSupportedMetadataMutation(command.type)) { - metadata_mutations_to_apply += increment; + num_metadata_mutations_to_apply += increment; has_metadata_mutation = true; - if (metadata_mutations_to_apply < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", metadata_mutations_to_apply); + if (num_metadata_mutations_to_apply < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly metadata mutations counter is negative ({})", num_metadata_mutations_to_apply); } } } void incrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, + Int64 & num_data_mutations_to_apply, + Int64 & num_metadata_mutations_to_apply, const MutationCommands & commands, std::lock_guard & /*lock*/) { - return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, 1); + return updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, 1); } void decrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, + Int64 & num_data_mutations_to_apply, + Int64 & num_metadata_mutations_to_apply, const MutationCommands & commands, std::lock_guard & /*lock*/) { - return updateMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, -1); + return updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, -1); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 984662daff0..d5cfddb70af 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -34,7 +34,6 @@ #include #include #include -#include "Storages/ProjectionsDescription.h" #include #include @@ -444,10 +443,14 @@ public: bool areAsynchronousInsertsEnabled() const override; - bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; + bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const override; + /// A snapshot of pending mutations that weren't applied to some of the parts yet + /// and should be applied on the fly (i.e. when reading from the part). + /// Mutations not supported by AlterConversions (supportsMutationCommandType()) can be omitted. struct IMutationsSnapshot { + /// Contains info that doesn't depend on state of mutations. struct Params { Int64 metadata_version = -1; @@ -455,18 +458,25 @@ public: bool need_data_mutations = false; }; + /// Contains info that depends on state of mutations. + struct Info + { + Int64 num_data_mutations = 0; + Int64 num_metadata_mutations = 0; + }; + Params params; + Info info; IMutationsSnapshot() = default; - explicit IMutationsSnapshot(Params params_) : params(std::move(params_)) {} + IMutationsSnapshot(Params params_, Info info_): params(std::move(params_)), info(std::move(info_)) {} - /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly - /// (i.e. when reading from the part). Mutations not supported by AlterConversions - /// (supportsMutationCommandType()) can be omitted. - /// - /// @return list of mutations, in *reverse* order (newest to oldest) + /// Returns mutation commands that are required to be applied to the `part`. + /// @return list of mutation commands, in *reverse* order (newest to oldest) virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; virtual std::shared_ptr cloneEmpty() const = 0; + bool hasDataMutations() const { return params.need_data_mutations && info.num_data_mutations > 0; } + virtual ~IMutationsSnapshot() = default; }; @@ -956,10 +966,10 @@ public: Disks getDisks() const { return getStoragePolicy()->getDisks(); } - /// TODO: comment + /// Returns a snapshot of mutations that probably will be applied on the fly to parts during reading. virtual MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const = 0; - /// TODO: comment + /// Returns the minimum version of metadata among parts. static Int64 getMinMetadataVersion(const DataPartsVector & parts); /// Return alter conversions for part which must be applied on fly. @@ -1761,14 +1771,14 @@ struct CurrentlySubmergingEmergingTagger /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. /// Return true if the counter had been updated void incrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, + Int64 & num_data_mutations_to_apply, + Int64 & num_metadata_mutations_to_apply, const MutationCommands & commands, std::lock_guard & lock); void decrementMutationsCounters( - Int64 & data_mutations_to_apply, - Int64 & metadata_mutations_to_apply, + Int64 & num_data_mutations_to_apply, + Int64 & num_metadata_mutations_to_apply, const MutationCommands & commands, std::lock_guard & lock); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 94e00c531ba..1e10e4adc9d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -896,9 +896,6 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar return std::make_shared(); std::optional indexes; - /// NOTE: We don't need mutations snapshot because the returned analysis_result is only used for: - /// 1. estimate the number of rows to read; - /// 2. projection reading, which doesn't have alter conversions. return ReadFromMergeTree::selectRangesToRead( std::move(parts), metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 6f06b921031..5970aed497e 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -6,7 +6,6 @@ #include #include #include -#include "Storages/MutationCommands.h" #include diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 8368f4e80f7..31e1c0f235f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -14,7 +14,6 @@ #include #include #include -#include "Storages/MergeTree/AlterConversions.h" #include namespace DB @@ -65,7 +64,7 @@ private: /// Data part will not be removed if the pointer owns it MergeTreeData::DataPartPtr data_part; - /// TODO: comment. + /// Alter and mutation commands that are required to be applied to the part on-fly. AlterConversionsPtr alter_conversions; /// Columns we have to read (each Block from read will contain them) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 74069d8b06e..a95cccdc0d2 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -33,7 +33,6 @@ #include #include #include -#include "Storages/MergeTree/AlterConversions.h" #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c2fcfbd1810..c3d91ca0705 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -14,8 +14,6 @@ #include #include #include -#include -#include #include #include @@ -952,7 +950,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper { const auto commands = entry.commands; it = mutations_by_znode.erase(it); - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, commands, state_lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, state_lock); } else it = mutations_by_znode.erase(it); @@ -1002,7 +1000,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)).first->second; - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands, lock); NOEXCEPT_SCOPE({ for (const auto & pair : entry->block_numbers) @@ -1913,7 +1911,7 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo MutationCommands result; - bool seen_all_data_mutations = !params.need_data_mutations; + bool seen_all_data_mutations = !hasDataMutations(); bool seen_all_metadata_mutations = part_metadata_version >= params.metadata_version; if (seen_all_data_mutations && seen_all_metadata_mutations) @@ -1940,9 +1938,9 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo auto alter_version = entry->alter_version; - if (!seen_all_metadata_mutations && alter_version != -1) + if (alter_version != -1) { - if (alter_version > params.metadata_version) + if (seen_all_metadata_mutations || alter_version > params.metadata_version) continue; /// We take commands with bigger metadata version @@ -1965,11 +1963,17 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapshot(const MutationsSnapshot::Params & params) const { - auto res = std::make_shared(params); - std::lock_guard lock(state_mutex); - bool need_data_mutations = params.need_data_mutations && data_mutations_to_apply > 0; + MutationsSnapshot::Info info + { + .num_data_mutations = num_data_mutations_to_apply, + .num_metadata_mutations = num_metadata_mutations_to_apply, + }; + + auto res = std::make_shared(params, std::move(info)); + + bool need_data_mutations = res->hasDataMutations(); bool need_metatadata_mutations = params.min_part_metadata_version < params.metadata_version; if (!need_data_mutations && !need_metatadata_mutations) @@ -2000,14 +2004,16 @@ MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapsh auto alter_version = status->entry->alter_version; - if (!seen_all_metadata_mutations && alter_version != -1) + if (alter_version != -1) { - if (alter_version > params.metadata_version) + if (seen_all_metadata_mutations || alter_version > params.metadata_version) continue; /// We take commands with bigger metadata version if (alter_version > params.min_part_metadata_version) { + /// Copy a pointer to the whole entry to avoid extracting and copying commands. + /// Required commands will be copied later only for specific parts. if (std::ranges::any_of(status->entry->commands, is_supported_command)) in_partition.emplace(mutation_version, status->entry); } @@ -2020,6 +2026,8 @@ MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapsh { if (!status->is_done) { + /// Copy a pointer to the whole entry to avoid extracting and copying commands. + /// Required commands will be copied later only for specific parts. if (std::ranges::any_of(status->entry->commands, is_supported_command)) in_partition.emplace(mutation_version, status->entry); } @@ -2114,7 +2122,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep mutation.parts_to_do.clear(); } - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, mutation.entry->commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, mutation.entry->commands, lock); } else if (mutation.parts_to_do.size() == 0) { @@ -2171,7 +2179,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, entry->commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands, lock); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index a46fdaf3ac4..af8e2521f81 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -154,8 +154,8 @@ private: std::map mutations_by_znode; /// Unfinished mutations that are required for AlterConversions. - Int64 data_mutations_to_apply = 0; - Int64 metadata_mutations_to_apply = 0; + Int64 num_data_mutations_to_apply = 0; + Int64 num_metadata_mutations_to_apply = 0; /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; @@ -415,8 +415,9 @@ public: struct MutationsSnapshot : public MergeTreeData::IMutationsSnapshot { + public: MutationsSnapshot() = default; - explicit MutationsSnapshot(Params params_) : IMutationsSnapshot(std::move(params_)) {} + MutationsSnapshot(Params params_, Info info_) : IMutationsSnapshot(std::move(params_), std::move(info_)) {} using Params = MergeTreeData::IMutationsSnapshot::Params; using MutationsByPartititon = std::unordered_map>; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c7189925812..900b592d32d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -520,7 +520,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands, lock); LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } background_operations_assignee.trigger(); @@ -556,7 +556,7 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry.commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry.commands, lock); } } else @@ -838,7 +838,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { bool mutation_finished = *min_version > static_cast(mutation_version); if (!mutation_finished) - decrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *it->second.commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands, lock); } to_kill.emplace(std::move(it->second)); @@ -923,7 +923,7 @@ void StorageMergeTree::loadMutations() if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); - incrementMutationsCounters(data_mutations_to_apply, metadata_mutations_to_apply, *entry_it->second.commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry_it->second.commands, lock); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -2466,12 +2466,18 @@ MutationCommands StorageMergeTree::MutationsSnapshot::getAlterMutationCommandsFo MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const IMutationsSnapshot::Params & params) const { - auto res = std::make_shared(params); - std::lock_guard lock(currently_processing_in_background_mutex); - bool need_data_mutations = res->params.need_data_mutations && data_mutations_to_apply > 0; - bool need_metadata_mutations = metadata_mutations_to_apply > 0; + MutationsSnapshot::Info info + { + .num_data_mutations = num_data_mutations_to_apply, + .num_metadata_mutations = num_metadata_mutations_to_apply, + }; + + auto res = std::make_shared(params, std::move(info)); + + bool need_data_mutations = res->params.need_data_mutations && num_data_mutations_to_apply > 0; + bool need_metadata_mutations = num_metadata_mutations_to_apply > 0; if (!need_data_mutations && !need_metadata_mutations) return res; @@ -2489,6 +2495,8 @@ MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const return false; }); + /// Copy a pointer to all commands to avoid extracting and copying them. + /// Required commands will be copied later only for specific parts. if (has_required_command) res->mutations_by_version.emplace(version, entry.commands); } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index a3633284ac6..16ad79d7586 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -17,7 +17,6 @@ #include #include -#include "Storages/MutationCommands.h" namespace DB @@ -150,8 +149,8 @@ private: std::map current_mutations_by_version; /// Unfinished mutations that are required for AlterConversions. - Int64 data_mutations_to_apply = 0; - Int64 metadata_mutations_to_apply = 0; + Int64 num_data_mutations_to_apply = 0; + Int64 num_metadata_mutations_to_apply = 0; std::atomic shutdown_called {false}; std::atomic flush_called {false}; @@ -314,7 +313,7 @@ private: struct MutationsSnapshot : public IMutationsSnapshot { MutationsSnapshot() = default; - explicit MutationsSnapshot(Params params_) : IMutationsSnapshot(std::move(params_)) {} + MutationsSnapshot(Params params_, Info info_) : IMutationsSnapshot(std::move(params_), std::move(info_)) {} using MutationsByVersion = std::map>; MutationsByVersion mutations_by_version; From daabf2275e3b04bc40949aade73f544f56d78931 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 19 Jul 2024 21:46:14 +0000 Subject: [PATCH 209/844] Remove wrong comment --- src/Processors/QueryPlan/ReadFromRemote.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 8a2d773696b..46d4aa29e70 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -393,7 +393,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( for (const auto & addr : cluster->getShardsAddresses().front()) { - /// skip first local if (exclude_local_replica && addr.is_local) continue; From 4e69cd0d5262fed5bbebaf30fd2f34d6cb2ad830 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 20 Jul 2024 20:53:18 +0000 Subject: [PATCH 210/844] Avoid getting callbacks from context for local plan --- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 15 +++++++++++---- src/Processors/QueryPlan/ReadFromMergeTree.h | 4 ++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index e9d5ef90e30..e8ff0f417dc 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -85,7 +85,7 @@ std::pair, bool> createLocalPlanForParallelReplicas( { return coordinator->handleRequest(std::move(req)); }; auto read_from_merge_tree_parallel_replicas - = reading->createLocalParallelReplicasReadingStep(analyzed_result_ptr, all_ranges_cb, read_task_cb); + = reading->createLocalParallelReplicasReadingStep(analyzed_result_ptr, std::move(all_ranges_cb), std::move(read_task_cb)); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e967b575acb..6f0037684e9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -303,8 +303,15 @@ ReadFromMergeTree::ReadFromMergeTree( { if (is_parallel_reading_from_replicas) { - all_ranges_callback = all_ranges_callback_.value_or(context->getMergeTreeAllRangesCallback()); - read_task_callback = read_task_callback_.value_or(context->getMergeTreeReadTaskCallback()); + if (all_ranges_callback_.has_value()) + all_ranges_callback = all_ranges_callback_.value(); + else + all_ranges_callback = context->getMergeTreeAllRangesCallback(); + + if (read_task_callback_.has_value()) + read_task_callback = read_task_callback_.value(); + else + read_task_callback = context->getMergeTreeReadTaskCallback(); } const auto & settings = context->getSettingsRef(); @@ -340,8 +347,8 @@ ReadFromMergeTree::ReadFromMergeTree( std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( AnalysisResultPtr analyzed_result_ptr_, - std::optional all_ranges_callback_, - std::optional read_task_callback_) + MergeTreeAllRangesCallback all_ranges_callback_, + MergeTreeReadTaskCallback read_task_callback_) { const auto number_of_local_replica = 0; const bool enable_parallel_reading = true; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index c9c6030d207..307b605c01c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -128,8 +128,8 @@ public: std::unique_ptr createLocalParallelReplicasReadingStep( AnalysisResultPtr analyzed_result_ptr_, - std::optional all_ranges_callback_, - std::optional read_task_callback_); + MergeTreeAllRangesCallback all_ranges_callback_, + MergeTreeReadTaskCallback read_task_callback_); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } From 2147a96475717f0af53dd62f487c011a5b9b933a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Sun, 21 Jul 2024 11:32:57 +0200 Subject: [PATCH 211/844] Better --- src/Common/AsynchronousMetrics.cpp | 4 +++ src/Common/MemoryTracker.cpp | 31 +++++++++++++++---- src/Common/MemoryTracker.h | 2 +- src/Common/MemoryWorker.cpp | 7 ++++- src/Coordination/KeeperDispatcher.cpp | 8 ++++- .../configs/keeper_config2.xml | 2 +- .../configs/keeper_config3.xml | 2 +- 7 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index a5c9875188b..dc2f687004b 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -638,6 +638,10 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) "The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes." " It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call." " This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring."}; + +#if !USE_JEMALLOC + MemoryTracker::updateValues(data.resident, data.resident, /*force_update=*/true); +#endif } { diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 49a3a6ef7ef..07d6ba98745 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -221,6 +221,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed { /// For global memory tracker always update memory usage. amount.fetch_add(size, std::memory_order_relaxed); + rss.fetch_add(size, std::memory_order_relaxed); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) @@ -242,7 +243,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed * So, we allow over-allocations. */ Int64 will_be = size ? size + amount.fetch_add(size, std::memory_order_relaxed) : amount.load(std::memory_order_relaxed); - Int64 will_be_rss = size + rss.load(std::memory_order_relaxed); + Int64 will_be_rss = size ? size + rss.fetch_add(size, std::memory_order_relaxed) : rss.load(std::memory_order_relaxed); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end() && size) @@ -269,6 +270,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed { /// Revert amount.fetch_sub(size, std::memory_order_relaxed); + rss.fetch_sub(size, std::memory_order_relaxed); /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); @@ -291,7 +293,8 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed } } - if (unlikely(current_hard_limit && (will_be > current_hard_limit || will_be_rss > current_hard_limit))) + if (unlikely( + current_hard_limit && (will_be > current_hard_limit || (level == VariableContext::Global && will_be_rss > current_hard_limit)))) { if (memoryTrackerCanThrow(level, false) && throw_if_memory_exceeded) { @@ -303,6 +306,7 @@ AllocationTrace MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceed { /// Revert amount.fetch_sub(size, std::memory_order_relaxed); + rss.fetch_sub(size, std::memory_order_relaxed); /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); @@ -411,6 +415,7 @@ AllocationTrace MemoryTracker::free(Int64 size, double _sample_probability) { /// For global memory tracker always update memory usage. amount.fetch_sub(size, std::memory_order_relaxed); + rss.fetch_sub(size, std::memory_order_relaxed); auto metric_loaded = metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::sub(metric_loaded, size); @@ -424,7 +429,12 @@ AllocationTrace MemoryTracker::free(Int64 size, double _sample_probability) } Int64 accounted_size = size; - if (level == VariableContext::Thread || level == VariableContext::Global) + if (level == VariableContext::Global) + { + amount.fetch_sub(accounted_size, std::memory_order_relaxed); + rss.fetch_sub(accounted_size, std::memory_order_relaxed); + } + else if (level == VariableContext::Thread) { /// Could become negative if memory allocated in this thread is freed in another one amount.fetch_sub(accounted_size, std::memory_order_relaxed); @@ -498,12 +508,21 @@ void MemoryTracker::reset() } -void MemoryTracker::updateValues(Int64 rss_, Int64 allocated_) +void MemoryTracker::updateValues(Int64 rss_, Int64 allocated_, bool force_update) { - Int64 new_amount = allocated_; - total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); total_memory_tracker.rss.store(rss_, std::memory_order_relaxed); + if (likely(!force_update && total_memory_tracker.amount.load(std::memory_order_relaxed) >= 0)) + return; + + Int64 new_amount = allocated_; + LOG_INFO( + getLogger("MemoryTracker"), + "Correcting the value of global memory tracker from {} to {}", + ReadableSize(total_memory_tracker.amount.load(std::memory_order_relaxed)), + ReadableSize(allocated_)); + total_memory_tracker.amount.store(new_amount, std::memory_order_relaxed); + auto metric_loaded = total_memory_tracker.metric.load(std::memory_order_relaxed); if (metric_loaded != CurrentMetrics::end()) CurrentMetrics::set(metric_loaded, new_amount); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index add8bcb43d2..4913be9781f 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -240,7 +240,7 @@ public: void reset(); /// update values based on external information (e.g. jemalloc's stat) - static void updateValues(Int64 rss_, Int64 allocated_); + static void updateValues(Int64 rss_, Int64 allocated_, bool force_update); /// Prints info about peak memory consumption into log. void logPeakMemoryUsage(); diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index ae488a47b67..23cd90178ff 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -44,6 +44,7 @@ void MemoryWorker::backgroundThread() JemallocMibCache resident_mib("stats.resident"); JemallocMibCache allocated_mib("stats.allocated"); JemallocMibCache purge_mib("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"); + bool first_run = false; std::unique_lock lock(mutex); while (true) { @@ -62,9 +63,13 @@ void MemoryWorker::backgroundThread() ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); } - MemoryTracker::updateValues(resident, allocated_mib.getValue()); + /// force update the allocated stat from jemalloc for the first run to cover the allocations we missed + /// during initialization + MemoryTracker::updateValues(resident, allocated_mib.getValue(), first_run); ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); + + first_run = false; } } #endif diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 8c7e6405153..332662117d8 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -148,7 +148,13 @@ void KeeperDispatcher::requestThread() Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit(); if (configuration_and_settings->standalone_keeper && isExceedingMemorySoftLimit() && checkIfRequestIncreaseMem(request.request)) { - LOG_WARNING(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", ReadableSize(mem_soft_limit), ReadableSize(total_memory_tracker.get()), request.request->getOpNum()); + LOG_WARNING( + log, + "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type " + "is {}", + ReadableSize(mem_soft_limit), + ReadableSize(total_memory_tracker.get()), + request.request->getOpNum()); addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS); continue; } diff --git a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml index 25ececea3e8..e71b93379d0 100644 --- a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml +++ b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config2.xml @@ -16,7 +16,7 @@ az-zoo2 1 - 20000000 + 200000000 10000 diff --git a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml index 81e343b77c9..cf4a4686f2c 100644 --- a/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml +++ b/tests/integration/test_keeper_memory_soft_limit/configs/keeper_config3.xml @@ -13,7 +13,7 @@ 2181 3 - 20000000 + 200000000 10000 From 1c3f7d0fd0fbf27692ff29d8309382eae7b7a598 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 22 Jul 2024 14:58:00 +0200 Subject: [PATCH 212/844] Small fix --- programs/server/Server.cpp | 4 ++-- src/Common/MemoryWorker.cpp | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 04480f0bfe9..5691d82e216 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -904,8 +904,6 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); - MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms); - /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( global_context, @@ -1198,6 +1196,8 @@ try FailPointInjection::enableFromGlobalConfig(config()); + MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms); + int default_oom_score = 0; #if !defined(NDEBUG) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 23cd90178ff..2b945a30d3d 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -2,8 +2,9 @@ #include #include -#include #include +#include +#include namespace ProfileEvents { @@ -23,6 +24,7 @@ namespace DB MemoryWorker::MemoryWorker(uint64_t period_ms_) : period_ms(period_ms_) { + LOG_INFO(getLogger("MemoryWorker"), "Starting background memory thread with period of {}ms", period_ms.count()); background_thread = ThreadFromGlobalPool([this] { backgroundThread(); }); } @@ -42,9 +44,10 @@ void MemoryWorker::backgroundThread() { JemallocMibCache epoch_mib("epoch"); JemallocMibCache resident_mib("stats.resident"); + JemallocMibCache active_mib("stats.active"); JemallocMibCache allocated_mib("stats.allocated"); JemallocMibCache purge_mib("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"); - bool first_run = false; + bool first_run = true; std::unique_lock lock(mutex); while (true) { @@ -55,6 +58,11 @@ void MemoryWorker::backgroundThread() Stopwatch total_watch; epoch_mib.setValue(0); Int64 resident = resident_mib.getValue(); + + /// force update the allocated stat from jemalloc for the first run to cover the allocations we missed + /// during initialization + MemoryTracker::updateValues(resident, allocated_mib.getValue(), first_run); + if (resident > total_memory_tracker.getHardLimit()) { Stopwatch purge_watch; @@ -63,9 +71,6 @@ void MemoryWorker::backgroundThread() ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); } - /// force update the allocated stat from jemalloc for the first run to cover the allocations we missed - /// during initialization - MemoryTracker::updateValues(resident, allocated_mib.getValue(), first_run); ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); From fa0f4543b688861a97843a8767d895f52b6dee9d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 22 Jul 2024 13:44:25 +0000 Subject: [PATCH 213/844] Fix marks multiplier for local replica to avoid overflow with fuzzer --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6f0037684e9..5922550eaf3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -384,7 +384,11 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + // avoid using multiplier if min marks to read is already too big (to avoid overflow) + if (pool_settings.min_marks_for_concurrent_read >= std::numeric_limits::max()) + multiplier = 1.0f; + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else @@ -553,7 +557,11 @@ Pipe ReadFromMergeTree::readInOrder( .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), }; - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + // avoid using multiplier if min marks to read is already too big (to avoid overflow) + if (pool_settings.min_marks_for_concurrent_read >= std::numeric_limits::max()) + multiplier = 1.0f; + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else From 4483f4b3894e846deb43fa4e7d0962eba2a2224b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 22 Jul 2024 16:39:07 +0000 Subject: [PATCH 214/844] Better logging for announcement --- .../MergeTree/ParallelReplicasReadingCoordinator.cpp | 7 +++---- src/Storages/MergeTree/RequestResponse.cpp | 5 +---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index e1c0d87837a..4e3c9b08fb0 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -433,12 +433,11 @@ void DefaultCoordinator::setProgressCallback() void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { - const auto replica_num = announcement.replica_num; - - LOG_DEBUG(log, "Initial request from replica {}: {}", announcement.replica_num, announcement.describe()); + LOG_DEBUG(log, "Initial request: {}", announcement.describe()); initializeReadingState(std::move(announcement)); + const auto replica_num = announcement.replica_num; if (replica_num >= stats.size()) throw Exception( ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size()); @@ -859,7 +858,7 @@ void InOrderCoordinator::markReplicaAsUnavailable(size_t replica_number) template void InOrderCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { - LOG_TRACE(log, "Received an announcement {}", announcement.describe()); + LOG_TRACE(log, "Received an announcement : {}", announcement.describe()); ++stats[announcement.replica_num].number_of_requests; diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 33cd935c88b..bcdeb443a0b 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -129,10 +129,7 @@ void InitialAllRangesAnnouncement::serialize(WriteBuffer & out) const String InitialAllRangesAnnouncement::describe() { - String result; - result += description.describe(); - result += fmt::format("----------\nReceived from {} replica\n", replica_num); - return result; + return fmt::format("replica {}, mode {}, {}", replica_num, mode, description.describe()); } InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in) From 0b151bbe8f5903834368d23e7847a83e0e701055 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 22 Jul 2024 14:31:57 -0300 Subject: [PATCH 215/844] fix conflict --- .../Access/InterpreterCreateUserQuery.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index f9003f266b2..e877d01ead3 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -324,7 +324,20 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat if (query.valid_until) valid_until = getValidUntilFromAST(query.valid_until, {}); - updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, allow_no_password, allow_plaintext_password, true); + updateUserFromQueryImpl( + user, + query, + authentication_methods, + {}, + {}, + {}, + {}, + valid_until, + query.reset_authentication_methods_to_new, + query.replace_authentication_methods, + allow_no_password, + allow_plaintext_password, + true); } void registerInterpreterCreateUserQuery(InterpreterFactory & factory) From 420716d702daf8885796ed1b9b2cf83c437d6e32 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 06:38:37 +0000 Subject: [PATCH 216/844] Fix clang-tidy warning --- src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 4e3c9b08fb0..a1b36fe1299 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -435,9 +435,10 @@ void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAn { LOG_DEBUG(log, "Initial request: {}", announcement.describe()); + const auto replica_num = announcement.replica_num; + initializeReadingState(std::move(announcement)); - const auto replica_num = announcement.replica_num; if (replica_num >= stats.size()) throw Exception( ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size()); From d78cfd030fa8364456ac5283a6a1469703c53b40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 22 Jul 2024 21:47:46 +0200 Subject: [PATCH 217/844] Use cgroups as source --- programs/keeper/Keeper.cpp | 8 +- programs/server/Server.cpp | 9 +- src/Common/AsynchronousMetrics.cpp | 32 +- src/Common/AsynchronousMetrics.h | 10 +- src/Common/CgroupsMemoryUsageObserver.cpp | 168 +---------- src/Common/CgroupsMemoryUsageObserver.h | 12 - src/Common/MemoryTracker.cpp | 9 +- src/Common/MemoryTracker.h | 3 +- src/Common/MemoryWorker.cpp | 280 ++++++++++++++++-- src/Common/MemoryWorker.h | 49 ++- .../KeeperAsynchronousMetrics.cpp | 9 +- src/Coordination/KeeperAsynchronousMetrics.h | 8 +- src/Core/ServerSettings.h | 2 +- .../ServerAsynchronousMetrics.cpp | 6 +- src/Interpreters/ServerAsynchronousMetrics.h | 5 +- 15 files changed, 367 insertions(+), 243 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index b10d3f34623..d308e741311 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -376,7 +376,8 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); - MemoryWorker memory_worker(config().getUInt64("memory_worker_period_ms", 100)); + MemoryWorker memory_worker(config().getUInt64("memory_worker_period_ms", 0)); + memory_worker.start(); static ServerErrorHandler error_handler; Poco::ErrorHandler::set(&error_handler); @@ -419,8 +420,9 @@ try for (const auto & server : *servers) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); return metrics; - } - ); + }, + /*update_jemalloc_epoch_=*/memory_worker.getSource() != MemoryWorker::MemoryUsageSource::Jemalloc, + /*update_rss_=*/memory_worker.getSource() == MemoryWorker::MemoryUsageSource::None); std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 5691d82e216..1fc1df1494c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -904,6 +904,8 @@ try LOG_INFO(log, "Background threads finished in {} ms", watch.elapsedMilliseconds()); }); + MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms); + /// This object will periodically calculate some metrics. ServerAsynchronousMetrics async_metrics( global_context, @@ -922,8 +924,9 @@ try for (const auto & server : servers) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); return metrics; - } - ); + }, + /*update_jemalloc_epoch_=*/memory_worker.getSource() != MemoryWorker::MemoryUsageSource::Jemalloc, + /*update_rss_=*/memory_worker.getSource() == MemoryWorker::MemoryUsageSource::None); /// NOTE: global context should be destroyed *before* GlobalThreadPool::shutdown() /// Otherwise GlobalThreadPool::shutdown() will hang, since Context holds some threads. @@ -1196,7 +1199,7 @@ try FailPointInjection::enableFromGlobalConfig(config()); - MemoryWorker memory_worker(global_context->getServerSettings().memory_worker_period_ms); + memory_worker.start(); int default_oom_score = 0; diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index dc2f687004b..53b8e13eaaa 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -58,10 +58,14 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_) : update_period(update_period_seconds) , log(getLogger("AsynchronousMetrics")) , protocol_server_metrics_func(protocol_server_metrics_func_) + , update_jemalloc_epoch(update_jemalloc_epoch_) + , update_rss(update_rss_) { #if defined(OS_LINUX) openFileIfExists("/proc/meminfo", meminfo); @@ -377,6 +381,14 @@ void AsynchronousMetrics::run() namespace { +uint64_t updateJemallocEpoch() +{ + uint64_t value = 0; + size_t size = sizeof(value); + mallctl("epoch", &value, &size, &value, size); + return value; +} + template Value saveJemallocMetricImpl( AsynchronousMetricValues & values, @@ -593,8 +605,11 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) // 'epoch' is a special mallctl -- it updates the statistics. Without it, all // the following calls will return stale values. It increments and returns // the current epoch number, which might be useful to log as a sanity check. - auto epoch = getJemallocValue("epoch"); - new_values["jemalloc.epoch"] = { epoch, "An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics." }; + auto epoch = update_jemalloc_epoch ? updateJemallocEpoch() : getJemallocValue("epoch"); + new_values["jemalloc.epoch"] + = {epoch, + "An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other " + "`jemalloc` metrics."}; // Collect the statistics themselves. saveJemallocMetric(new_values, "allocated"); @@ -607,10 +622,10 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) saveJemallocMetric(new_values, "background_thread.num_threads"); saveJemallocMetric(new_values, "background_thread.num_runs"); saveJemallocMetric(new_values, "background_thread.run_intervals"); - saveJemallocProf(new_values, "active"); + saveJemallocProf(new_values, "active"); saveAllArenasMetric(new_values, "pactive"); - [[maybe_unused]] size_t je_malloc_pdirty = saveAllArenasMetric(new_values, "pdirty"); - [[maybe_unused]] size_t je_malloc_pmuzzy = saveAllArenasMetric(new_values, "pmuzzy"); + saveAllArenasMetric(new_values, "pdirty"); + saveAllArenasMetric(new_values, "pmuzzy"); saveAllArenasMetric(new_values, "dirty_purged"); saveAllArenasMetric(new_values, "muzzy_purged"); #endif @@ -639,9 +654,8 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) " It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call." " This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring."}; -#if !USE_JEMALLOC - MemoryTracker::updateValues(data.resident, data.resident, /*force_update=*/true); -#endif + if (update_rss) + MemoryTracker::updateRSS(data.resident); } { diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index bc379d4e92b..eb6ede7a558 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -7,10 +7,8 @@ #include #include -#include #include #include -#include #include #include #include @@ -69,7 +67,9 @@ public: AsynchronousMetrics( unsigned update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_); virtual ~AsynchronousMetrics(); @@ -92,7 +92,6 @@ private: virtual void logImpl(AsynchronousMetricValues &) {} ProtocolServerMetricsFunc protocol_server_metrics_func; - std::shared_ptr cgroups_reader; std::unique_ptr thread; @@ -113,6 +112,9 @@ private: MemoryStatisticsOS memory_stat TSA_GUARDED_BY(data_mutex); #endif + const bool update_jemalloc_epoch; + const bool update_rss; + #if defined(OS_LINUX) std::optional meminfo TSA_GUARDED_BY(data_mutex); std::optional loadavg TSA_GUARDED_BY(data_mutex); diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index ab7ca69ca04..afeac1808b2 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -13,12 +13,8 @@ #include #include -#include -#include -#include using namespace DB; -namespace fs = std::filesystem; namespace DB { @@ -29,170 +25,8 @@ extern const int FILE_DOESNT_EXIST; extern const int INCORRECT_DATA; } -} - -namespace -{ - -/// Format is -/// kernel 5 -/// rss 15 -/// [...] -uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) -{ - while (!buf.eof()) - { - std::string current_key; - readStringUntilWhitespace(current_key, buf); - if (current_key != key) - { - std::string dummy; - readStringUntilNewlineInto(dummy, buf); - buf.ignore(); - continue; - } - - assertChar(' ', buf); - uint64_t value = 0; - readIntText(value, buf); - return value; - } - - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot find '{}' in '{}'", key, buf.getFileName()); -} - -struct CgroupsV1Reader : ICgroupsReader -{ - explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } - - uint64_t readMemoryUsage() override - { - std::lock_guard lock(mutex); - buf.rewind(); - return readMetricFromStatFile(buf, "rss"); - } - -private: - std::mutex mutex; - ReadBufferFromFile buf TSA_GUARDED_BY(mutex); -}; - -struct CgroupsV2Reader : ICgroupsReader -{ - explicit CgroupsV2Reader(const fs::path & stat_file_dir) - : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") - { - } - - uint64_t readMemoryUsage() override - { - std::lock_guard lock(mutex); - current_buf.rewind(); - stat_buf.rewind(); - - int64_t mem_usage = 0; - /// memory.current contains a single number - /// the reason why we subtract it described here: https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667 - readIntText(mem_usage, current_buf); - mem_usage -= readMetricFromStatFile(stat_buf, "inactive_file"); - chassert(mem_usage >= 0, "Negative memory usage"); - return mem_usage; - } - -private: - std::mutex mutex; - ReadBufferFromFile current_buf TSA_GUARDED_BY(mutex); - ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); -}; - -/// Caveats: -/// - All of the logic in this file assumes that the current process is the only process in the -/// containing cgroup (or more precisely: the only process with significant memory consumption). -/// If this is not the case, then other processe's memory consumption may affect the internal -/// memory tracker ... -/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a -/// decade and will go away at some point, hierarchical detection is only implemented for v2. -/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such -/// systems existed only for a short transition period. - -std::optional getCgroupsV2Path() -{ - if (!cgroupsV2Enabled()) - return {}; - - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - - fs::path current_cgroup = cgroupV2PathOfProcess(); - if (current_cgroup.empty()) - return {}; - - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. - while (current_cgroup != default_cgroups_mount.parent_path()) - { - const auto current_path = current_cgroup / "memory.current"; - const auto stat_path = current_cgroup / "memory.stat"; - if (fs::exists(current_path) && fs::exists(stat_path)) - return {current_cgroup}; - current_cgroup = current_cgroup.parent_path(); - } - return {}; -} - -std::optional getCgroupsV1Path() -{ - auto path = default_cgroups_mount / "memory/memory.stat"; - if (!fs::exists(path)) - return {}; - return {default_cgroups_mount / "memory"}; -} - -enum class CgroupsVersion : uint8_t -{ - V1, - V2 -}; - -std::pair getCgroupsPath() -{ - auto v2_path = getCgroupsV2Path(); - if (v2_path.has_value()) - return {*v2_path, CgroupsVersion::V2}; - - auto v1_path = getCgroupsV1Path(); - if (v1_path.has_value()) - return {*v1_path, CgroupsVersion::V1}; - - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); -} - -} - -namespace DB -{ - -std::shared_ptr createCgroupsReader() -{ - const auto [cgroup_path, version] = getCgroupsPath(); - LOG_INFO( - getLogger("CgroupsReader"), - "Will create cgroup reader from '{}' (cgroups version: {})", - cgroup_path, - (version == CgroupsVersion::V1) ? "v1" : "v2"); - - if (version == CgroupsVersion::V2) - return std::make_shared(cgroup_path); - else - { - chassert(version == CgroupsVersion::V1); - return std::make_shared(cgroup_path); - } - -} - CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) - : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_), cgroups_reader(createCgroupsReader()) + : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) {} CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 33e0f167a59..3de83d6b437 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -3,21 +3,11 @@ #include #include -#include #include namespace DB { -struct ICgroupsReader -{ - virtual ~ICgroupsReader() = default; - - virtual uint64_t readMemoryUsage() = 0; -}; - -std::shared_ptr createCgroupsReader(); - /// Periodically reads the the maximum memory available to the process (which can change due to cgroups settings). /// You can specify a callback to react on changes. The callback typically reloads the configuration, i.e. Server /// or Keeper configuration file. This reloads settings 'max_server_memory_usage' (Server) and 'max_memory_usage_soft_limit' @@ -54,8 +44,6 @@ private: void runThread(); - std::shared_ptr cgroups_reader; - std::mutex thread_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 07d6ba98745..0ffae89ffa6 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -508,13 +508,13 @@ void MemoryTracker::reset() } -void MemoryTracker::updateValues(Int64 rss_, Int64 allocated_, bool force_update) +void MemoryTracker::updateRSS(Int64 rss_) { total_memory_tracker.rss.store(rss_, std::memory_order_relaxed); +} - if (likely(!force_update && total_memory_tracker.amount.load(std::memory_order_relaxed) >= 0)) - return; - +void MemoryTracker::updateAllocated(Int64 allocated_) +{ Int64 new_amount = allocated_; LOG_INFO( getLogger("MemoryTracker"), @@ -531,7 +531,6 @@ void MemoryTracker::updateValues(Int64 rss_, Int64 allocated_, bool force_update total_memory_tracker.updatePeak(new_amount, log_memory_usage); } - void MemoryTracker::setSoftLimit(Int64 value) { soft_limit.store(value, std::memory_order_relaxed); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 4913be9781f..d2db8489f19 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -240,7 +240,8 @@ public: void reset(); /// update values based on external information (e.g. jemalloc's stat) - static void updateValues(Int64 rss_, Int64 allocated_, bool force_update); + static void updateRSS(Int64 rss_); + static void updateAllocated(Int64 allocated_); /// Prints info about peak memory consumption into log. void logPeakMemoryUsage(); diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 2b945a30d3d..42e797a80d6 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -1,11 +1,21 @@ #include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include + +namespace fs = std::filesystem; + namespace ProfileEvents { extern const Event MemoryAllocatorPurge; @@ -17,14 +27,227 @@ namespace ProfileEvents namespace DB { -#if USE_JEMALLOC -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -MemoryWorker::MemoryWorker(uint64_t period_ms_) - : period_ms(period_ms_) +namespace ErrorCodes { - LOG_INFO(getLogger("MemoryWorker"), "Starting background memory thread with period of {}ms", period_ms.count()); + extern const int FILE_DOESNT_EXIST; + extern const int INCORRECT_DATA; +} + +#if defined(OS_LINUX) +struct ICgroupsReader +{ + virtual ~ICgroupsReader() = default; + + virtual uint64_t readMemoryUsage() = 0; +}; + +namespace +{ + +/// Format is +/// kernel 5 +/// rss 15 +/// [...] +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +{ + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t value = 0; + readIntText(value, buf); + return value; + } + + LOG_ERROR(getLogger("CgroupsReader"), "Cannot find '{}' in '{}'", key, buf.getFileName()); + return 0; +} + +struct CgroupsV1Reader : ICgroupsReader +{ + explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + buf.rewind(); + return readMetricFromStatFile(buf, "rss"); + } + +private: + std::mutex mutex; + ReadBufferFromFile buf TSA_GUARDED_BY(mutex); +}; + +struct CgroupsV2Reader : ICgroupsReader +{ + explicit CgroupsV2Reader(const fs::path & stat_file_dir) : stat_buf(stat_file_dir / "memory.stat") { } + + uint64_t readMemoryUsage() override + { + std::lock_guard lock(mutex); + stat_buf.rewind(); + return readMetricFromStatFile(stat_buf, "anon"); + } + +private: + std::mutex mutex; + ReadBufferFromFile stat_buf TSA_GUARDED_BY(mutex); +}; + +/// Caveats: +/// - All of the logic in this file assumes that the current process is the only process in the +/// containing cgroup (or more precisely: the only process with significant memory consumption). +/// If this is not the case, then other processe's memory consumption may affect the internal +/// memory tracker ... +/// - Cgroups v1 and v2 allow nested cgroup hierarchies. As v1 is deprecated for over half a +/// decade and will go away at some point, hierarchical detection is only implemented for v2. +/// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such +/// systems existed only for a short transition period. + +std::optional getCgroupsV2Path() +{ + if (!cgroupsV2Enabled()) + return {}; + + if (!cgroupsV2MemoryControllerEnabled()) + return {}; + + fs::path current_cgroup = cgroupV2PathOfProcess(); + if (current_cgroup.empty()) + return {}; + + /// Return the bottom-most nested current memory file. If there is no such file at the current + /// level, try again at the parent level as memory settings are inherited. + while (current_cgroup != default_cgroups_mount.parent_path()) + { + const auto current_path = current_cgroup / "memory.current"; + const auto stat_path = current_cgroup / "memory.stat"; + if (fs::exists(current_path) && fs::exists(stat_path)) + return {current_cgroup}; + current_cgroup = current_cgroup.parent_path(); + } + return {}; +} + +std::optional getCgroupsV1Path() +{ + auto path = default_cgroups_mount / "memory/memory.stat"; + if (!fs::exists(path)) + return {}; + return {default_cgroups_mount / "memory"}; +} + +enum class CgroupsVersion : uint8_t +{ + V1, + V2 +}; + +std::pair getCgroupsPath() +{ + auto v2_path = getCgroupsV2Path(); + if (v2_path.has_value()) + return {*v2_path, CgroupsVersion::V2}; + + auto v1_path = getCgroupsV1Path(); + if (v1_path.has_value()) + return {*v1_path, CgroupsVersion::V1}; + + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot find cgroups v1 or v2 current memory file"); +} + +std::shared_ptr createCgroupsReader() +{ + const auto [cgroup_path, version] = getCgroupsPath(); + LOG_INFO( + getLogger("CgroupsReader"), + "Will create cgroup reader from '{}' (cgroups version: {})", + cgroup_path, + (version == CgroupsVersion::V1) ? "v1" : "v2"); + + if (version == CgroupsVersion::V2) + return std::make_shared(cgroup_path); + else + { + chassert(version == CgroupsVersion::V1); + return std::make_shared(cgroup_path); + } + +} +#endif + +constexpr uint64_t cgroups_memory_usage_tick_ms{50}; +constexpr uint64_t jemalloc_memory_usage_tick_ms{100}; + +std::string_view sourceToString(MemoryWorker::MemoryUsageSource source) +{ + switch (source) + { + case MemoryWorker::MemoryUsageSource::Cgroups: return "Cgroups"; + case MemoryWorker::MemoryUsageSource::Jemalloc: return "Jemalloc"; + case MemoryWorker::MemoryUsageSource::None: return "None"; + } +} + +} + +/// We try to pick the best possible supported source for reading memory usage. +/// Supported sources in order of priority +/// - reading from cgroups' pseudo-files (fastest and most accurate) +/// - reading jemalloc's resident stat (doesn't take into account allocations that didn't use jemalloc) +/// Also, different tick rates are used because not all options are equally fast +MemoryWorker::MemoryWorker(uint64_t period_ms_) + : log(getLogger("MemoryWorker")) + , period_ms(period_ms_) +{ +#if defined(OS_LINUX) + try + { + cgroups_reader = createCgroupsReader(); + source = MemoryUsageSource::Cgroups; + if (period_ms == 0) + period_ms = cgroups_memory_usage_tick_ms; + + return; + } + catch (...) + { + tryLogCurrentException(log, "Cannot use cgroups reader"); + } +#endif + +#if USE_JEMALLOC + source = MemoryUsageSource::Jemalloc; + if (period_ms == 0) + period_ms = jemalloc_memory_usage_tick_ms; +#endif +} + +MemoryWorker::MemoryUsageSource MemoryWorker::getSource() +{ + return source; +} + +void MemoryWorker::start() +{ + if (source == MemoryUsageSource::None) + return; + + LOG_INFO( + getLogger("MemoryWorker"), + "Starting background memory thread with period of {}ms, using {} as source", + period_ms, + sourceToString(source)); background_thread = ThreadFromGlobalPool([this] { backgroundThread(); }); } @@ -40,29 +263,39 @@ MemoryWorker::~MemoryWorker() background_thread.join(); } +uint64_t MemoryWorker::getMemoryUsage() +{ + switch (source) + { + case MemoryUsageSource::Cgroups: + return cgroups_reader->readMemoryUsage(); + case MemoryUsageSource::Jemalloc: + return resident_mib.getValue(); + case MemoryUsageSource::None: + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to fetch memory usage while no memory source can be used"); + } +} + void MemoryWorker::backgroundThread() { - JemallocMibCache epoch_mib("epoch"); - JemallocMibCache resident_mib("stats.resident"); - JemallocMibCache active_mib("stats.active"); - JemallocMibCache allocated_mib("stats.allocated"); - JemallocMibCache purge_mib("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"); + std::chrono::milliseconds chrono_period_ms{period_ms}; bool first_run = true; std::unique_lock lock(mutex); while (true) { - cv.wait_for(lock, period_ms, [this] { return shutdown; }); + cv.wait_for(lock, chrono_period_ms, [this] { return shutdown; }); if (shutdown) return; Stopwatch total_watch; - epoch_mib.setValue(0); - Int64 resident = resident_mib.getValue(); - /// force update the allocated stat from jemalloc for the first run to cover the allocations we missed - /// during initialization - MemoryTracker::updateValues(resident, allocated_mib.getValue(), first_run); + if (source == MemoryUsageSource::Jemalloc) + epoch_mib.setValue(0); + Int64 resident = getMemoryUsage(); + MemoryTracker::updateRSS(resident); + +#if USE_JEMALLOC if (resident > total_memory_tracker.getHardLimit()) { Stopwatch purge_watch; @@ -71,12 +304,19 @@ void MemoryWorker::backgroundThread() ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); } + if (unlikely(first_run || total_memory_tracker.get() < 0)) + { + if (source != MemoryUsageSource::Jemalloc) + epoch_mib.setValue(0); + + MemoryTracker::updateAllocated(allocated_mib.getValue()); + } +#endif + ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); - first_run = false; } } -#endif } diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index 6c0a578aa61..6fde93d63ad 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -1,13 +1,14 @@ #pragma once +#include #include - -#include "config.h" +#include namespace DB { -#if USE_JEMALLOC +struct ICgroupsReader; + /// Correct MemoryTracker based on stats.resident read from jemalloc. /// This requires jemalloc built with --enable-stats which we use. /// The worker spawns a background thread which moves the jemalloc epoch (updates internal stats), @@ -19,8 +20,21 @@ class MemoryWorker public: explicit MemoryWorker(uint64_t period_ms_); + enum class MemoryUsageSource : uint8_t + { + None, + Cgroups, + Jemalloc + }; + + MemoryUsageSource getSource(); + + void start(); + ~MemoryWorker(); private: + uint64_t getMemoryUsage(); + void backgroundThread(); ThreadFromGlobalPool background_thread; @@ -29,14 +43,27 @@ private: std::condition_variable cv; bool shutdown = false; - std::chrono::milliseconds period_ms; -}; -#else -class MemoryWorker -{ -public: - explicit MemoryWorker(uint64_t /*period_ms_*/) {} -}; + LoggerPtr log; + + uint64_t period_ms; + + MemoryUsageSource source{MemoryUsageSource::None}; + +#if defined(OS_LINUX) + std::shared_ptr cgroups_reader; #endif +#if USE_JEMALLOC + JemallocMibCache epoch_mib{"epoch"}; + JemallocMibCache resident_mib{"stats.resident"}; + JemallocMibCache allocated_mib{"stats.allocated"}; + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + JemallocMibCache purge_mib{"arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge"}; +#undef STRINGIFY +#undef STRINGIFY_HELPER +#endif +}; + } diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 86166ffe31b..157858f3c44 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -114,8 +114,13 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM } KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( - ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) - : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), context(std::move(context_)) + ContextPtr context_, + unsigned update_period_seconds, + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, update_jemalloc_epoch_, update_rss_) + , context(std::move(context_)) { } diff --git a/src/Coordination/KeeperAsynchronousMetrics.h b/src/Coordination/KeeperAsynchronousMetrics.h index ec0e60cbb6e..a2ab7cab756 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.h +++ b/src/Coordination/KeeperAsynchronousMetrics.h @@ -13,9 +13,13 @@ class KeeperAsynchronousMetrics : public AsynchronousMetrics { public: KeeperAsynchronousMetrics( - ContextPtr context_, unsigned update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); - ~KeeperAsynchronousMetrics() override; + ContextPtr context_, + unsigned update_period_seconds, + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_); + ~KeeperAsynchronousMetrics() override; private: ContextPtr context; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index aaea0388239..ea5a3f19638 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -157,7 +157,7 @@ namespace DB M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ - M(UInt64, memory_worker_period_ms, 100, "Period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage.", 0) \ + M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 872a9f864df..079029695c9 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -55,9 +55,11 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( ContextPtr global_context_, unsigned update_period_seconds, unsigned heavy_metrics_update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_) : WithContext(global_context_) - , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) + , AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_, update_jemalloc_epoch_, update_rss_) , heavy_metric_update_period(heavy_metrics_update_period_seconds) { /// sanity check diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h index e3c83dc748e..5fab419a32b 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.h +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -14,7 +14,10 @@ public: ContextPtr global_context_, unsigned update_period_seconds, unsigned heavy_metrics_update_period_seconds, - const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + const ProtocolServerMetricsFunc & protocol_server_metrics_func_, + bool update_jemalloc_epoch_, + bool update_rss_); + ~ServerAsynchronousMetrics() override; private: From 5a1b96ac8453f73de5e891e3a9f235fd96270b50 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 23 Jul 2024 10:52:14 +0200 Subject: [PATCH 218/844] Style fix --- src/Common/CgroupsMemoryUsageObserver.cpp | 6 ------ src/Common/MemoryWorker.cpp | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index afeac1808b2..16d5d1cccde 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -19,12 +19,6 @@ using namespace DB; namespace DB { -namespace ErrorCodes -{ -extern const int FILE_DOESNT_EXIST; -extern const int INCORRECT_DATA; -} - CgroupsMemoryUsageObserver::CgroupsMemoryUsageObserver(std::chrono::seconds wait_time_) : log(getLogger("CgroupsMemoryUsageObserver")), wait_time(wait_time_) {} diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 42e797a80d6..ddc3fd783f4 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -30,7 +30,7 @@ namespace DB namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; - extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; } #if defined(OS_LINUX) From 5b51a35e015336227dcc5b25445fefbc3da3059c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 23 Jul 2024 11:31:34 +0200 Subject: [PATCH 219/844] Add unused variables --- src/Common/AsynchronousMetrics.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index eb6ede7a558..fedba4e55be 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -112,8 +112,8 @@ private: MemoryStatisticsOS memory_stat TSA_GUARDED_BY(data_mutex); #endif - const bool update_jemalloc_epoch; - const bool update_rss; + [[maybe_unused]] const bool update_jemalloc_epoch; + [[maybe_unused]] const bool update_rss; #if defined(OS_LINUX) std::optional meminfo TSA_GUARDED_BY(data_mutex); From ade79cfd7a8465bf6e332f3b2ca6143ba652251a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 23 Jul 2024 11:50:56 +0200 Subject: [PATCH 220/844] More fixes --- src/Common/MemoryWorker.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index ddc3fd783f4..8169bc7d177 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -186,9 +186,6 @@ std::shared_ptr createCgroupsReader() } #endif -constexpr uint64_t cgroups_memory_usage_tick_ms{50}; -constexpr uint64_t jemalloc_memory_usage_tick_ms{100}; - std::string_view sourceToString(MemoryWorker::MemoryUsageSource source) { switch (source) @@ -213,6 +210,8 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_) #if defined(OS_LINUX) try { + static constexpr uint64_t cgroups_memory_usage_tick_ms{50}; + cgroups_reader = createCgroupsReader(); source = MemoryUsageSource::Cgroups; if (period_ms == 0) @@ -227,6 +226,8 @@ MemoryWorker::MemoryWorker(uint64_t period_ms_) #endif #if USE_JEMALLOC + static constexpr uint64_t jemalloc_memory_usage_tick_ms{100}; + source = MemoryUsageSource::Jemalloc; if (period_ms == 0) period_ms = jemalloc_memory_usage_tick_ms; @@ -270,7 +271,11 @@ uint64_t MemoryWorker::getMemoryUsage() case MemoryUsageSource::Cgroups: return cgroups_reader->readMemoryUsage(); case MemoryUsageSource::Jemalloc: +#if USE_JEMALLOC return resident_mib.getValue(); +#else + return 0; +#endif case MemoryUsageSource::None: throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Trying to fetch memory usage while no memory source can be used"); } @@ -279,7 +284,7 @@ uint64_t MemoryWorker::getMemoryUsage() void MemoryWorker::backgroundThread() { std::chrono::milliseconds chrono_period_ms{period_ms}; - bool first_run = true; + [[maybe_unused]] bool first_run = true; std::unique_lock lock(mutex); while (true) { @@ -289,8 +294,10 @@ void MemoryWorker::backgroundThread() Stopwatch total_watch; +#if USE_JEMALLOC if (source == MemoryUsageSource::Jemalloc) epoch_mib.setValue(0); +#endif Int64 resident = getMemoryUsage(); MemoryTracker::updateRSS(resident); From 0631ff93778801768edde14c5fd5ed22701f6ec3 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Tue, 23 Jul 2024 12:14:54 +0100 Subject: [PATCH 221/844] Add jsoncompactwithprogress to aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3967179f078..2e403f44e4e 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1855,6 +1855,7 @@ jsoncompactstrings jsoncompactstringseachrow jsoncompactstringseachrowwithnames jsoncompactstringseachrowwithnamesandtypes +jsoncompactwithprogress jsoneachrow jsoneachrowwithprogress jsonobjecteachrow From 53a55221845802e67f9586af615fcb5ed84b20eb Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Tue, 23 Jul 2024 12:19:06 +0100 Subject: [PATCH 222/844] Fix example output in the documentation --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 6161cddefdd..8795b71e0ac 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -935,7 +935,7 @@ Each row is either a metadata object, data object, progress information or stati Example: ```json -{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]}} +{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]} {"progress":{"read_rows":"8","read_bytes":"168","written_rows":"0","written_bytes":"0","total_rows_to_read":"2","result_rows":"0","result_bytes":"0","elapsed_ns":"0"}} {"data":["1", "John Doe"]} {"data":["2", "Joe Doe"]} From a4a0611c1c0b40ea6b99d7e5cf3972fda201bb9d Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Tue, 23 Jul 2024 12:29:40 +0100 Subject: [PATCH 223/844] Fix docs spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2e403f44e4e..c64cfcb0968 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -406,6 +406,7 @@ JSONCompactStrings JSONCompactStringsEachRow JSONCompactStringsEachRowWithNames JSONCompactStringsEachRowWithNamesAndTypes +JSONCompactWithProgress JSONEachRow JSONEachRowWithProgress JSONExtract From 0204de640fae26e64a7b27575b51ff6d723124d0 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 23 Jul 2024 09:32:59 -0300 Subject: [PATCH 224/844] fix test that was recently introduced --- tests/integration/test_user_valid_until/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py index 40d7ceb380e..f25b0021913 100644 --- a/tests/integration/test_user_valid_until/test.py +++ b/tests/integration/test_user_valid_until/test.py @@ -99,14 +99,14 @@ def test_restart(started_cluster): assert ( node.query("SHOW CREATE USER user_restart") - == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + == "CREATE USER user_restart IDENTIFIED WITH no_password VALID UNTIL \\'2010-11-06 05:03:20\\'\n" ) node.restart_clickhouse() assert ( node.query("SHOW CREATE USER user_restart") - == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n" + == "CREATE USER user_restart IDENTIFIED WITH no_password VALID UNTIL \\'2010-11-06 05:03:20\\'\n" ) error = "Authentication failed" From 842b51c782f0cc5cf9bd7b695c466933310a7e6c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 13:30:40 +0000 Subject: [PATCH 225/844] Try assigning replica numbers consistently independent of initiator --- .../ClusterProxy/executeQuery.cpp | 67 +++++++++++- .../QueryPlan/ParallelReplicasLocalPlan.cpp | 7 +- .../QueryPlan/ParallelReplicasLocalPlan.h | 3 +- .../QueryPlan/ReadFromMergeTree.cpp | 8 +- src/Processors/QueryPlan/ReadFromMergeTree.h | 3 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 102 ++++-------------- src/Processors/QueryPlan/ReadFromRemote.h | 6 +- .../ParallelReplicasReadingCoordinator.cpp | 3 +- 8 files changed, 101 insertions(+), 98 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index a8cf3022a61..80f6d7a864b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -40,6 +40,7 @@ namespace ErrorCodes extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; extern const int UNEXPECTED_CLUSTER; + extern const int INCONSISTENT_CLUSTER_DEFINITION; } namespace ClusterProxy @@ -519,22 +520,75 @@ void executeQueryWithParallelReplicas( "`cluster_for_parallel_replicas` setting refers to cluster with several shards. Expected a cluster with one shard"); } - const auto replica_count = std::min(settings.max_parallel_replicas.value, new_cluster->getShardsInfo().begin()->getAllNodeCount()); + const auto & shard = new_cluster->getShardsInfo().at(0); + size_t max_replicas_to_use = settings.max_parallel_replicas; + if (max_replicas_to_use > shard.getAllNodeCount()) + { + LOG_INFO( + getLogger("ReadFromParallelRemoteReplicasStep"), + "The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). " + "Will use the latter number to execute the query.", + settings.max_parallel_replicas, + shard.getAllNodeCount()); + max_replicas_to_use = shard.getAllNodeCount(); + } - auto coordinator = std::make_shared(replica_count, settings.parallel_replicas_mark_segment_size); + auto coordinator = std::make_shared(max_replicas_to_use, settings.parallel_replicas_mark_segment_size); auto external_tables = new_context->getExternalTables(); + std::vector shuffled_pool; + if (max_replicas_to_use < shard.getAllNodeCount()) + { + shuffled_pool = shard.pool->getShuffledPools(settings); + shuffled_pool.resize(max_replicas_to_use); + } + else + { + /// if all replicas in cluster are used for query execution + /// try to preserve replicas order as in cluster definition + /// it's important for data locality during query execution + auto priority_func = [](size_t i) { return Priority{static_cast(i)}; }; + shuffled_pool = shard.pool->getShuffledPools(settings, priority_func); + } + + std::vector pools_to_use; + pools_to_use.reserve(shuffled_pool.size()); + for (auto & pool : shuffled_pool) + pools_to_use.emplace_back(std::move(pool.pool)); + /// do not build local plan for distributed queries for now (address it later) if (settings.allow_experimental_analyzer && settings.parallel_replicas_local_plan && !shard_num) { + /// find local replica index in pool, to assign it as replica number + std::optional local_replica_number; + for (size_t i = 0, s = pools_to_use.size(); i < s; ++i) + { + const auto & hostname = pools_to_use[i]->getHost(); + const auto found = std::find_if( + begin(shard.local_addresses), + end(shard.local_addresses), + [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); + if (found != shard.local_addresses.end()) + { + local_replica_number = i; + break; + } + } + if (!local_replica_number) + throw Exception( + ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, + "Local replica is not found in '{}' cluster definition, see 'cluster_for_parallel_replicas' setting", + new_cluster->getName()); + auto [local_plan, with_parallel_replicas] = createLocalPlanForParallelReplicas( query_ast, header, new_context, processed_stage, coordinator, - std::move(analyzed_read_from_merge_tree)); + std::move(analyzed_read_from_merge_tree), + local_replica_number.value()); if (!with_parallel_replicas) { @@ -542,6 +596,8 @@ void executeQueryWithParallelReplicas( return; } + LOG_DEBUG(logger, "Local replica got replica number {}", local_replica_number.value()); + auto read_from_remote = std::make_unique( query_ast, new_cluster, @@ -555,7 +611,8 @@ void executeQueryWithParallelReplicas( std::move(external_tables), getLogger("ReadFromParallelRemoteReplicasStep"), std::move(storage_limits), - /*exclude_local_replica*/ true); + std::move(pools_to_use), + local_replica_number); auto remote_plan = std::make_unique(); remote_plan->addStep(std::move(read_from_remote)); @@ -587,7 +644,7 @@ void executeQueryWithParallelReplicas( std::move(external_tables), getLogger("ReadFromParallelRemoteReplicasStep"), std::move(storage_limits), - /*exclude_local_replica*/ false); + std::move(pools_to_use)); query_plan.addStep(std::move(read_from_remote)); } diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp index e8ff0f417dc..050044edd3a 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.cpp @@ -27,7 +27,8 @@ std::pair, bool> createLocalPlanForParallelReplicas( ContextPtr context, QueryProcessingStage::Enum processed_stage, ParallelReplicasReadingCoordinatorPtr coordinator, - QueryPlanStepPtr analyzed_read_from_merge_tree) + QueryPlanStepPtr analyzed_read_from_merge_tree, + size_t replica_number) { checkStackSize(); @@ -84,8 +85,8 @@ std::pair, bool> createLocalPlanForParallelReplicas( MergeTreeReadTaskCallback read_task_cb = [coordinator](ParallelReadRequest req) -> std::optional { return coordinator->handleRequest(std::move(req)); }; - auto read_from_merge_tree_parallel_replicas - = reading->createLocalParallelReplicasReadingStep(analyzed_result_ptr, std::move(all_ranges_cb), std::move(read_task_cb)); + auto read_from_merge_tree_parallel_replicas = reading->createLocalParallelReplicasReadingStep( + analyzed_result_ptr, std::move(all_ranges_cb), std::move(read_task_cb), replica_number); node->step = std::move(read_from_merge_tree_parallel_replicas); addConvertingActions(*query_plan, header, /*has_missing_objects=*/false); diff --git a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h index 2a49be6347a..ab0bbeaeeff 100644 --- a/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h +++ b/src/Processors/QueryPlan/ParallelReplicasLocalPlan.h @@ -14,5 +14,6 @@ std::pair, bool> createLocalPlanForParallelReplicas( ContextPtr context, QueryProcessingStage::Enum processed_stage, ParallelReplicasReadingCoordinatorPtr coordinator, - QueryPlanStepPtr read_from_merge_tree); + QueryPlanStepPtr read_from_merge_tree, + size_t replica_number); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 5922550eaf3..aea9d02fe01 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -348,9 +348,9 @@ ReadFromMergeTree::ReadFromMergeTree( std::unique_ptr ReadFromMergeTree::createLocalParallelReplicasReadingStep( AnalysisResultPtr analyzed_result_ptr_, MergeTreeAllRangesCallback all_ranges_callback_, - MergeTreeReadTaskCallback read_task_callback_) + MergeTreeReadTaskCallback read_task_callback_, + size_t replica_number) { - const auto number_of_local_replica = 0; const bool enable_parallel_reading = true; return std::make_unique( prepared_parts, @@ -364,11 +364,11 @@ std::unique_ptr ReadFromMergeTree::createLocalParallelReplica requested_num_streams, max_block_numbers_to_read, log, - analyzed_result_ptr_, + std::move(analyzed_result_ptr_), enable_parallel_reading, all_ranges_callback_, read_task_callback_, - number_of_local_replica); + replica_number); } Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_with_range, Names required_columns, PoolSettings pool_settings) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 307b605c01c..69e20a7864d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -129,7 +129,8 @@ public: std::unique_ptr createLocalParallelReplicasReadingStep( AnalysisResultPtr analyzed_result_ptr_, MergeTreeAllRangesCallback all_ranges_callback_, - MergeTreeReadTaskCallback read_task_callback_); + MergeTreeReadTaskCallback read_task_callback_, + size_t replica_number); static constexpr auto name = "ReadFromMergeTree"; String getName() const override { return name; } diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 46d4aa29e70..3df46eb1987 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -371,7 +371,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( Tables external_tables_, LoggerPtr log_, std::shared_ptr storage_limits_, - bool exclude_local_replica_) + std::vector pools_to_use_, + std::optional exclude_pool_index_) : ISourceStep(DataStream{.header = std::move(header_)}) , cluster(cluster_) , query_ast(query_ast_) @@ -384,24 +385,20 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( , external_tables{external_tables_} , storage_limits(std::move(storage_limits_)) , log(log_) - , exclude_local_replica(exclude_local_replica_) + , pools_to_use(std::move(pools_to_use_)) + , exclude_pool_index(exclude_pool_index_) { chassert(cluster->getShardCount() == 1); std::vector replicas; - replicas.reserve(cluster->getShardsAddresses().front().size()); + replicas.reserve(pools_to_use.size()); - for (const auto & addr : cluster->getShardsAddresses().front()) + for (size_t i = 0, l = pools_to_use.size(); i < l; ++i) { - if (exclude_local_replica && addr.is_local) + if (exclude_pool_index.has_value() && i == exclude_pool_index) continue; - /// replace hostname with replica name if the hostname started with replica namespace, - /// it makes description shorter and more readable - if (!addr.database_replica_name.empty() && addr.host_name.starts_with(addr.database_replica_name)) - replicas.push_back(fmt::format("{}", addr.database_replica_name)); - else - replicas.push_back(fmt::format("{}", addr.host_name)); + replicas.push_back(pools_to_use[i]->getAddress()); } auto description = fmt::format("Query: {} Replicas: {}", formattedAST(query_ast), fmt::join(replicas, ", ")); @@ -421,86 +418,29 @@ void ReadFromParallelRemoteReplicasStep::enforceAggregationInOrder() void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - const Settings & current_settings = context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); - - const auto & shard = cluster->getShardsInfo().at(0); - size_t max_replicas_to_use = current_settings.max_parallel_replicas; - if (max_replicas_to_use > shard.getAllNodeCount()) - { - LOG_INFO( - getLogger("ReadFromParallelRemoteReplicasStep"), - "The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). " - "Will use the latter number to execute the query.", - current_settings.max_parallel_replicas, - shard.getAllNodeCount()); - max_replicas_to_use = shard.getAllNodeCount(); - } - - std::vector shuffled_pool; - if (max_replicas_to_use < shard.getAllNodeCount()) - { - shuffled_pool = shard.pool->getShuffledPools(current_settings); - } - else - { - /// try to preserve replicas order if all replicas in cluster are used for query execution - /// it's important for data locality during query execution - auto priority_func = [](size_t i) { return Priority{static_cast(i)}; }; - shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func); - } - - std::vector pools_to_use; - pools_to_use.reserve(shuffled_pool.size()); - for (const auto & pool : shuffled_pool) - { - if (exclude_local_replica) - { - const auto & hostname = pool.pool->getHost(); - auto it = std::find_if( - begin(shard.local_addresses), - end(shard.local_addresses), - [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); - if (it == shard.local_addresses.end()) - pools_to_use.push_back(pool.pool); - } - else - { - pools_to_use.push_back(pool.pool); - } - } - - pools_to_use.resize(std::min(pools_to_use.size(), max_replicas_to_use)); - // if local plan is used for local replica, we should exclude one remote replica - if (exclude_local_replica && !pools_to_use.empty()) - pools_to_use.resize(max_replicas_to_use - 1); - - LOG_DEBUG( - getLogger("ReadFromParallelRemoteReplicasStep"), - "Number of pools to use is {}. Originally {}", - pools_to_use.size(), - shuffled_pool.size()); - - if (pools_to_use.empty()) - return; std::vector addresses; addresses.reserve(pools_to_use.size()); - for (const auto & pool : pools_to_use) - addresses.emplace_back(pool->getAddress()); + for (size_t i = 0, l = pools_to_use.size(); i < l; ++i) + { + if (exclude_pool_index.has_value() && i == exclude_pool_index) + continue; + + addresses.emplace_back(pools_to_use[i]->getAddress()); + } LOG_DEBUG(getLogger("ReadFromParallelRemoteReplicasStep"), "Addresses to use: {}", fmt::join(addresses, ", ")); - /// when using local plan for local replica, 0 is assigned to local replica as replica num, - in this case, starting from 1 here - size_t replica_num = (exclude_local_replica ? 1 : 0); - for (const auto & pool : pools_to_use) + for (size_t i = 0, l = pools_to_use.size(); i < l; ++i) { + if (exclude_pool_index.has_value() && i == exclude_pool_index) + continue; + IConnections::ReplicaInfo replica_info{ /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it. - .number_of_current_replica = replica_num, + .number_of_current_replica = i, }; - ++replica_num; - addPipeForSingeReplica(pipes, pool, replica_info); + addPipeForSingeReplica(pipes, pools_to_use[i], replica_info); } auto pipe = Pipe::unitePipes(std::move(pipes)); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 442da098a17..74389c8f9eb 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -79,7 +79,8 @@ public: Tables external_tables_, LoggerPtr log_, std::shared_ptr storage_limits_, - bool exclude_local_replica = false); + std::vector pools_to_use, + std::optional exclude_pool_index_ = std::nullopt); String getName() const override { return "ReadFromRemoteParallelReplicas"; } @@ -102,7 +103,8 @@ private: Tables external_tables; std::shared_ptr storage_limits; LoggerPtr log; - bool exclude_local_replica; + std::vector pools_to_use; + std::optional exclude_pool_index; }; } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index a1b36fe1299..a1264eabdea 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -185,7 +185,8 @@ public: void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) { if (++sent_initial_requests > replicas_count) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Initiator received more initial requests than there are replicas"); + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Initiator received more initial requests than there are replicas: replica_num={}", announcement.replica_num); doHandleInitialAllRangesAnnouncement(std::move(announcement)); } From ef4ad01d7d57942a79b46ad05c10160d557ff75f Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Tue, 23 Jul 2024 15:03:01 +0100 Subject: [PATCH 226/844] Fix totals formatting --- .../Impl/JSONCompactWithProgressRowOutputFormat.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 9603ce0265d..ddf071324c6 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -55,8 +55,8 @@ void JSONCompactWithProgressRowOutputFormat::writeRowBetweenDelimiter() void JSONCompactWithProgressRowOutputFormat::writeBeforeTotals() { - JSONUtils::writeFieldDelimiter(*ostr, 2); - JSONUtils::writeCompactArrayStart(*ostr, 1, "totals"); + JSONUtils::writeCompactObjectStart(*ostr); + JSONUtils::writeCompactArrayStart(*ostr, 0, "totals"); } void JSONCompactWithProgressRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) @@ -67,6 +67,8 @@ void JSONCompactWithProgressRowOutputFormat::writeTotals(const Columns & columns void JSONCompactWithProgressRowOutputFormat::writeAfterTotals() { JSONUtils::writeCompactArrayEnd(*ostr); + JSONUtils::writeCompactObjectEnd(*ostr); + writeCString("}\n", *ostr); } void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) @@ -131,6 +133,7 @@ void JSONCompactWithProgressRowOutputFormat::finalizeImpl() JSONUtils::writeException(exception_message, *ostr, settings, 0); JSONUtils::writeCompactObjectEnd(*ostr); } + writeCString("\n", *ostr); ostr->next(); } From f3710e9a29e4b866bfd55886c18ebad954a48e42 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Tue, 23 Jul 2024 15:04:05 +0100 Subject: [PATCH 227/844] Add stateless test for JSONCompactWithProgress output format --- ...03174_json_compact_with_progress.reference | 14 +++++++++++++ .../03174_json_compact_with_progress.sh | 20 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 tests/queries/0_stateless/03174_json_compact_with_progress.reference create mode 100755 tests/queries/0_stateless/03174_json_compact_with_progress.sh diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.reference b/tests/queries/0_stateless/03174_json_compact_with_progress.reference new file mode 100644 index 00000000000..6e83db33565 --- /dev/null +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.reference @@ -0,0 +1,14 @@ +1 +{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}]}} +{"data":[1, "a"]} +{"data":[2, "b"]} +{"data":[3, "c"]} +{"progress":{"read_rows":"3","read_bytes":"33","written_rows":"0","written_bytes":"0","total_rows_to_read":"3","result_rows":"0","result_bytes":"0","elapsed_ns":"ELAPSED_NS"}} +{"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":33}} +2 +{"meta": [{"name":"name", "type":"String"}, {"name":"c", "type":"UInt64"}]}} +{"data":["a", "1"]} +{"data":["b", "1"]} +{"data":["c", "1"]} +{"totals": ["", "3"]}} +{"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":30}} diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.sh b/tests/queries/0_stateless/03174_json_compact_with_progress.sh new file mode 100755 index 00000000000..c21f7228517 --- /dev/null +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table;" + +$CLICKHOUSE_CLIENT -q "SELECT 1;" +# Check JSONCompactWithProgress Output +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value;" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c');" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_table FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' + +$CLICKHOUSE_CLIENT -q "SELECT 2;" +# Check Totals +$CLICKHOUSE_CLIENT -q "SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table;" From 04d80ec2763a0677f32e9f6190cf2bcda0ebf8b7 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 23 Jul 2024 16:13:07 +0200 Subject: [PATCH 228/844] Fix non-linux build --- src/Common/MemoryWorker.cpp | 4 ++-- src/Common/MemoryWorker.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 8169bc7d177..c576772d303 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -33,7 +33,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -#if defined(OS_LINUX) struct ICgroupsReader { virtual ~ICgroupsReader() = default; @@ -44,6 +43,7 @@ struct ICgroupsReader namespace { +#if defined(OS_LINUX) /// Format is /// kernel 5 /// rss 15 @@ -269,7 +269,7 @@ uint64_t MemoryWorker::getMemoryUsage() switch (source) { case MemoryUsageSource::Cgroups: - return cgroups_reader->readMemoryUsage(); + return cgroups_reader != nullptr ? cgroups_reader->readMemoryUsage() : 0; case MemoryUsageSource::Jemalloc: #if USE_JEMALLOC return resident_mib.getValue(); diff --git a/src/Common/MemoryWorker.h b/src/Common/MemoryWorker.h index 6fde93d63ad..b1b0495bf14 100644 --- a/src/Common/MemoryWorker.h +++ b/src/Common/MemoryWorker.h @@ -49,9 +49,7 @@ private: MemoryUsageSource source{MemoryUsageSource::None}; -#if defined(OS_LINUX) std::shared_ptr cgroups_reader; -#endif #if USE_JEMALLOC JemallocMibCache epoch_mib{"epoch"}; From f5630f86e4853c06db1267628e4a8c0e80d62889 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 15:43:25 +0000 Subject: [PATCH 229/844] Fix: resize pools_to_use correctly (keep local replica in it) --- .../ClusterProxy/executeQuery.cpp | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 80f6d7a864b..44474a11b50 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -540,14 +540,15 @@ void executeQueryWithParallelReplicas( std::vector shuffled_pool; if (max_replicas_to_use < shard.getAllNodeCount()) { + // will be shuffled according to `load_balancing` setting shuffled_pool = shard.pool->getShuffledPools(settings); - shuffled_pool.resize(max_replicas_to_use); } else { - /// if all replicas in cluster are used for query execution - /// try to preserve replicas order as in cluster definition - /// it's important for data locality during query execution + /// If all replicas in cluster are used for query execution, + /// try to preserve replicas order as in cluster definition. + /// It's important for data locality during query execution + /// independently of the query initiator auto priority_func = [](size_t i) { return Priority{static_cast(i)}; }; shuffled_pool = shard.pool->getShuffledPools(settings, priority_func); } @@ -560,8 +561,8 @@ void executeQueryWithParallelReplicas( /// do not build local plan for distributed queries for now (address it later) if (settings.allow_experimental_analyzer && settings.parallel_replicas_local_plan && !shard_num) { - /// find local replica index in pool, to assign it as replica number - std::optional local_replica_number; + /// find local replica index in pool + std::optional local_replica_index; for (size_t i = 0, s = pools_to_use.size(); i < s; ++i) { const auto & hostname = pools_to_use[i]->getHost(); @@ -571,16 +572,25 @@ void executeQueryWithParallelReplicas( [&hostname](const Cluster::Address & local_addr) { return hostname == local_addr.host_name; }); if (found != shard.local_addresses.end()) { - local_replica_number = i; + local_replica_index = i; break; } } - if (!local_replica_number) + if (!local_replica_index) throw Exception( ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, "Local replica is not found in '{}' cluster definition, see 'cluster_for_parallel_replicas' setting", new_cluster->getName()); + // resize the pool but keep local replicas in it (and udpate its index) + chassert(max_replicas_to_use <= pools_to_use.size()); + if (local_replica_index >= max_replicas_to_use) + { + std::swap(pools_to_use[max_replicas_to_use - 1], pools_to_use[local_replica_index.value()]); + local_replica_index = max_replicas_to_use - 1; + } + pools_to_use.resize(max_replicas_to_use); + auto [local_plan, with_parallel_replicas] = createLocalPlanForParallelReplicas( query_ast, header, @@ -588,7 +598,7 @@ void executeQueryWithParallelReplicas( processed_stage, coordinator, std::move(analyzed_read_from_merge_tree), - local_replica_number.value()); + local_replica_index.value()); if (!with_parallel_replicas) { @@ -596,7 +606,7 @@ void executeQueryWithParallelReplicas( return; } - LOG_DEBUG(logger, "Local replica got replica number {}", local_replica_number.value()); + LOG_DEBUG(logger, "Local replica got replica number {}", local_replica_index.value()); auto read_from_remote = std::make_unique( query_ast, @@ -612,7 +622,7 @@ void executeQueryWithParallelReplicas( getLogger("ReadFromParallelRemoteReplicasStep"), std::move(storage_limits), std::move(pools_to_use), - local_replica_number); + local_replica_index); auto remote_plan = std::make_unique(); remote_plan->addStep(std::move(read_from_remote)); @@ -631,6 +641,9 @@ void executeQueryWithParallelReplicas( } else { + chassert(max_replicas_to_use <= pools_to_use.size()); + pools_to_use.resize(max_replicas_to_use); + auto read_from_remote = std::make_unique( query_ast, new_cluster, From 87ad12e5fceec5c412964fbf0a6336a4d3da7662 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 17:16:11 +0000 Subject: [PATCH 230/844] Fix typo --- src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 44474a11b50..444a872f491 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -582,7 +582,7 @@ void executeQueryWithParallelReplicas( "Local replica is not found in '{}' cluster definition, see 'cluster_for_parallel_replicas' setting", new_cluster->getName()); - // resize the pool but keep local replicas in it (and udpate its index) + // resize the pool but keep local replicas in it (and update its index) chassert(max_replicas_to_use <= pools_to_use.size()); if (local_replica_index >= max_replicas_to_use) { From f49412e967ec549d8a49e53ce3bfba27e13994fc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 23 Jul 2024 17:16:40 +0000 Subject: [PATCH 231/844] Change local replica position in `parallel_replicas` cluster --- tests/config/config.d/clusters.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/config/config.d/clusters.xml b/tests/config/config.d/clusters.xml index af26565d7e6..ff42b5828e3 100644 --- a/tests/config/config.d/clusters.xml +++ b/tests/config/config.d/clusters.xml @@ -115,10 +115,6 @@ false - - 127.0.0.1 - 9000 - 127.0.0.2 9000 @@ -147,6 +143,10 @@ 127.0.0.8 9000 + + 127.0.0.1 + 9000 + 127.0.0.9 9000 From ec5459a60d0be15e7bc100bd257e2b5c65baf594 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 Jul 2024 09:15:34 +0200 Subject: [PATCH 232/844] Update allocated with resident if no jemalloc --- src/Common/MemoryWorker.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index c576772d303..75d0e7c32d8 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -310,15 +310,19 @@ void MemoryWorker::backgroundThread() ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurge); ProfileEvents::increment(ProfileEvents::MemoryAllocatorPurgeTimeMicroseconds, purge_watch.elapsedMicroseconds()); } +#endif if (unlikely(first_run || total_memory_tracker.get() < 0)) { +#if USE_JEMALLOC if (source != MemoryUsageSource::Jemalloc) epoch_mib.setValue(0); MemoryTracker::updateAllocated(allocated_mib.getValue()); - } +#elif defined(OS_LINUX) + MemoryTracker::updateAllocated(resident); #endif + } ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); From e2e4c8ee0f8fcf1d6ef4d566c2fa4f9ee2123a56 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 24 Jul 2024 10:21:09 +0200 Subject: [PATCH 233/844] Better --- src/Common/MemoryWorker.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index 84ccffb8e90..e148f7f8f49 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -65,11 +65,25 @@ Metrics readAllMetricsFromStatFile(ReadBufferFromFile & buf) return metrics; } -uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & key) +uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, std::string_view key) { - const auto all_metrics = readAllMetricsFromStatFile(buf); - if (const auto it = all_metrics.find(key); it != all_metrics.end()) - return it->second; + while (!buf.eof()) + { + std::string current_key; + readStringUntilWhitespace(current_key, buf); + if (current_key != key) + { + std::string dummy; + readStringUntilNewlineInto(dummy, buf); + buf.ignore(); + continue; + } + + assertChar(' ', buf); + uint64_t value = 0; + readIntText(value, buf); + return value; + } LOG_ERROR(getLogger("CgroupsReader"), "Cannot find '{}' in '{}'", key, buf.getFileName()); return 0; } From 4712b79960d9da21741aa504cd508a5902e569fe Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 24 Jul 2024 16:41:37 +0800 Subject: [PATCH 234/844] Add min_max as statistics type --- src/Storages/Statistics/Statistics.cpp | 8 +- src/Storages/Statistics/StatisticsMinMax.cpp | 91 +++++++++++++++++++ src/Storages/Statistics/StatisticsMinMax.h | 31 +++++++ src/Storages/StatisticsDescription.cpp | 8 +- src/Storages/StatisticsDescription.h | 1 + ...2864_statistics_count_min_sketch.reference | 14 --- .../02864_statistics_estimation.reference | 25 +++++ ...ch.sql => 02864_statistics_estimation.sql} | 41 ++++++++- 8 files changed, 198 insertions(+), 21 deletions(-) create mode 100644 src/Storages/Statistics/StatisticsMinMax.cpp create mode 100644 src/Storages/Statistics/StatisticsMinMax.h delete mode 100644 tests/queries/0_stateless/02864_statistics_count_min_sketch.reference create mode 100644 tests/queries/0_stateless/02864_statistics_estimation.reference rename tests/queries/0_stateless/{02864_statistics_count_min_sketch.sql => 02864_statistics_estimation.sql} (57%) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index ade3326288a..5227f3d235e 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,8 @@ Float64 ColumnStatistics::estimateLess(const Field & val) const { if (stats.contains(StatisticsType::TDigest)) return stats.at(StatisticsType::TDigest)->estimateLess(val); + if (stats.contains(StatisticsType::MinMax)) + return stats.at(StatisticsType::MinMax)->estimateLess(val); return rows * ConditionSelectivityEstimator::default_normal_cond_factor; } @@ -204,6 +207,9 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { + registerValidator(StatisticsType::MinMax, minMaxValidator); + registerCreator(StatisticsType::MinMax, minMaxCreator); + registerValidator(StatisticsType::TDigest, tdigestValidator); registerCreator(StatisticsType::TDigest, tdigestCreator); @@ -240,7 +246,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'min_max', 'tdigest' 'uniq' and 'count_min'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp new file mode 100644 index 00000000000..8138ea9fa87 --- /dev/null +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_STATISTICS; +} + +StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_) + : IStatistics(stat_) + , min(std::numeric_limits::max()) + , max(std::numeric_limits::min()) + , row_count(0) + , data_type(data_type_) +{ +} + +Float64 StatisticsMinMax::estimateLess(const Field & val) const +{ + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; + + auto val_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); + + if (val_float < min) + return 0; + + if (val_float > max) + return row_count; + + if (max == min) + return row_count; + + return ((val_float - min) / (max - min)) * row_count; +} + +void StatisticsMinMax::update(const ColumnPtr & column) +{ + for (size_t row = 0; row < column->size(); ++row) + { + if (column->isNullAt(row)) + continue; + + auto data = column->getFloat64(row); + min = std::min(data, min); + max = std::max(data, max); + } + row_count += column->size(); +} + +void StatisticsMinMax::serialize(WriteBuffer & buf) +{ + writeIntBinary(row_count, buf); + writeFloatBinary(min, buf); + writeFloatBinary(max, buf); +} + +void StatisticsMinMax::deserialize(ReadBuffer & buf) +{ + readIntBinary(row_count, buf); + readFloatBinary(min, buf); + readFloatBinary(max, buf); +} + + +void minMaxValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +{ + data_type = removeNullable(data_type); + data_type = removeLowCardinalityAndNullable(data_type); + if (!data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'min_max' do not support type {}", data_type->getName()); +} + +StatisticsPtr minMaxCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +{ + return std::make_shared(stat, data_type); +} + +} diff --git a/src/Storages/Statistics/StatisticsMinMax.h b/src/Storages/Statistics/StatisticsMinMax.h new file mode 100644 index 00000000000..913dcccb798 --- /dev/null +++ b/src/Storages/Statistics/StatisticsMinMax.h @@ -0,0 +1,31 @@ +#include + + +namespace DB +{ + +class StatisticsMinMax : public IStatistics +{ +public: + StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_); + ~StatisticsMinMax() override = default; + + Float64 estimateLess(const Field & val) const override; + + void update(const ColumnPtr & column) override; + + void serialize(WriteBuffer & buf) override; + void deserialize(ReadBuffer & buf) override; + +private: + Float64 min; + Float64 max; + Float64 row_count; + + DataTypePtr data_type; +}; + +void minMaxValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr minMaxCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); + +} diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 9c5fd3604b2..0d99c36259f 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -51,7 +51,9 @@ static StatisticsType stringToStatisticsType(String type) return StatisticsType::Uniq; if (type == "count_min") return StatisticsType::CountMinSketch; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); + if (type == "min_max") + return StatisticsType::MinMax; + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'min_max', 'tdigest', 'uniq' and 'count_min'.", type); } String SingleStatisticsDescription::getTypeName() const @@ -64,8 +66,10 @@ String SingleStatisticsDescription::getTypeName() const return "Uniq"; case StatisticsType::CountMinSketch: return "count_min"; + case StatisticsType::MinMax: + return "min_max"; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'tdigest', 'uniq' and 'count_min'.", type); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'min_max', 'tdigest', 'uniq' and 'count_min'.", type); } } diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h index 03b8fb0d583..1aa51ed03dd 100644 --- a/src/Storages/StatisticsDescription.h +++ b/src/Storages/StatisticsDescription.h @@ -14,6 +14,7 @@ enum class StatisticsType : UInt8 TDigest = 0, Uniq = 1, CountMinSketch = 2, + MinMax = 3, Max = 63, }; diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference b/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference deleted file mode 100644 index 02c41656a36..00000000000 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.reference +++ /dev/null @@ -1,14 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference new file mode 100644 index 00000000000..a1db6b8dc96 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -0,0 +1,25 @@ +CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `d` DateTime64(3),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +Test statistics min_max: + Prewhere info + Prewhere filter + Prewhere filter column: and(greater(d, _CAST(9998_DateTime(\'UTC\'), \'DateTime(\'UTC\')\')), less(c, -1), greater(b, 0)) (removed) +Test statistics count_min: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) +Test statistics multi-types: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) +Test statistics min_max and tdigest: + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) +Test LowCardinality and Nullable data type: +tab2 diff --git a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql similarity index 57% rename from tests/queries/0_stateless/02864_statistics_count_min_sketch.sql rename to tests/queries/0_stateless/02864_statistics_estimation.sql index c730aa7b4a7..f3d085b837b 100644 --- a/tests/queries/0_stateless/02864_statistics_count_min_sketch.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -12,13 +12,28 @@ CREATE TABLE tab a String, b UInt64, c Int64, + d DateTime64, pk String, ) Engine = MergeTree() ORDER BY pk SETTINGS min_bytes_for_wide_part = 0; SHOW CREATE TABLE tab; -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), generateUUIDv4() FROM system.numbers LIMIT 10000; +INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), toDateTime(number, 'UTC'), generateUUIDv4() FROM system.numbers LIMIT 10000; + +SELECT 'Test statistics min_max:'; + +ALTER TABLE tab ADD STATISTICS b TYPE min_max; +ALTER TABLE tab ADD STATISTICS c TYPE min_max; +ALTER TABLE tab ADD STATISTICS d TYPE min_max; +ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; + +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > toDateTime(9998, 'UTC')/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab DROP STATISTICS b, c, d; + SELECT 'Test statistics count_min:'; @@ -28,7 +43,7 @@ ALTER TABLE tab ADD STATISTICS c TYPE count_min; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) xx +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c; @@ -39,7 +54,8 @@ SELECT 'Test statistics multi-types:'; ALTER TABLE tab ADD STATISTICS a TYPE count_min; ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; +ALTER TABLE tab ADD STATISTICS d TYPE count_min, uniq, tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) @@ -49,7 +65,24 @@ SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -ALTER TABLE tab DROP STATISTICS a, b, c; +ALTER TABLE tab DROP STATISTICS a, b, c, d; + + +SELECT 'Test statistics min_max and tdigest:'; + +ALTER TABLE tab ADD STATISTICS b TYPE min_max; +ALTER TABLE tab MATERIALIZE STATISTICS b; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*5000*/ and b < 10/*100*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab ADD STATISTICS b TYPE tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS b; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*5000*/ and b < 10/*100*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +ALTER TABLE tab DROP STATISTICS b; + DROP TABLE IF EXISTS tab SYNC; From 40fc0ca5743a6b1dcc464268a107ba206ca375ab Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 24 Jul 2024 17:43:27 +0800 Subject: [PATCH 235/844] Uniq statistics supports more datatypes and refactor logical when there is no statistics. --- .../ConditionSelectivityEstimator.cpp | 41 ++++++------------ .../ConditionSelectivityEstimator.h | 10 ++--- src/Storages/Statistics/Statistics.cpp | 26 ++++++------ src/Storages/Statistics/Statistics.h | 11 +++-- src/Storages/Statistics/StatisticsMinMax.cpp | 8 ++-- src/Storages/Statistics/StatisticsTDigest.cpp | 42 ++++++++++--------- src/Storages/Statistics/StatisticsTDigest.h | 9 ++-- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- src/Storages/Statistics/StatisticsUniq.h | 4 +- .../02864_statistics_estimation.reference | 9 +++- .../02864_statistics_estimation.sql | 21 ++++++++-- 11 files changed, 94 insertions(+), 89 deletions(-) diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp index 57dff958b9a..86970a3ac46 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -19,7 +19,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const { if (part_statistics.empty()) - return default_normal_cond_factor * rows; + return default_cond_range_factor * rows; Float64 result = 0; Float64 part_rows = 0; for (const auto & [key, estimator] : part_statistics) @@ -38,15 +38,8 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateGreat Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual(const Field & val, Float64 rows) const { if (part_statistics.empty()) - { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (!float_val) - return default_unknown_cond_factor * rows; - else if (float_val.value() < - threshold || float_val.value() > threshold) - return default_normal_cond_factor * rows; - else - return default_good_cond_factor * rows; - } + return default_cond_equal_factor * rows; + Float64 result = 0; Float64 partial_cnt = 0; for (const auto & [key, estimator] : part_statistics) @@ -149,30 +142,22 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode auto [op, val] = extractBinaryOp(node, col); + if (dummy) + { + if (op == "equals") + return default_cond_equal_factor * total_rows; + else if (op == "less" || op == "lessOrEquals" || op == "greater" || op == "greaterOrEquals") + return default_cond_range_factor * total_rows; + else + return default_unknown_cond_factor * total_rows; + } + if (op == "equals") - { - if (dummy) - { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (!float_val || (float_val < - threshold || float_val > threshold)) - return default_normal_cond_factor * total_rows; - else - return default_good_cond_factor * total_rows; - } return estimator.estimateEqual(val, total_rows); - } else if (op == "less" || op == "lessOrEquals") - { - if (dummy) - return default_normal_cond_factor * total_rows; return estimator.estimateLess(val, total_rows); - } else if (op == "greater" || op == "greaterOrEquals") - { - if (dummy) - return default_normal_cond_factor * total_rows; return estimator.estimateGreater(val, total_rows); - } else return default_unknown_cond_factor * total_rows; } diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.h b/src/Storages/Statistics/ConditionSelectivityEstimator.h index ce7fdd12e92..269ee9ac6cb 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.h +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.h @@ -38,12 +38,10 @@ private: std::pair extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const; - static constexpr auto default_good_cond_factor = 0.1; - static constexpr auto default_normal_cond_factor = 0.5; - static constexpr auto default_unknown_cond_factor = 1.0; - /// Conditions like "x = N" are considered good if abs(N) > threshold. - /// This is used to assume that condition is likely to have good selectivity. - static constexpr auto threshold = 2; + /// Used to estimate the selectivity of a condition when there is no statistics. + static constexpr auto default_cond_range_factor = 0.5; + static constexpr auto default_cond_equal_factor = 0.01; + static constexpr auto default_unknown_cond_factor = 1; UInt64 total_rows = 0; std::map column_estimators; diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 5227f3d235e..3513638ea9e 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -51,13 +51,6 @@ std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) } } -std::optional StatisticsUtils::tryConvertToString(const DB::Field & field) -{ - if (field.getType() == Field::Types::String) - return field.get(); - return {}; -} - IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) { @@ -106,7 +99,7 @@ Float64 ColumnStatistics::estimateLess(const Field & val) const return stats.at(StatisticsType::TDigest)->estimateLess(val); if (stats.contains(StatisticsType::MinMax)) return stats.at(StatisticsType::MinMax)->estimateLess(val); - return rows * ConditionSelectivityEstimator::default_normal_cond_factor; + return rows * ConditionSelectivityEstimator::default_cond_range_factor; } Float64 ColumnStatistics::estimateGreater(const Field & val) const @@ -116,8 +109,7 @@ Float64 ColumnStatistics::estimateGreater(const Field & val) const Float64 ColumnStatistics::estimateEqual(const Field & val) const { - auto float_val = StatisticsUtils::tryConvertToFloat64(val); - if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) + if (stats_desc.data_type->isValueRepresentedByNumber() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest)) { /// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket. if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048) @@ -127,10 +119,16 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const if (stats.contains(StatisticsType::CountMinSketch)) return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val); #endif - if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold)) - return rows * ConditionSelectivityEstimator::default_normal_cond_factor; - else - return rows * ConditionSelectivityEstimator::default_good_cond_factor; + if (stats.contains(StatisticsType::Uniq)) + { + auto cardinality = stats.at(StatisticsType::Uniq)->estimateCardinality(); + if (cardinality == 0) + return 0; + /// Assume that the value is uniformly distributed among the unique values. + return static_cast(1) / stats.at(StatisticsType::Uniq)->estimateCardinality(); + } + + return rows * ConditionSelectivityEstimator::default_cond_equal_factor; } /// ------------------------------------- diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 16f0c67eabd..f47a707663c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -19,7 +19,6 @@ struct StatisticsUtils { /// Returns std::nullopt if input Field cannot be converted to a concrete value static std::optional tryConvertToFloat64(const Field & field); - static std::optional tryConvertToString(const Field & field); }; /// Statistics describe properties of the values in the column, @@ -32,11 +31,6 @@ public: explicit IStatistics(const SingleStatisticsDescription & stat_); virtual ~IStatistics() = default; - virtual void update(const ColumnPtr & column) = 0; - - virtual void serialize(WriteBuffer & buf) = 0; - virtual void deserialize(ReadBuffer & buf) = 0; - /// Estimate the cardinality of the column. /// Throws if the statistics object is not able to do a meaningful estimation. virtual UInt64 estimateCardinality() const; @@ -46,6 +40,11 @@ public: virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column + virtual void update(const ColumnPtr & column) = 0; + + virtual void serialize(WriteBuffer & buf) = 0; + virtual void deserialize(ReadBuffer & buf) = 0; + protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index 8138ea9fa87..083e8634841 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -32,18 +32,18 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const if (val_converted.isNull()) return 0; - auto val_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); - if (val_float < min) + if (val_as_float < min) return 0; - if (val_float > max) + if (val_as_float > max) return row_count; if (max == min) return row_count; - return ((val_float - min) / (max - min)) * row_count; + return ((val_as_float - min) / (max - min)) * row_count; } void StatisticsMinMax::update(const ColumnPtr & column) diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 66150e00fdb..3544f5cdea3 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -10,24 +12,20 @@ extern const int ILLEGAL_STATISTICS; extern const int LOGICAL_ERROR; } -StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) - : IStatistics(stat_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) + : IStatistics(stat_), data_type(data_type_) { } void StatisticsTDigest::update(const ColumnPtr & column) { - size_t rows = column->size(); - for (size_t row = 0; row < rows; ++row) + for (size_t row = 0; row < column->size(); ++row) { - Field field; - column->get(row, field); - - if (field.isNull()) + if (column->isNullAt(row)) continue; - if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field)) - t_digest.add(*field_as_float, 1); + auto data = column->getFloat64(row); + t_digest.add(data, 1); } } @@ -43,18 +41,22 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf) Float64 StatisticsTDigest::estimateLess(const Field & val) const { - auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); - if (val_as_float) - return t_digest.getCountLessThan(*val_as_float); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; + + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); + return t_digest.getCountLessThan(val_as_float); } Float64 StatisticsTDigest::estimateEqual(const Field & val) const { - auto val_as_float = StatisticsUtils::tryConvertToFloat64(val); - if (val_as_float) - return t_digest.getCountEqual(*val_as_float); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; + + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); + return t_digest.getCountEqual(val_as_float); } void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) @@ -65,9 +67,9 @@ void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { - return std::make_shared(stat); + return std::make_shared(stat, data_type); } } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index 614973e5d8b..f4de6cc41e0 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -9,18 +9,19 @@ namespace DB class StatisticsTDigest : public IStatistics { public: - explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); + + Float64 estimateLess(const Field & val) const override; + Float64 estimateEqual(const Field & val) const override; void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - Float64 estimateLess(const Field & val) const override; - Float64 estimateEqual(const Field & val) const override; - private: QuantileTDigest t_digest; + DataTypePtr data_type; }; void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 8f60ffcf0b5..aace45a14b1 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -56,7 +56,7 @@ void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) + if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index faabde8d47c..81162b0a5b9 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -13,13 +13,13 @@ public: StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); ~StatisticsUniq() override; + UInt64 estimateCardinality() const override; + void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; - UInt64 estimateCardinality() const override; - private: std::unique_ptr arena; AggregateFunctionPtr collector; diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index a1db6b8dc96..64f264017d8 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -14,12 +14,19 @@ Test statistics multi-types: Prewhere info Prewhere filter Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test statistics min_max and tdigest: +Test estimating range condition: Prewhere info Prewhere filter Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) Prewhere info Prewhere filter Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) +Test estimating equals condition: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 10)) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(b, 10)) (removed) Test LowCardinality and Nullable data type: tab2 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index f3d085b837b..0ea9eb38e60 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -68,7 +68,7 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c, d; -SELECT 'Test statistics min_max and tdigest:'; +SELECT 'Test estimating range condition:'; ALTER TABLE tab ADD STATISTICS b TYPE min_max; ALTER TABLE tab MATERIALIZE STATISTICS b; @@ -84,6 +84,21 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS b; +SELECT 'Test estimating equals condition:'; + +ALTER TABLE tab ADD STATISTICS a TYPE uniq; +ALTER TABLE tab MATERIALIZE STATISTICS a; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b = 10/*100*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +ALTER TABLE tab ADD STATISTICS a TYPE count_min; +ALTER TABLE tab MATERIALIZE STATISTICS a; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b = 10/*100*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +ALTER TABLE tab DROP STATISTICS a; + DROP TABLE IF EXISTS tab SYNC; @@ -93,8 +108,8 @@ SET allow_suspicious_low_cardinality_types=1; CREATE TABLE tab2 ( a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(count_min), + b Nullable(Int64) STATISTICS(min_max, count_min), + c LowCardinality(Nullable(Int64)) STATISTICS(min_max, count_min), pk String, ) Engine = MergeTree() ORDER BY pk; From 5ae356e6df457e24220f0f63f4a4984334471202 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 24 Jul 2024 17:54:48 +0800 Subject: [PATCH 236/844] Add document for min_max statistics --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7ffbd9a5bae..8e8d5e55772 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -991,6 +991,9 @@ They can be used for prewhere optimization only if we enable `set allow_statisti #### Available Types of Column Statistics {#available-types-of-column-statistics} +- `min_max` + + min_max statistics allows to estimate selectivity of range condition for numeric columns. - `TDigest` [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. From 3f7cd09d168ea13e4ec281c8f3943c0aebe9f920 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 24 Jul 2024 18:02:26 +0800 Subject: [PATCH 237/844] Remove useless function tryConvertToFloat64 --- src/Storages/Statistics/Statistics.cpp | 22 ---------------------- src/Storages/Statistics/Statistics.h | 6 ------ 2 files changed, 28 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 3513638ea9e..0f0f1de4552 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -28,28 +28,6 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; -std::optional StatisticsUtils::tryConvertToFloat64(const Field & field) -{ - switch (field.getType()) - { - case Field::Types::Int64: - return field.get(); - case Field::Types::UInt64: - return field.get(); - case Field::Types::Float64: - return field.get(); - case Field::Types::Int128: - return field.get(); - case Field::Types::UInt128: - return field.get(); - case Field::Types::Int256: - return field.get(); - case Field::Types::UInt256: - return field.get(); - default: - return {}; - } -} IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index f47a707663c..3eeb5cf92a8 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -15,12 +15,6 @@ constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; -struct StatisticsUtils -{ - /// Returns std::nullopt if input Field cannot be converted to a concrete value - static std::optional tryConvertToFloat64(const Field & field); -}; - /// Statistics describe properties of the values in the column, /// e.g. how many unique values exist, /// what are the N most frequent values, From 118e329e23b51673e7d62bb3ffa8a34a8ecdb330 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Wed, 24 Jul 2024 13:09:47 +0100 Subject: [PATCH 238/844] Fix flaky test --- ...03174_json_compact_with_progress.reference | 19 ++++++++++--------- .../03174_json_compact_with_progress.sh | 4 ++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.reference b/tests/queries/0_stateless/03174_json_compact_with_progress.reference index 6e83db33565..b735c871fbd 100644 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.reference +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.reference @@ -1,14 +1,15 @@ 1 -{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}]}} -{"data":[1, "a"]} -{"data":[2, "b"]} -{"data":[3, "c"]} +{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}, {"name":"sleep(0.1)", "type":"UInt8"}]}} {"progress":{"read_rows":"3","read_bytes":"33","written_rows":"0","written_bytes":"0","total_rows_to_read":"3","result_rows":"0","result_bytes":"0","elapsed_ns":"ELAPSED_NS"}} +{"data":[1, "a", 0]} +{"data":[2, "b", 0]} +{"data":[3, "c", 0]} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":33}} 2 -{"meta": [{"name":"name", "type":"String"}, {"name":"c", "type":"UInt64"}]}} -{"data":["a", "1"]} -{"data":["b", "1"]} -{"data":["c", "1"]} -{"totals": ["", "3"]}} +{"meta": [{"name":"name", "type":"String"}, {"name":"count()", "type":"UInt64"}, {"name":"c", "type":"UInt8"}]}} +{"progress":{"read_rows":"3","read_bytes":"30","written_rows":"0","written_bytes":"0","total_rows_to_read":"3","result_rows":"0","result_bytes":"0","elapsed_ns":"ELAPSED_NS"}} +{"data":["a", "1", 0]} +{"data":["b", "1", 0]} +{"data":["c", "1", 0]} +{"totals": ["", "3", 0]}} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":30}} diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.sh b/tests/queries/0_stateless/03174_json_compact_with_progress.sh index c21f7228517..383668de858 100755 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.sh +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.sh @@ -11,10 +11,10 @@ $CLICKHOUSE_CLIENT -q "SELECT 1;" # Check JSONCompactWithProgress Output $CLICKHOUSE_CLIENT -q "CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value;" $CLICKHOUSE_CLIENT -q "INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c');" -$CLICKHOUSE_CLIENT -q "SELECT * FROM test_table FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' +$CLICKHOUSE_CLIENT -q "SELECT *, sleep(0.1) FROM test_table FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' $CLICKHOUSE_CLIENT -q "SELECT 2;" # Check Totals -$CLICKHOUSE_CLIENT -q "SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' +$CLICKHOUSE_CLIENT -q "SELECT name, count(), sleep(0.1) AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table;" From b0ec8c92fcc09857345acb4e4f627827556d17cd Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Wed, 24 Jul 2024 17:49:33 +0100 Subject: [PATCH 239/844] Fix totals json object --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index ddf071324c6..0814e5f45d6 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -68,7 +68,7 @@ void JSONCompactWithProgressRowOutputFormat::writeAfterTotals() { JSONUtils::writeCompactArrayEnd(*ostr); JSONUtils::writeCompactObjectEnd(*ostr); - writeCString("}\n", *ostr); + writeCString("\n", *ostr); } void JSONCompactWithProgressRowOutputFormat::writeExtremesElement(const char * title, const Columns & columns, size_t row_num) From caaf14a0a91bd4779cc292dd674761a47148d24d Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Wed, 24 Jul 2024 17:56:14 +0100 Subject: [PATCH 240/844] Fix flaky test by ignoring progress lines when comparing output --- ...03174_json_compact_with_progress.reference | 20 +++++++++---------- .../03174_json_compact_with_progress.sh | 4 ++-- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.reference b/tests/queries/0_stateless/03174_json_compact_with_progress.reference index b735c871fbd..b45f296e5c0 100644 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.reference +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.reference @@ -1,15 +1,13 @@ 1 -{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}, {"name":"sleep(0.1)", "type":"UInt8"}]}} -{"progress":{"read_rows":"3","read_bytes":"33","written_rows":"0","written_bytes":"0","total_rows_to_read":"3","result_rows":"0","result_bytes":"0","elapsed_ns":"ELAPSED_NS"}} -{"data":[1, "a", 0]} -{"data":[2, "b", 0]} -{"data":[3, "c", 0]} +{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}]}} +{"data":[1, "a"]} +{"data":[2, "b"]} +{"data":[3, "c"]} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":33}} 2 -{"meta": [{"name":"name", "type":"String"}, {"name":"count()", "type":"UInt64"}, {"name":"c", "type":"UInt8"}]}} -{"progress":{"read_rows":"3","read_bytes":"30","written_rows":"0","written_bytes":"0","total_rows_to_read":"3","result_rows":"0","result_bytes":"0","elapsed_ns":"ELAPSED_NS"}} -{"data":["a", "1", 0]} -{"data":["b", "1", 0]} -{"data":["c", "1", 0]} -{"totals": ["", "3", 0]}} +{"meta": [{"name":"name", "type":"String"}, {"name":"c", "type":"UInt64"}]}} +{"data":["a", "1"]} +{"data":["b", "1"]} +{"data":["c", "1"]} +{"totals": ["", "3"]} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":30}} diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.sh b/tests/queries/0_stateless/03174_json_compact_with_progress.sh index 383668de858..b440dbe2891 100755 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.sh +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.sh @@ -11,10 +11,10 @@ $CLICKHOUSE_CLIENT -q "SELECT 1;" # Check JSONCompactWithProgress Output $CLICKHOUSE_CLIENT -q "CREATE TABLE test_table (value UInt8, name String) ENGINE = MergeTree() ORDER BY value;" $CLICKHOUSE_CLIENT -q "INSERT INTO test_table VALUES (1, 'a'), (2, 'b'), (3, 'c');" -$CLICKHOUSE_CLIENT -q "SELECT *, sleep(0.1) FROM test_table FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_table FORMAT JSONCompactWithProgress settings max_block_size=2;" | grep -v --text "progress" | sed -E 's/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' $CLICKHOUSE_CLIENT -q "SELECT 2;" # Check Totals -$CLICKHOUSE_CLIENT -q "SELECT name, count(), sleep(0.1) AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | sed -E 's/"elapsed_ns":"[0-9]+"/"elapsed_ns":"ELAPSED_NS"/g; s/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' +$CLICKHOUSE_CLIENT -q "SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | grep -v --text "progress" | sed -E 's/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table;" From f449c2fea0487abbe262c10fc5af9a99df1bc822 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 25 Jul 2024 08:45:08 +0200 Subject: [PATCH 241/844] Fix --- src/Common/MemoryTracker.h | 5 +++++ src/Common/MemoryWorker.cpp | 8 +++----- src/Coordination/KeeperDispatcher.cpp | 3 ++- src/Coordination/KeeperServer.cpp | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index d2db8489f19..f15465a20c1 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -120,6 +120,11 @@ public: return amount.load(std::memory_order_relaxed); } + Int64 getRSS() const + { + return rss.load(std::memory_order_relaxed); + } + // Merges and mutations may pass memory ownership to other threads thus in the end of execution // MemoryTracker for background task may have a non-zero counter. // This method is intended to fix the counter inside of background_memory_tracker. diff --git a/src/Common/MemoryWorker.cpp b/src/Common/MemoryWorker.cpp index e148f7f8f49..1b869ed9d6b 100644 --- a/src/Common/MemoryWorker.cpp +++ b/src/Common/MemoryWorker.cpp @@ -312,7 +312,7 @@ uint64_t MemoryWorker::getMemoryUsage() void MemoryWorker::backgroundThread() { std::chrono::milliseconds chrono_period_ms{period_ms}; - bool first_run = true; + [[maybe_unused]] bool first_run = true; std::unique_lock lock(mutex); while (true) { @@ -340,17 +340,15 @@ void MemoryWorker::backgroundThread() } #endif +#if USE_JEMALLOC if (unlikely(first_run || total_memory_tracker.get() < 0)) { -#if USE_JEMALLOC if (source != MemoryUsageSource::Jemalloc) epoch_mib.setValue(0); MemoryTracker::updateAllocated(allocated_mib.getValue()); -#elif defined(OS_LINUX) - MemoryTracker::updateAllocated(resident); -#endif } +#endif ProfileEvents::increment(ProfileEvents::MemoryWorkerRun); ProfileEvents::increment(ProfileEvents::MemoryWorkerRunElapsedMicroseconds, total_watch.elapsedMicroseconds()); diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 4c2ccb8db64..893bb8e6082 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -150,10 +150,11 @@ void KeeperDispatcher::requestThread() { LOG_WARNING( log, - "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type " + "Processing requests refused because of max_memory_usage_soft_limit {}, the total allocated memory is {}, RSS is {}, request type " "is {}", ReadableSize(mem_soft_limit), ReadableSize(total_memory_tracker.get()), + ReadableSize(total_memory_tracker.getRSS()), request.request->getOpNum()); addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS); continue; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index d40e5ef2e50..ad9e8d32caa 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -599,7 +599,7 @@ bool KeeperServer::isLeaderAlive() const bool KeeperServer::isExceedingMemorySoftLimit() const { Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit(); - return mem_soft_limit > 0 && total_memory_tracker.get() >= mem_soft_limit; + return mem_soft_limit > 0 && std::max(total_memory_tracker.get(), total_memory_tracker.getRSS()) >= mem_soft_limit; } /// TODO test whether taking failed peer in count From b9703ad9242f7492de6f6d10e13ac89a74231210 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 25 Jul 2024 15:14:12 +0800 Subject: [PATCH 242/844] Fix logical error when estimating --- src/Storages/Statistics/Statistics.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 0f0f1de4552..17ff8deb720 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -103,7 +103,7 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const if (cardinality == 0) return 0; /// Assume that the value is uniformly distributed among the unique values. - return static_cast(1) / stats.at(StatisticsType::Uniq)->estimateCardinality(); + return static_cast(1) / stats.at(StatisticsType::Uniq)->estimateCardinality() * rows; } return rows * ConditionSelectivityEstimator::default_cond_equal_factor; From f3f9d5f4deb0a72cdb9c3fa9f0987bf137427305 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 25 Jul 2024 09:07:13 -0300 Subject: [PATCH 243/844] add notes about no_password behavior --- .../en/sql-reference/statements/alter/user.md | 21 +++++++++++++------ .../sql-reference/statements/create/user.md | 3 +++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index e58dc9fbd57..959f98b0db1 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -63,19 +63,28 @@ Allows the user with `john` account to grant his privileges to the user with `ja ALTER USER john GRANTEES jack; ``` -Adds new authentication methods to the user while keeping the existing ones +Adds new authentication methods to the user while keeping the existing ones: ``` sql -ALTER USER user ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' +ALTER USER user1 ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' ``` -Reset authentication methods and adds the ones specified in the query (effect of leading IDENTIFIED without the ADD keyword) +Note: `no_password` can not co-exist with other authentication methods for security reasons. +Because of that, it is not possible to `ADD` a `no_password` authentication method. The below query will throw an error: ``` sql -ALTER USER user IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' +ALTER USER user1 ADD IDENTIFIED WITH no_password ``` -Reset authentication methods and keep the most recent added one +If you want to drop authentication methods for a user and rely on `no_password`, you must specify in the below replacing form. + +Reset authentication methods and adds the ones specified in the query (effect of leading IDENTIFIED without the ADD keyword): + ``` sql -ALTER USER user RESET AUTHENTICATION METHODS TO NEW +ALTER USER user1 IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' +``` + +Reset authentication methods and keep the most recent added one: +``` sql +ALTER USER user1 RESET AUTHENTICATION METHODS TO NEW ``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 486d5eb3192..8bfd9f2797b 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -150,6 +150,9 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re CREATE USER user1 IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3'' ``` +Note: `no_password` can not co-exist with other authentication methods for security reasons. Therefore, you can only specify +`no_password` if it is the only authentication method in the query. + ## User Host User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: From 8eda32600fde5225017aa5ea87ee2ecad551914e Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 25 Jul 2024 09:13:09 -0300 Subject: [PATCH 244/844] remove todo --- src/Parsers/tests/gtest_common.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index 0aded8c98f7..594436a1714 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -63,7 +63,6 @@ TEST_P(ParserKQLTest, parseKQLQuery) { if (input_text.starts_with("ATTACH")) { - // todo arthur check auto salt = (dynamic_cast(ast.get())->authentication_methods.back())->getSalt().value_or(""); EXPECT_TRUE(re2::RE2::FullMatch(salt, expected_ast)); } From 97ceca4b92289352eb71b6cfd0f5280162334609 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 25 Jul 2024 12:20:39 +0000 Subject: [PATCH 245/844] fix reading from Merge tables --- .../QueryPlan/Optimizations/projectionsCommon.cpp | 11 +++++++++-- src/Storages/IStorage.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 10 ++++++++-- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/StorageMerge.cpp | 6 ++++-- 5 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index 487367b8c30..e7df58e1a86 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -41,12 +41,19 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading) if (reading->readsInOrder()) return false; + const auto & query_settings = reading->getContext()->getSettingsRef(); + // Currently projection don't support deduplication when moving parts between shards. - if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication) + if (query_settings.allow_experimental_query_deduplication) return false; // Currently projection don't support settings which implicitly modify aggregate functions. - if (reading->getContext()->getSettingsRef().aggregate_functions_null_for_empty) + if (query_settings.aggregate_functions_null_for_empty) + return false; + + /// Don't use projections if have mutations to apply + /// because we need to apply them on original data. + if (query_settings.apply_mutations_on_fly && reading->getMutationsSnapshot()->hasDataMutations()) return false; return true; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6217470780d..92de82934e3 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -271,6 +271,7 @@ public: /// Return true if the trivial count query could be optimized without reading the data at all /// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method. + /// 'storage_snapshot' may be nullptr. virtual bool supportsTrivialCountOptimization(const StorageSnapshotPtr & /*storage_snapshot*/, ContextPtr /*query_context*/) const { return false; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b32a3f5eda3..f1054355b07 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8444,10 +8444,16 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart DB::updateObjectColumns(object_columns, columns, part->getColumns()); } -bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const +bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const { + if (hasLightweightDeletedMask()) + return false; + + if (!storage_snapshot) + return !query_context->getSettingsRef().apply_mutations_on_fly; + const auto & snapshot_data = assert_cast(*storage_snapshot->data); - return !hasLightweightDeletedMask() && !snapshot_data.mutations_snapshot->hasDataMutations(); + return !snapshot_data.mutations_snapshot->hasDataMutations(); } Int64 MergeTreeData::getMinMetadataVersion(const DataPartsVector & parts) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d5cfddb70af..588b31db7b9 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -443,7 +443,7 @@ public: bool areAsynchronousInsertsEnabled() const override; - bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr) const override; + bool supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr query_context) const override; /// A snapshot of pending mutations that weren't applied to some of the parts yet /// and should be applied on the fly (i.e. when reading from the part). diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f5bc183931f..0cc369c87fa 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1617,9 +1617,11 @@ std::tuple StorageMerge::evaluateDatabaseName(cons return {false, ast}; } -bool StorageMerge::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr ctx) const +bool StorageMerge::supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr ctx) const { - return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(storage_snapshot, ctx); }) == nullptr; + /// Here we actually need storage snapshot of all nested tables. + /// But to avoid complexity pass nullptr to make more lightweight check in MergeTreeData. + return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(nullptr, ctx); }) == nullptr; } std::optional StorageMerge::totalRows(const Settings & settings) const From 63ccbcbdfc7aa012e1879b11f9f6f4a23f9a1a0c Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Thu, 25 Jul 2024 13:22:47 +0100 Subject: [PATCH 246/844] Remove test tags --- tests/queries/0_stateless/03174_json_compact_with_progress.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.sh b/tests/queries/0_stateless/03174_json_compact_with_progress.sh index b440dbe2891..8f524cdff76 100755 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.sh +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 0404a8e80048fd9f0e2d1772774533bc828b44dc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 25 Jul 2024 14:14:31 -0300 Subject: [PATCH 247/844] make auth_type a vector of int8_t and auth_params a json array --- src/Storages/System/StorageSystemUsers.cpp | 43 +++++++++++----------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index e36c904ded7..105e4af53ef 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -48,8 +49,8 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() {"name", std::make_shared(), "User name."}, {"id", std::make_shared(), "User ID."}, {"storage", std::make_shared(), "Path to the storage of users. Configured in the access_control_path parameter."}, - {"auth_type", std::make_shared(getAuthenticationTypeEnumValues()), - "Shows the authentication type. " + {"auth_type", std::make_shared(std::make_shared(getAuthenticationTypeEnumValues())), + "Shows the authentication types. " "There are multiple ways of user identification: " "with no password, with plain text password, with SHA256-encoded password, " "with double SHA-1-encoded password or with bcrypt-encoded password." @@ -97,7 +98,8 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_storage = assert_cast(*res_columns[column_index++]); - auto & column_auth_type = assert_cast(*res_columns[column_index++]).getData(); + auto & column_auth_type = assert_cast(assert_cast(*res_columns[column_index]).getData()); + auto & column_auth_type_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_auth_params = assert_cast(*res_columns[column_index++]); auto & column_host_ip = assert_cast(assert_cast(*res_columns[column_index]).getData()); auto & column_host_ip_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); @@ -119,11 +121,10 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auto & column_grantees_except_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_default_database = assert_cast(*res_columns[column_index++]); - // todo arthur check this auto add_row = [&](const String & name, const UUID & id, const String & storage_name, - const AuthenticationData & auth_data, + const std::vector & authentication_methods, const AllowedClientHosts & allowed_hosts, const RolesOrUsersSet & default_roles, const RolesOrUsersSet & grantees, @@ -132,12 +133,13 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte column_name.insertData(name.data(), name.length()); column_id.push_back(id.toUnderType()); column_storage.insertData(storage_name.data(), storage_name.length()); - column_auth_type.push_back(static_cast(auth_data.getType())); - if (auth_data.getType() == AuthenticationType::LDAP || - auth_data.getType() == AuthenticationType::KERBEROS || - auth_data.getType() == AuthenticationType::SSL_CERTIFICATE) + Poco::JSON::Array json_array; + + for (const auto & auth_data : authentication_methods) { + column_auth_type.insertValue(static_cast(auth_data.getType())); + Poco::JSON::Object auth_params_json; if (auth_data.getType() == AuthenticationType::LDAP) @@ -165,18 +167,17 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auth_params_json.set("subject_alt_names", subject_alt_names); } - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(auth_params_json, oss); - const auto str = oss.str(); + json_array.add(auth_params_json); + } - column_auth_params.insertData(str.data(), str.size()); - } - else - { - static constexpr std::string_view empty_json{"{}"}; - column_auth_params.insertData(empty_json.data(), empty_json.length()); - } + column_auth_type_offsets.push_back(column_auth_type.size()); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json_array, oss); + const auto authentication_params_str = oss.str(); + + column_auth_params.insertData(authentication_params_str.data(), authentication_params_str.size()); if (allowed_hosts.containsAnyHost()) { @@ -248,7 +249,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte if (!storage) continue; - add_row(user->getName(), id, storage->getStorageName(), user->authentication_methods.back(), user->allowed_client_hosts, + add_row(user->getName(), id, storage->getStorageName(), user->authentication_methods, user->allowed_client_hosts, user->default_roles, user->grantees, user->default_database); } } From 77d46aad0555b5044a69cb743969ba79effe516b Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 25 Jul 2024 14:24:26 -0300 Subject: [PATCH 248/844] change auth_params type from string to array --- src/Storages/System/StorageSystemUsers.cpp | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 105e4af53ef..ce4950f5e7b 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -55,7 +55,9 @@ ColumnsDescription StorageSystemUsers::getColumnsDescription() "with no password, with plain text password, with SHA256-encoded password, " "with double SHA-1-encoded password or with bcrypt-encoded password." }, - {"auth_params", std::make_shared(), "Authentication parameters in the JSON format depending on the auth_type."}, + {"auth_params", std::make_shared(std::make_shared()), + "Authentication parameters in the JSON format depending on the auth_type." + }, {"host_ip", std::make_shared(std::make_shared()), "IP addresses of hosts that are allowed to connect to the ClickHouse server." }, @@ -100,7 +102,8 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auto & column_storage = assert_cast(*res_columns[column_index++]); auto & column_auth_type = assert_cast(assert_cast(*res_columns[column_index]).getData()); auto & column_auth_type_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); - auto & column_auth_params = assert_cast(*res_columns[column_index++]); + auto & column_auth_params = assert_cast(assert_cast(*res_columns[column_index]).getData()); + auto & column_auth_params_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_host_ip = assert_cast(assert_cast(*res_columns[column_index]).getData()); auto & column_host_ip_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_host_names = assert_cast(assert_cast(*res_columns[column_index]).getData()); @@ -134,12 +137,8 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte column_id.push_back(id.toUnderType()); column_storage.insertData(storage_name.data(), storage_name.length()); - Poco::JSON::Array json_array; - for (const auto & auth_data : authentication_methods) { - column_auth_type.insertValue(static_cast(auth_data.getType())); - Poco::JSON::Object auth_params_json; if (auth_data.getType() == AuthenticationType::LDAP) @@ -167,18 +166,18 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte auth_params_json.set("subject_alt_names", subject_alt_names); } - json_array.add(auth_params_json); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(auth_params_json, oss); + const auto authentication_params_str = oss.str(); + + column_auth_params.insertData(authentication_params_str.data(), authentication_params_str.size()); + column_auth_type.insertValue(static_cast(auth_data.getType())); } + column_auth_params_offsets.push_back(column_auth_params.size()); column_auth_type_offsets.push_back(column_auth_type.size()); - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(json_array, oss); - const auto authentication_params_str = oss.str(); - - column_auth_params.insertData(authentication_params_str.data(), authentication_params_str.size()); - if (allowed_hosts.containsAnyHost()) { static constexpr std::string_view str{"::/0"}; From f00928afa14864cd8efbc186f63538c8cd99f643 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 25 Jul 2024 18:41:43 +0000 Subject: [PATCH 249/844] Improve castOrDefault from Variant/Dynamic columns --- src/Functions/FunctionsConversion.cpp | 52 +++++++++++++++++-- ..._variant_dynamic_cast_or_default.reference | 32 ++++++++++++ .../03212_variant_dynamic_cast_or_default.sql | 9 ++++ 3 files changed, 89 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference create mode 100644 tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..e516d1dbe54 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4086,9 +4086,30 @@ private: /// Create conversion wrapper for each variant. for (const auto & variant_type : variant_types) - variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type)); + { + WrapperType wrapper; + if (cast_type == CastType::accurateOrNull) + { + /// With accurateOrNull cast type we should insert default values on variants that cannot be casted. + /// We can avoid try/catch here if we will implement check that 2 types can be casted, but it + /// requires quite a lot of work. By now let's simply use try/catch. + try + { + wrapper = prepareUnpackDictionaries(variant_type, to_type); + } + catch (...) + { + /// Leave wrapper empty and check it later. + } + } + else + { + wrapper = prepareUnpackDictionaries(variant_type, to_type); + } + variant_wrappers.push_back(wrapper); + } - return [variant_wrappers, variant_types, to_type] + return [variant_wrappers, variant_types, to_type, cast_type_ = this->cast_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { const auto & column_variant = assert_cast(*arguments.front().column.get()); @@ -4101,7 +4122,30 @@ private: auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i); ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; const auto & variant_wrapper = variant_wrappers[i]; - casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + ColumnPtr casted_variant; + /// Check if we have wrapper for this variant. + if (variant_wrapper) + { + if (cast_type_ == CastType::accurateOrNull) + { + /// With accurateOrNull cast type wrapper should throw an exception + /// only when the cast between types is not supported. + /// In this case we will insert default values on rows with this variant. + try + { + casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); + } + catch (...) + { + /// Do nothing. + } + } + else + { + casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); + } + } + casted_variant_columns.push_back(std::move(casted_variant)); } /// Second, construct resulting column from casted variant columns according to discriminators. @@ -4111,7 +4155,7 @@ private: for (size_t i = 0; i != input_rows_count; ++i) { auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]); - if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR || !casted_variant_columns[global_discr]) res->insertDefault(); else res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i)); diff --git a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference new file mode 100644 index 00000000000..8b1a342181c --- /dev/null +++ b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference @@ -0,0 +1,32 @@ +0 \N +1 1 +0 str_2 +0 [0,1,2] +0 \N +5 5 +0 str_6 +0 [0,1,2,3,4,5,6] +\N \N +1 1 +\N str_2 +\N [0,1,2] +\N \N +5 5 +\N str_6 +\N [0,1,2,3,4,5,6] +0 \N +1 1 +0 str_2 +0 [0,1,2] +0 \N +5 5 +0 str_6 +0 [0,1,2,3,4,5,6] +\N \N +1 1 +\N str_2 +\N [0,1,2] +\N \N +5 5 +\N str_6 +\N [0,1,2,3,4,5,6] diff --git a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql new file mode 100644 index 00000000000..1e71e36780c --- /dev/null +++ b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql @@ -0,0 +1,9 @@ +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +select accurateCastOrDefault(variant, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number)) as variant from numbers(8); +select accurateCastOrNull(variant, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number)) as variant from numbers(8); + +select accurateCastOrDefault(dynamic, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number))::Dynamic as dynamic from numbers(8); +select accurateCastOrNull(dynamic, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number))::Dynamic as dynamic from numbers(8); From eb300f4f782bf9a6b216624bddd6e6deffd55d0f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 25 Jul 2024 18:59:06 +0000 Subject: [PATCH 250/844] Better implementation --- src/Functions/FunctionsConversion.cpp | 56 ++++++++++++--------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index e516d1dbe54..c14fa3187d8 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4078,6 +4078,26 @@ private: }; } + /// Create wrapper only if we support this conversion. + WrapperType createWrapperIfCanConvert(const DataTypePtr & from, const DataTypePtr & to) const + { + try + { + /// We can avoid try/catch here if we will implement check that 2 types can be casted, but it + /// requires quite a lot of work. By now let's simply use try/catch. + /// First, check that we can create a wrapper. + WrapperType wrapper = prepareUnpackDictionaries(from, to); + /// Second, check if we can perform a conversion on empty columns. + ColumnsWithTypeAndName column_from = {{from->createColumn(), from, "" }}; + wrapper(column_from, to, nullptr, 0); + return wrapper; + } + catch (...) + { + return {}; + } + } + WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const { const auto & variant_types = from_variant.getVariants(); @@ -4090,17 +4110,8 @@ private: WrapperType wrapper; if (cast_type == CastType::accurateOrNull) { - /// With accurateOrNull cast type we should insert default values on variants that cannot be casted. - /// We can avoid try/catch here if we will implement check that 2 types can be casted, but it - /// requires quite a lot of work. By now let's simply use try/catch. - try - { - wrapper = prepareUnpackDictionaries(variant_type, to_type); - } - catch (...) - { - /// Leave wrapper empty and check it later. - } + /// Create wrapper only if we support conversion from variant to the resulting type. + wrapper = createWrapperIfCanConvert(variant_type, to_type); } else { @@ -4109,7 +4120,7 @@ private: variant_wrappers.push_back(wrapper); } - return [variant_wrappers, variant_types, to_type, cast_type_ = this->cast_type] + return [variant_wrappers, variant_types, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { const auto & column_variant = assert_cast(*arguments.front().column.get()); @@ -4125,26 +4136,7 @@ private: ColumnPtr casted_variant; /// Check if we have wrapper for this variant. if (variant_wrapper) - { - if (cast_type_ == CastType::accurateOrNull) - { - /// With accurateOrNull cast type wrapper should throw an exception - /// only when the cast between types is not supported. - /// In this case we will insert default values on rows with this variant. - try - { - casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); - } - catch (...) - { - /// Do nothing. - } - } - else - { - casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); - } - } + casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); casted_variant_columns.push_back(std::move(casted_variant)); } From 93cbd4bf9a7cc926ff87281d62d9ae17fd376526 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 25 Jul 2024 17:44:38 -0300 Subject: [PATCH 251/844] update test --- .../0_stateless/02117_show_create_table_system.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 8f62eda9233..c15352ff155 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -1136,8 +1136,8 @@ CREATE TABLE system.users `name` String, `id` UUID, `storage` String, - `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9, 'jwt' = 10), - `auth_params` String, + `auth_type` Array(Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7, 'ssh_key' = 8, 'http' = 9, 'jwt' = 10)), + `auth_params` Array(String), `host_ip` Array(String), `host_names` Array(String), `host_names_regexp` Array(String), From 921947e3681879fa6adc1166c4dea9f49073650b Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 10:45:25 -0300 Subject: [PATCH 252/844] fix tests and add new one --- tests/integration/test_tlsv1_3/test.py | 4 ++-- tests/queries/0_stateless/01292_create_user.reference | 10 ++++++---- tests/queries/0_stateless/01292_create_user.sql | 5 +++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index e36989a9cdb..06222d0370a 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -219,6 +219,6 @@ def test_create_user(): instance.query( "SELECT name, auth_type, auth_params FROM system.users WHERE name IN ['emma', 'lucy'] ORDER BY name" ) - == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' - 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' + == 'emma\t[\'ssl_certificate\']\t[\'{"common_names":["client2"]}\']\n' + 'lucy\t[\'ssl_certificate\']\t[\'{"common_names":["client2","client3"]}\']\n' ) diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index c23723b3ec1..3df69ae2669 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -106,10 +106,10 @@ CREATE USER u2_01292 IDENTIFIED WITH no_password DEFAULT ROLE r1_01292, r2_01292 CREATE USER u3_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 CREATE USER u4_01292 IDENTIFIED WITH no_password HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 -- system.users -u1_01292 local_directory plaintext_password {} [] ['localhost'] [] [] 1 [] [] -u2_01292 local_directory no_password {} [] [] [] ['%.%.myhost.com'] 0 [] [] -u3_01292 local_directory sha256_password {} ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] -u4_01292 local_directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_01292'] +u1_01292 local_directory ['plaintext_password'] ['{}'] [] ['localhost'] [] [] 1 [] [] +u2_01292 local_directory ['no_password'] ['{}'] [] [] [] ['%.%.myhost.com'] 0 [] [] +u3_01292 local_directory ['sha256_password'] ['{}'] ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] +u4_01292 local_directory ['double_sha1_password'] ['{}'] ['::/0'] [] [] [] 1 [] ['r1_01292'] -- system.settings_profile_elements \N u1_01292 \N 0 readonly 1 \N \N \N \N \N u2_01292 \N 0 \N \N \N \N \N default @@ -117,3 +117,5 @@ u4_01292 local_directory double_sha1_password {} ['::/0'] [] [] [] 1 [] ['r1_012 \N u4_01292 \N 0 \N \N \N \N \N default \N u4_01292 \N 1 max_memory_usage 5000000 \N \N \N \N \N u4_01292 \N 2 readonly 1 \N \N \N \N +-- multiple authentication methods +u1_01292 ['plaintext_password','kerberos','bcrypt_password','ldap'] ['{}','{"realm":"qwerty10"}','{}','{"server":"abc"}'] diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index 46808aec1ef..41633b3d423 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -233,3 +233,8 @@ SELECT * FROM system.settings_profile_elements WHERE user_name LIKE 'u%\_01292' DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292; DROP ROLE r1_01292, r2_01292; + +SELECT '-- multiple authentication methods'; +CREATE USER u1_01292 IDENTIFIED WITH plaintext_password by '1', kerberos REALM 'qwerty10', bcrypt_password by '3', ldap SERVER 'abc'; +SELECT name, auth_type, auth_params FROM system.users WHERE name = 'u1_01292' ORDER BY name; +DROP USER u1_01292; From a21529f66aea514f6b344453cd78415ee22a82d1 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 11:05:05 -0300 Subject: [PATCH 253/844] style --- tests/integration/test_tlsv1_3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index 06222d0370a..07239c1b31f 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -219,6 +219,6 @@ def test_create_user(): instance.query( "SELECT name, auth_type, auth_params FROM system.users WHERE name IN ['emma', 'lucy'] ORDER BY name" ) - == 'emma\t[\'ssl_certificate\']\t[\'{"common_names":["client2"]}\']\n' + == "emma\t['ssl_certificate']\t['{\"common_names\":[\"client2\"]}']\n" 'lucy\t[\'ssl_certificate\']\t[\'{"common_names":["client2","client3"]}\']\n' ) From 5a45563f1bd8580fbd2d9e2a708e047142f689be Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 14:36:58 -0300 Subject: [PATCH 254/844] add note about downgrading --- docs/en/sql-reference/statements/alter/user.md | 4 +++- docs/en/sql-reference/statements/create/user.md | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 959f98b0db1..9c581655a3c 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -69,7 +69,9 @@ Adds new authentication methods to the user while keeping the existing ones: ALTER USER user1 ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' ``` -Note: `no_password` can not co-exist with other authentication methods for security reasons. +Note: +1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. +2. Note: `no_password` can not co-exist with other authentication methods for security reasons. Because of that, it is not possible to `ADD` a `no_password` authentication method. The below query will throw an error: ``` sql diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 8bfd9f2797b..0329cf0655a 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -150,8 +150,10 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re CREATE USER user1 IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3'' ``` -Note: `no_password` can not co-exist with other authentication methods for security reasons. Therefore, you can only specify -`no_password` if it is the only authentication method in the query. +Note: +1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. +2. `no_password` can not co-exist with other authentication methods for security reasons. Therefore, you can only specify +`no_password` if it is the only authentication method in the query. ## User Host From 456613e7fa1c4a563d6c275d0c75627e480b933d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 14:40:50 -0300 Subject: [PATCH 255/844] minor doc change --- docs/en/sql-reference/statements/alter/user.md | 4 ++-- docs/en/sql-reference/statements/create/user.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 9c581655a3c..1595f2894df 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -69,9 +69,9 @@ Adds new authentication methods to the user while keeping the existing ones: ALTER USER user1 ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3' ``` -Note: +Notes: 1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. -2. Note: `no_password` can not co-exist with other authentication methods for security reasons. +2. `no_password` can not co-exist with other authentication methods for security reasons. Because of that, it is not possible to `ADD` a `no_password` authentication method. The below query will throw an error: ``` sql diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 0329cf0655a..32b43df9248 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -150,7 +150,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re CREATE USER user1 IDENTIFIED WITH plaintext_password by '1', bcrypt_password by '2', plaintext_password by '3'' ``` -Note: +Notes: 1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. 2. `no_password` can not co-exist with other authentication methods for security reasons. Therefore, you can only specify `no_password` if it is the only authentication method in the query. From 546cca125169fe966da7b210a422c0f0383c5954 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 14:42:58 -0300 Subject: [PATCH 256/844] small comment update --- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index e877d01ead3..b6e68763a3a 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -73,7 +73,7 @@ namespace return authentication_method.getType() == AuthenticationType::NO_PASSWORD; }) != user.authentication_methods.end(); - // 1. a leading IDENTIFIED WITH will drop existing authentication methods in favor of new ones. + // 1. an IDENTIFIED WITH will drop existing authentication methods in favor of new ones. // 2. if the user contains an auth method of type NO_PASSWORD and another one is being added, NO_PASSWORD must be dropped if (replace_authentication_methods || (has_no_password_authentication_method && !authentication_methods.empty())) { From 352b502559528bb0628854e5dbd90a0c2b6a11cb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 26 Jul 2024 14:47:37 -0300 Subject: [PATCH 257/844] update logout session --- src/Interpreters/Session.cpp | 2 +- src/Interpreters/SessionLog.cpp | 10 +++++++--- src/Interpreters/SessionLog.h | 6 +++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index b65a6bebdf1..a82e8d76c4e 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -304,7 +304,7 @@ Session::~Session() LOG_DEBUG(log, "{} Logout, user_id: {}", toString(auth_id), toString(*user_id)); if (auto session_log = getSessionLog()) { - session_log->addLogOut(auth_id, user, getClientInfo()); + session_log->addLogOut(auth_id, user, user_authenticated_with, getClientInfo()); } } } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 30c3bd570f3..d8c6e3d844f 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -258,15 +258,19 @@ void SessionLog::addLoginFailure( add(std::move(log_entry)); } -void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info) +void SessionLog::addLogOut( + const UUID & auth_id, + const UserPtr & login_user, + const AuthenticationData & user_authenticated_with, + const ClientInfo & client_info) { auto log_entry = SessionLogElement(auth_id, SESSION_LOGOUT); if (login_user) { log_entry.user = login_user->getName(); - log_entry.user_identified_with = login_user->authentication_methods.back().getType(); + log_entry.user_identified_with = user_authenticated_with.getType(); } - log_entry.external_auth_server = login_user ? login_user->authentication_methods.back().getLDAPServerName() : ""; + log_entry.external_auth_server = user_authenticated_with.getLDAPServerName(); log_entry.client_info = client_info; add(std::move(log_entry)); diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 2037d6c6861..6221267d14c 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -82,7 +82,11 @@ public: const AuthenticationData & user_authenticated_with); void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional & user, const Exception & reason); - void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info); + void addLogOut( + const UUID & auth_id, + const UserPtr & login_user, + const AuthenticationData & user_authenticated_with, + const ClientInfo & client_info); }; } From f5ee7aaf26046fb943b03de3e6a852119c0b24b8 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 28 Jul 2024 10:04:38 -0300 Subject: [PATCH 258/844] update some integ tests --- tests/integration/test_grant_and_revoke/test.py | 8 ++++---- tests/integration/test_ssl_cert_authentication/test.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 498c60a1d40..41163e5234a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -483,8 +483,8 @@ def test_introspection(): [ "A", "local_directory", - "no_password", - "{}", + "['no_password']", + "['{}']", "['::/0']", "[]", "[]", @@ -496,8 +496,8 @@ def test_introspection(): [ "B", "local_directory", - "no_password", - "{}", + "['no_password']", + "['{}']", "['::/0']", "[]", "[]", diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 756a1e1996c..f85f9ec9382 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -335,8 +335,8 @@ def test_create_user(): instance.query( "SELECT name, auth_type, auth_params FROM system.users WHERE name IN ['emma', 'lucy'] ORDER BY name" ) - == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' - 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' + == "emma\t['ssl_certificate']\t['{\"common_names\":[\"client2\"]}']\n" + 'lucy\t[\'ssl_certificate\']\t[\'{"common_names":["client2","client3"]}\']\n' ) @@ -355,7 +355,7 @@ def test_x509_san_support(): instance.query( "SELECT name, auth_type, auth_params FROM system.users WHERE name='jerome'" ) - == 'jerome\tssl_certificate\t{"subject_alt_names":["URI:spiffe:\\\\/\\\\/foo.com\\\\/bar","URI:spiffe:\\\\/\\\\/foo.com\\\\/baz"]}\n' + == 'jerome\t[\'ssl_certificate\']\t[\'{"subject_alt_names":["URI:spiffe:\\\\/\\\\/foo.com\\\\/bar","URI:spiffe:\\\\/\\\\/foo.com\\\\/baz"]}\']\n' ) # user `jerome` is configured via xml config, but `show create` should work regardless. assert ( From c433d9cfdb321f27289df4f35f434a2685af890d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Sun, 28 Jul 2024 12:33:15 -0300 Subject: [PATCH 259/844] retrigger ci From ae72bd57f21d2c049b84b0333f332bce144b46ba Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 29 Jul 2024 09:50:54 -0300 Subject: [PATCH 260/844] try to fix docs? is it broken at all --- docs/en/sql-reference/statements/alter/user.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 1595f2894df..34a884c4d13 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -70,7 +70,7 @@ ALTER USER user1 ADD IDENTIFIED WITH plaintext_password by '1', bcrypt_password ``` Notes: -1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. +1. Older versions of ClickHouse might not support the syntax of multiple authentication methods. Therefore, if the ClickHouse server contains such users and is downgraded to a version that does not support it, such users will become unusable and some user related operations will be broken. In order to downgrade gracefully, one must set all users to contain a single authentication method prior to downgrading. Alternatively, if the server was downgraded without the proper procedure, the faulty users should be dropped. 2. `no_password` can not co-exist with other authentication methods for security reasons. Because of that, it is not possible to `ADD` a `no_password` authentication method. The below query will throw an error: From ce134830b702b117724235de28afdea2b5db0dfc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 29 Jul 2024 17:15:15 +0000 Subject: [PATCH 261/844] Disable pr local plan by default --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a5dec8dcc8..e175d7f76af 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -934,7 +934,7 @@ class IColumn; M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ - M(Bool, parallel_replicas_local_plan, true, "Build local plan for local replica", 0) \ + M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \ \ M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 24427dab1f3..ee9cfd3f28c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -58,7 +58,7 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, - {"parallel_replicas_local_plan", false, true, "Use local plan for local replica in a query with parallel replicas"}, + {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, From 9501bc8a7551daa74cd37363b0047bb42c029697 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 29 Jul 2024 20:33:06 +0000 Subject: [PATCH 262/844] Fix forgotten conflict --- src/Processors/QueryPlan/ReadFromRemote.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cec460ef3bc..3df46eb1987 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -21,13 +21,8 @@ #include #include #include -<<<<<<< HEAD #include -======= -#include -#include ->>>>>>> origin/master namespace DB { From d711ec118c41689d6f9694048bf279c3dbd2e1db Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 30 Jul 2024 09:04:59 +0000 Subject: [PATCH 263/844] Fix build --- src/Processors/QueryPlan/ConvertingActions.cpp | 2 +- src/Processors/QueryPlan/ReadFromRemote.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/ConvertingActions.cpp b/src/Processors/QueryPlan/ConvertingActions.cpp index ff106ff08c1..b9703ef59cd 100644 --- a/src/Processors/QueryPlan/ConvertingActions.cpp +++ b/src/Processors/QueryPlan/ConvertingActions.cpp @@ -25,7 +25,7 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi }; auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header); - auto converting = std::make_unique(plan.getCurrentDataStream(), convert_actions_dag); + auto converting = std::make_unique(plan.getCurrentDataStream(), std::move(convert_actions_dag)); plan.addStep(std::move(converting)); } diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 2dc4d2f8ed5..74389c8f9eb 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -70,6 +70,7 @@ public: ASTPtr query_ast_, ClusterPtr cluster_, const StorageID & storage_id_, + ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, ContextMutablePtr context_, From bce0f46254d382f78c9a36be1371ed731aeb7880 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 30 Jul 2024 16:26:01 +0000 Subject: [PATCH 264/844] Simplify tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh + enable parallel_replicas_local_plan by default --- ...allel_replicas_join_algo_and_analyzer_3.sh | 52 ++++++------------- 1 file changed, 17 insertions(+), 35 deletions(-) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh index e49a340ab67..81c6e8bed18 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh @@ -21,23 +21,21 @@ insert into num_2 select number * 3, -number from numbers(1.5e6); echo echo "nested join with analyzer and parallel replicas, both global" -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" +PARALLEL_REPLICAS_SETTINGS="allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_local_plan=1" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | +SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -51,18 +49,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" +SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -77,18 +71,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" +SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -102,18 +92,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" +SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -127,18 +113,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" +SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='hash'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' From 3adbc4cd334a05560ca2397ac5e1a14d8c580167 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 31 Jul 2024 15:10:29 +0800 Subject: [PATCH 265/844] Fix code style --- src/Storages/Statistics/StatisticsMinMax.h | 2 ++ src/Storages/Statistics/StatisticsTDigest.cpp | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Storages/Statistics/StatisticsMinMax.h b/src/Storages/Statistics/StatisticsMinMax.h index 913dcccb798..3e08e6b5768 100644 --- a/src/Storages/Statistics/StatisticsMinMax.h +++ b/src/Storages/Statistics/StatisticsMinMax.h @@ -1,3 +1,5 @@ +#pragma once + #include diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 3544f5cdea3..25cee4ac8e8 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -9,7 +9,6 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_STATISTICS; -extern const int LOGICAL_ERROR; } StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) @@ -44,7 +43,7 @@ Float64 StatisticsTDigest::estimateLess(const Field & val) const Field val_converted = convertFieldToType(val, *data_type); if (val_converted.isNull()) return 0; - + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); return t_digest.getCountLessThan(val_as_float); } @@ -54,7 +53,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const Field val_converted = convertFieldToType(val, *data_type); if (val_converted.isNull()) return 0; - + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); return t_digest.getCountEqual(val_as_float); } From c5a2f3aafbe6373a662f80f715e56a302a7fdd8b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 1 Aug 2024 14:21:54 +0000 Subject: [PATCH 266/844] Limit min_marks_for_concurrent_read --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 772bf427892..ec64db4139f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -383,9 +383,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - // avoid using multiplier if min marks to read is already too big (to avoid overflow) - if (pool_settings.min_marks_for_concurrent_read >= std::numeric_limits::max()) + if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + { + /// limit min marks to read in case it's big, happened in test since due to settings randomzation + pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); multiplier = 1.0f; + } if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); @@ -556,9 +559,12 @@ Pipe ReadFromMergeTree::readInOrder( }; auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - // avoid using multiplier if min marks to read is already too big (to avoid overflow) - if (pool_settings.min_marks_for_concurrent_read >= std::numeric_limits::max()) + if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + { + /// limit min marks to read in case it's big, happened in test since due to settings randomzation + pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); multiplier = 1.0f; + } if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); From 85b8c11175ff77176e6bbf4bd0b5821a34a14338 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 1 Aug 2024 20:32:03 +0000 Subject: [PATCH 267/844] Fix flaky 0_stateless/02784_parallel_replicas_automatic_decision - avoid max_threads randomzation --- .../0_stateless/02784_parallel_replicas_automatic_decision.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh index e5658f31a34..ef41a5d6277 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh @@ -53,6 +53,7 @@ function run_query_with_pure_parallel_replicas () { --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_min_number_of_rows_per_replica "$2" \ + --max_threads 5 \ |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' } From 0f2024306deeb9b67b631391afc9d2c70d622ffc Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 3 Aug 2024 17:03:27 +0800 Subject: [PATCH 268/844] optimize array() with single arguments and map() with double arguments --- src/Functions/array/array.cpp | 28 +++++++++++++++------ src/Functions/map.cpp | 47 +++++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 03b51808799..202aa83216b 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -46,8 +46,10 @@ public: size_t num_elements = arguments.size(); if (num_elements == 0) + { /// We should return constant empty array. return result_type->createColumnConstWithDefaultValue(input_rows_count); + } const DataTypePtr & elem_type = static_cast(*result_type).getNestedType(); @@ -59,7 +61,6 @@ public: Columns columns_holder(num_elements); ColumnRawPtrs column_ptrs(num_elements); - for (size_t i = 0; i < num_elements; ++i) { const auto & arg = arguments[i]; @@ -76,22 +77,33 @@ public: } /// Create and fill the result array. - auto out = ColumnArray::create(elem_type->createColumn()); IColumn & out_data = out->getData(); IColumn::Offsets & out_offsets = out->getOffsets(); out_data.reserve(input_rows_count * num_elements); - out_offsets.resize(input_rows_count); + out_offsets.resize_exact(input_rows_count); IColumn::Offset current_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + if (num_elements == 1) { - for (size_t j = 0; j < num_elements; ++j) - out_data.insertFrom(*column_ptrs[j], i); + for (size_t i = 0; i < input_rows_count; ++i) + { + ++current_offset; + out_offsets[i] = current_offset; + } + out_data.insertManyFrom(*column_ptrs[0], 0, input_rows_count); + } + else + { + for (size_t i = 0; i < input_rows_count; ++i) + { + for (size_t j = 0; j < num_elements; ++j) + out_data.insertFrom(*column_ptrs[j], i); - current_offset += num_elements; - out_offsets[i] = current_offset; + current_offset += num_elements; + out_offsets[i] = current_offset; + } } return out; diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 66cd10a3f0b..b869a475af0 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -1,14 +1,15 @@ -#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include @@ -123,24 +124,38 @@ public: MutableColumnPtr keys_data = key_type->createColumn(); MutableColumnPtr values_data = value_type->createColumn(); - MutableColumnPtr offsets = DataTypeNumber().createColumn(); + MutableColumnPtr offsets = DataTypeUInt64().createColumn(); size_t total_elements = input_rows_count * num_elements / 2; keys_data->reserve(total_elements); values_data->reserve(total_elements); - offsets->reserve(input_rows_count); + auto & offsets_data = assert_cast(*offsets).getData(); + offsets_data.resize_exact(input_rows_count); IColumn::Offset current_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) + if (num_elements == 2) { - for (size_t j = 0; j < num_elements; j += 2) + for (size_t i = 0; i < input_rows_count; ++i) { - keys_data->insertFrom(*column_ptrs[j], i); - values_data->insertFrom(*column_ptrs[j + 1], i); + ++current_offset; + offsets_data[i] = current_offset; } + keys_data->insertManyFrom(*column_ptrs[0], 0, input_rows_count); + values_data->insertManyFrom(*column_ptrs[1], 0, input_rows_count); + } + else + { + for (size_t i = 0; i < input_rows_count; ++i) + { + for (size_t j = 0; j < num_elements; j += 2) + { + keys_data->insertFrom(*column_ptrs[j], i); + values_data->insertFrom(*column_ptrs[j + 1], i); + } - current_offset += num_elements / 2; - offsets->insert(current_offset); + current_offset += num_elements / 2; + offsets_data[i] = current_offset; + } } auto nested_column = ColumnArray::create( From e5d4eaa4580f5921a472174d805c852e8b057165 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 3 Aug 2024 21:08:21 +0800 Subject: [PATCH 269/844] fix bugs --- src/Functions/array/array.cpp | 2 +- src/Functions/map.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 202aa83216b..a15fe86a232 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -92,7 +92,7 @@ public: ++current_offset; out_offsets[i] = current_offset; } - out_data.insertManyFrom(*column_ptrs[0], 0, input_rows_count); + out_data.insertRangeFrom(*column_ptrs[0], 0, input_rows_count); } else { diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index b869a475af0..ebfa9d4eab4 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -140,8 +140,8 @@ public: ++current_offset; offsets_data[i] = current_offset; } - keys_data->insertManyFrom(*column_ptrs[0], 0, input_rows_count); - values_data->insertManyFrom(*column_ptrs[1], 0, input_rows_count); + keys_data->insertRangeFrom(*column_ptrs[0], 0, input_rows_count); + values_data->insertRangeFrom(*column_ptrs[1], 0, input_rows_count); } else { From a498259ec8c08d4f37f77c8a6870f519bc295801 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 4 Aug 2024 13:32:17 +0800 Subject: [PATCH 270/844] optimize array with mutiple arguments --- src/Functions/array/array.cpp | 212 +++++++++++++++++++++++++++++----- 1 file changed, 184 insertions(+), 28 deletions(-) diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index a15fe86a232..b25505e9ac1 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -1,10 +1,14 @@ -#include -#include +#include +#include +#include #include #include -#include -#include +#include +#include #include +#include +#include "Columns/ColumnFixedString.h" +#include "Columns/ColumnNullable.h" namespace DB @@ -43,7 +47,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - size_t num_elements = arguments.size(); + const size_t num_elements = arguments.size(); if (num_elements == 0) { @@ -81,42 +85,194 @@ public: IColumn & out_data = out->getData(); IColumn::Offsets & out_offsets = out->getOffsets(); - out_data.reserve(input_rows_count * num_elements); + /// Fill out_offsets out_offsets.resize_exact(input_rows_count); - IColumn::Offset current_offset = 0; + for (size_t i = 0; i < input_rows_count; ++i) + { + current_offset += num_elements; + out_offsets[i] = current_offset; + } + + /// Fill out_data + out_data.reserve(input_rows_count * num_elements); if (num_elements == 1) - { - for (size_t i = 0; i < input_rows_count; ++i) - { - ++current_offset; - out_offsets[i] = current_offset; - } out_data.insertRangeFrom(*column_ptrs[0], 0, input_rows_count); - } else - { - for (size_t i = 0; i < input_rows_count; ++i) - { - for (size_t j = 0; j < num_elements; ++j) - out_data.insertFrom(*column_ptrs[j], i); - - current_offset += num_elements; - out_offsets[i] = current_offset; - } - } - + executeAny(column_ptrs, out_data, input_rows_count); return out; } - private: + bool executeAny(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + return false // NOLINT + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeString(columns, out_data, input_rows_count) + || executeNullable(columns, out_data, input_rows_count) || executeTuple(columns, out_data, input_rows_count) + || executeFixedString(columns, out_data, input_rows_count) || executeGeneric(columns, out_data, input_rows_count); + } + + bool executeGeneric(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + for (size_t i = 0; i < input_rows_count; ++i) + for (const auto * column : columns) + out_data.insertFrom(*column, i); + return true; + } + + template + bool executeNumber(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + using Container = ColumnVectorOrDecimal::Container; + std::vector containers(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnVectorOrDecimal * concrete_column = checkAndGetColumn>(columns[i]); + if (!concrete_column) + return false; + + containers[i] = &concrete_column->getData(); + } + + ColumnVectorOrDecimal & concrete_out_data = assert_cast &>(out_data); + Container & out_container = concrete_out_data.getData(); + out_container.resize_exact(columns.size() * input_rows_count); + + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + const size_t base = row_i * columns.size(); + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + out_container[base + col_i] = (*containers[col_i])[row_i]; + } + return true; + } + + bool executeString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + size_t total_bytes = 0; + std::vector concrete_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnString * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + total_bytes += concrete_column->getChars().size(); + concrete_columns[i] = concrete_column; + } + + ColumnString & concreate_out_data = assert_cast(out_data); + auto & out_chars = concreate_out_data.getChars(); + auto & out_offsets = concreate_out_data.getOffsets(); + out_chars.resize_exact(total_bytes); + out_offsets.resize_exact(input_rows_count * columns.size()); + + size_t curr_out_offset = 0; + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + const size_t base = row_i * columns.size(); + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + { + StringRef ref = concrete_columns[col_i]->getDataAt(row_i); + memcpySmallAllowReadWriteOverflow15(&out_chars[curr_out_offset], ref.data, ref.size); + out_chars[curr_out_offset + ref.size] = 0; + + curr_out_offset += ref.size + 1; + out_offsets[base + col_i] = curr_out_offset; + } + } + return true; + } + + bool executeFixedString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + size_t total_bytes = 0; + std::vector concrete_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnFixedString * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + total_bytes += concrete_column->getChars().size(); + concrete_columns[i] = concrete_column; + } + + ColumnFixedString & concreate_out_data = assert_cast(out_data); + auto & out_chars = concreate_out_data.getChars(); + out_chars.resize_exact(total_bytes); + + const size_t n = concreate_out_data.getN(); + size_t curr_out_offset = 0; + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + { + StringRef ref = concrete_columns[col_i]->getDataAt(row_i); + memcpySmallAllowReadWriteOverflow15(&out_chars[curr_out_offset], ref.data, n); + curr_out_offset += n; + } + } + return true; + } + + bool executeNullable(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + ColumnRawPtrs null_maps(columns.size(), nullptr); + ColumnRawPtrs nested_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnNullable * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + null_maps[i] = &concrete_column->getNullMapColumn(); + nested_columns[i] = &concrete_column->getNestedColumn(); + } + + ColumnNullable & concreate_out_data = assert_cast(out_data); + auto & out_null_map = concreate_out_data.getNullMapColumn(); + auto & out_nested_column = concreate_out_data.getNestedColumn(); + executeAny(null_maps, out_null_map, input_rows_count); + executeAny(nested_columns, out_nested_column, input_rows_count); + return true; + } + + bool executeTuple(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + ColumnTuple * concreate_out_data = typeid_cast(&out_data); + if (!concreate_out_data) + return false; + + const size_t tuple_size = concreate_out_data->tupleSize(); + for (size_t i = 0; i < tuple_size; ++i) + { + ColumnRawPtrs elem_columns(columns.size(), nullptr); + for (size_t j = 0; j < columns.size(); ++j) + { + const ColumnTuple * concrete_column = assert_cast(columns[j]); + elem_columns[j] = &concrete_column->getColumn(i); + } + executeAny(elem_columns, concreate_out_data->getColumn(i), input_rows_count); + } + return true; + } + String getName() const override { return name; } - bool addField(DataTypePtr type_res, const Field & f, Array & arr) const; - bool use_variant_as_common_type = false; }; From 00a9c30363e16aa1893320c35052ad551b803394 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 4 Aug 2024 16:16:51 +0800 Subject: [PATCH 271/844] optimize map function --- src/Functions/map.cpp | 44 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index ebfa9d4eab4..994a03374c9 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -32,11 +32,18 @@ class FunctionMap : public IFunction public: static constexpr auto name = "map"; - explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {} + explicit FunctionMap(ContextPtr context_) + : context(std::move(context_)) + , use_variant_as_common_type( + context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type) + , function_array(FunctionFactory::instance().get("array", context)) + , function_map_from_arrays(FunctionFactory::instance().get("mapFromArrays", context)) + { + } static FunctionPtr create(ContextPtr context) { - return std::make_shared(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type); + return std::make_shared(std::move(context)); } String getName() const override @@ -97,7 +104,36 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { size_t num_elements = arguments.size(); + if (num_elements == 0) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + ColumnsWithTypeAndName key_args; + ColumnsWithTypeAndName value_args; + for (size_t i = 0; i < num_elements; i += 2) + { + key_args.emplace_back(arguments[i]); + value_args.emplace_back(arguments[i+1]); + } + + const auto & result_type_map = static_cast(*result_type); + const DataTypePtr & key_type = result_type_map.getKeyType(); + const DataTypePtr & value_type = result_type_map.getValueType(); + const DataTypePtr & key_array_type = std::make_shared(key_type); + const DataTypePtr & value_array_type = std::make_shared(value_type); + + /// key_array = array(args[0], args[2]...) + ColumnPtr key_array = function_array->build(key_args)->execute(key_args, key_array_type, input_rows_count); + /// value_array = array(args[1], args[3]...) + ColumnPtr value_array = function_array->build(value_args)->execute(value_args, value_array_type, input_rows_count); + + /// result = mapFromArrays(key_array, value_array) + ColumnsWithTypeAndName map_args{{key_array, key_array_type, ""}, {value_array, value_array_type, ""}}; + return function_map_from_arrays->build(map_args)->execute(map_args, result_type, input_rows_count); + } + /* + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + size_t num_elements = arguments.size(); if (num_elements == 0) return result_type->createColumnConstWithDefaultValue(input_rows_count); @@ -164,9 +200,13 @@ public: return ColumnMap::create(nested_column); } + */ private: + ContextPtr context; bool use_variant_as_common_type = false; + FunctionOverloadResolverPtr function_array; + FunctionOverloadResolverPtr function_map_from_arrays; }; /// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays From 122c9794a65cb6d0579ec41f6fb3b4848e5c989b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 4 Aug 2024 17:04:04 +0800 Subject: [PATCH 272/844] remove useless codes --- src/Functions/map.cpp | 71 ------------------------------------------- 1 file changed, 71 deletions(-) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 994a03374c9..4b1e41af796 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -130,77 +130,6 @@ public: ColumnsWithTypeAndName map_args{{key_array, key_array_type, ""}, {value_array, value_array_type, ""}}; return function_map_from_arrays->build(map_args)->execute(map_args, result_type, input_rows_count); } - /* - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - size_t num_elements = arguments.size(); - if (num_elements == 0) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - const auto & result_type_map = static_cast(*result_type); - const DataTypePtr & key_type = result_type_map.getKeyType(); - const DataTypePtr & value_type = result_type_map.getValueType(); - - Columns columns_holder(num_elements); - ColumnRawPtrs column_ptrs(num_elements); - - for (size_t i = 0; i < num_elements; ++i) - { - const auto & arg = arguments[i]; - const auto to_type = i % 2 == 0 ? key_type : value_type; - - ColumnPtr preprocessed_column = castColumn(arg, to_type); - preprocessed_column = preprocessed_column->convertToFullColumnIfConst(); - - columns_holder[i] = std::move(preprocessed_column); - column_ptrs[i] = columns_holder[i].get(); - } - - /// Create and fill the result map. - - MutableColumnPtr keys_data = key_type->createColumn(); - MutableColumnPtr values_data = value_type->createColumn(); - MutableColumnPtr offsets = DataTypeUInt64().createColumn(); - - size_t total_elements = input_rows_count * num_elements / 2; - keys_data->reserve(total_elements); - values_data->reserve(total_elements); - auto & offsets_data = assert_cast(*offsets).getData(); - offsets_data.resize_exact(input_rows_count); - - IColumn::Offset current_offset = 0; - if (num_elements == 2) - { - for (size_t i = 0; i < input_rows_count; ++i) - { - ++current_offset; - offsets_data[i] = current_offset; - } - keys_data->insertRangeFrom(*column_ptrs[0], 0, input_rows_count); - values_data->insertRangeFrom(*column_ptrs[1], 0, input_rows_count); - } - else - { - for (size_t i = 0; i < input_rows_count; ++i) - { - for (size_t j = 0; j < num_elements; j += 2) - { - keys_data->insertFrom(*column_ptrs[j], i); - values_data->insertFrom(*column_ptrs[j + 1], i); - } - - current_offset += num_elements / 2; - offsets_data[i] = current_offset; - } - } - - auto nested_column = ColumnArray::create( - ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), - std::move(offsets)); - - return ColumnMap::create(nested_column); - } - */ private: ContextPtr context; From d1305d9fadf3ea2ad2393ee5f13a676110bf6ef7 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 5 Aug 2024 11:26:57 +0800 Subject: [PATCH 273/844] Some fixups and split tests --- .../mergetree-family/mergetree.md | 5 ++- src/Storages/Statistics/Statistics.cpp | 24 ++++++------ .../Statistics/StatisticsCountMinSketch.cpp | 4 +- .../Statistics/StatisticsCountMinSketch.h | 4 +- src/Storages/Statistics/StatisticsMinMax.cpp | 17 ++++----- src/Storages/Statistics/StatisticsMinMax.h | 11 +++--- src/Storages/Statistics/StatisticsTDigest.cpp | 4 +- src/Storages/Statistics/StatisticsTDigest.h | 4 +- src/Storages/Statistics/StatisticsUniq.cpp | 4 +- src/Storages/Statistics/StatisticsUniq.h | 4 +- src/Storages/StatisticsDescription.cpp | 8 ++-- ...atistics_create_materialize_drop.reference | 6 +++ ...864_statistics_create_materialize_drop.sql | 37 +++++++++++++++++++ .../02864_statistics_estimation.reference | 6 +-- .../02864_statistics_estimation.sql | 28 +++----------- 15 files changed, 94 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference create mode 100644 tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 8e8d5e55772..42968097799 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -991,9 +991,10 @@ They can be used for prewhere optimization only if we enable `set allow_statisti #### Available Types of Column Statistics {#available-types-of-column-statistics} -- `min_max` +- `MinMax` + + MinMax statistics allows to estimate selectivity of range condition for numeric columns. - min_max statistics allows to estimate selectivity of range condition for numeric columns. - `TDigest` [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 17ff8deb720..42b742419f0 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -99,11 +99,11 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const #endif if (stats.contains(StatisticsType::Uniq)) { - auto cardinality = stats.at(StatisticsType::Uniq)->estimateCardinality(); + UInt64 cardinality = stats.at(StatisticsType::Uniq)->estimateCardinality(); if (cardinality == 0) return 0; /// Assume that the value is uniformly distributed among the unique values. - return static_cast(1) / stats.at(StatisticsType::Uniq)->estimateCardinality() * rows; + return 1.0 / cardinality * rows; } return rows * ConditionSelectivityEstimator::default_cond_equal_factor; @@ -183,18 +183,18 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va MergeTreeStatisticsFactory::MergeTreeStatisticsFactory() { - registerValidator(StatisticsType::MinMax, minMaxValidator); - registerCreator(StatisticsType::MinMax, minMaxCreator); + registerValidator(StatisticsType::MinMax, minMaxStatisticsValidator); + registerCreator(StatisticsType::MinMax, minMaxStatisticsCreator); - registerValidator(StatisticsType::TDigest, tdigestValidator); - registerCreator(StatisticsType::TDigest, tdigestCreator); + registerValidator(StatisticsType::TDigest, tdigestStatisticsValidator); + registerCreator(StatisticsType::TDigest, tdigestStatisticsCreator); - registerValidator(StatisticsType::Uniq, uniqValidator); - registerCreator(StatisticsType::Uniq, uniqCreator); + registerValidator(StatisticsType::Uniq, uniqStatisticsValidator); + registerCreator(StatisticsType::Uniq, uniqStatisticsCreator); #if USE_DATASKETCHES - registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator); - registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator); + registerValidator(StatisticsType::CountMinSketch, countMinSketchStatisticsValidator); + registerCreator(StatisticsType::CountMinSketch, countMinSketchStatisticsCreator); #endif } @@ -222,7 +222,7 @@ ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescri { auto it = creators.find(type); if (it == creators.end()) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'min_max', 'tdigest' 'uniq' and 'count_min'", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'count_min', 'minmax', 'tdigest' and 'uniq'", type); auto stat_ptr = (it->second)(desc, stats.data_type); column_stat->stats[type] = stat_ptr; } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index e69bbc1515b..50d3b6e515c 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -84,7 +84,7 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) } -void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -92,7 +92,7 @@ void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr da throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); } -StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h index 6c8b74f8c35..d10bc78a88e 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.h +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -31,8 +31,8 @@ private: }; -void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index 083e8634841..f689f23d75d 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -19,9 +19,6 @@ extern const int ILLEGAL_STATISTICS; StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_) : IStatistics(stat_) - , min(std::numeric_limits::max()) - , max(std::numeric_limits::min()) - , row_count(0) , data_type(data_type_) { } @@ -41,7 +38,7 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const return row_count; if (max == min) - return row_count; + return (val_as_float < max) ? 0 : row_count; return ((val_as_float - min) / (max - min)) * row_count; } @@ -53,9 +50,9 @@ void StatisticsMinMax::update(const ColumnPtr & column) if (column->isNullAt(row)) continue; - auto data = column->getFloat64(row); - min = std::min(data, min); - max = std::max(data, max); + auto value = column->getFloat64(row); + min = std::min(value, min); + max = std::max(value, max); } row_count += column->size(); } @@ -75,15 +72,15 @@ void StatisticsMinMax::deserialize(ReadBuffer & buf) } -void minMaxValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void minMaxStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'min_max' do not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", data_type->getName()); } -StatisticsPtr minMaxCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsMinMax.h b/src/Storages/Statistics/StatisticsMinMax.h index 3e08e6b5768..4a67504350b 100644 --- a/src/Storages/Statistics/StatisticsMinMax.h +++ b/src/Storages/Statistics/StatisticsMinMax.h @@ -10,7 +10,6 @@ class StatisticsMinMax : public IStatistics { public: StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_); - ~StatisticsMinMax() override = default; Float64 estimateLess(const Field & val) const override; @@ -20,14 +19,14 @@ public: void deserialize(ReadBuffer & buf) override; private: - Float64 min; - Float64 max; - Float64 row_count; + Float64 min = std::numeric_limits::max(); + Float64 max = std::numeric_limits::min(); + UInt64 row_count = 0; DataTypePtr data_type; }; -void minMaxValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr minMaxCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void minMaxStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index 25cee4ac8e8..d418a761e7c 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -58,7 +58,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const return t_digest.getCountEqual(val_as_float); } -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -66,7 +66,7 @@ void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index f4de6cc41e0..b83132c5e0d 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -24,7 +24,7 @@ private: DataTypePtr data_type; }; -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index aace45a14b1..628ba98aba3 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -52,7 +52,7 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -60,7 +60,7 @@ void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) { return std::make_shared(stat, data_type); } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 81162b0a5b9..219e2ffbc55 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -27,7 +27,7 @@ private: }; -void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index c54752dd42f..f3ce9deb836 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -51,9 +51,9 @@ static StatisticsType stringToStatisticsType(String type) return StatisticsType::Uniq; if (type == "count_min") return StatisticsType::CountMinSketch; - if (type == "min_max") + if (type == "minmax") return StatisticsType::MinMax; - throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'min_max', 'tdigest', 'uniq' and 'count_min'.", type); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistics type: {}. Supported statistics types are 'count_min', 'minmax', 'tdigest' and 'uniq'.", type); } String SingleStatisticsDescription::getTypeName() const @@ -67,9 +67,9 @@ String SingleStatisticsDescription::getTypeName() const case StatisticsType::CountMinSketch: return "count_min"; case StatisticsType::MinMax: - return "min_max"; + return "minmax"; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'min_max', 'tdigest', 'uniq' and 'count_min'.", type); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown statistics type: {}. Supported statistics types are 'count_min', 'minmax', 'tdigest' and 'uniq'.", type); } } diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference new file mode 100644 index 00000000000..5aff1ac63d4 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference @@ -0,0 +1,6 @@ +Test create statistics: +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(uniq, count_min, minmax),\n `b` Nullable(Int64),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +tab +Test materialize and drop statistics: +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(uniq, count_min, minmax),\n `b` Nullable(Int64) STATISTICS(tdigest, uniq, count_min, minmax),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` Nullable(Int64),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql new file mode 100644 index 00000000000..442079ab947 --- /dev/null +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql @@ -0,0 +1,37 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS tab SYNC; + +SET allow_experimental_statistics = 1; +SET allow_statistics_optimize = 1; +SET allow_suspicious_low_cardinality_types=1; +SET mutations_sync = 2; + + +SELECT 'Test create statistics:'; + +CREATE TABLE tab +( + a LowCardinality(Int64) STATISTICS(count_min, minmax, uniq), + b Nullable(Int64), + c LowCardinality(Nullable(Int64)) STATISTICS(minmax, count_min), + d DateTime STATISTICS(count_min, minmax, tdigest, uniq), + pk String, +) Engine = MergeTree() ORDER BY pk; + +SHOW CREATE TABLE tab; + +SELECT name FROM system.tables WHERE name = 'tab' AND database = currentDatabase(); +INSERT INTO tab select number, number, number, toDateTime(number, 'UTC'), generateUUIDv4() FROM system.numbers LIMIT 10000; + + +SELECT 'Test materialize and drop statistics:'; + +ALTER TABLE tab ADD STATISTICS b TYPE count_min, minmax, tdigest, uniq; +ALTER TABLE tab MATERIALIZE STATISTICS b; +SHOW CREATE TABLE tab; + +ALTER TABLE tab DROP STATISTICS a, b, c, d; +SHOW CREATE TABLE tab; + +DROP TABLE IF EXISTS tab SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index 64f264017d8..a5225c6338f 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -1,5 +1,5 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `d` DateTime64(3),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics min_max: +CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 +Test statistics minmax: Prewhere info Prewhere filter Prewhere filter column: and(greater(d, _CAST(9998_DateTime(\'UTC\'), \'DateTime(\'UTC\')\')), less(c, -1), greater(b, 0)) (removed) @@ -28,5 +28,3 @@ Test estimating equals condition: Prewhere info Prewhere filter Prewhere filter column: and(equals(a, \'0\'), equals(b, 10)) (removed) -Test LowCardinality and Nullable data type: -tab2 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 0ea9eb38e60..117e24bd732 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -12,7 +12,7 @@ CREATE TABLE tab a String, b UInt64, c Int64, - d DateTime64, + d DateTime, pk String, ) Engine = MergeTree() ORDER BY pk SETTINGS min_bytes_for_wide_part = 0; @@ -21,11 +21,11 @@ SHOW CREATE TABLE tab; INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), toDateTime(number, 'UTC'), generateUUIDv4() FROM system.numbers LIMIT 10000; -SELECT 'Test statistics min_max:'; +SELECT 'Test statistics minmax:'; -ALTER TABLE tab ADD STATISTICS b TYPE min_max; -ALTER TABLE tab ADD STATISTICS c TYPE min_max; -ALTER TABLE tab ADD STATISTICS d TYPE min_max; +ALTER TABLE tab ADD STATISTICS b TYPE minmax; +ALTER TABLE tab ADD STATISTICS c TYPE minmax; +ALTER TABLE tab ADD STATISTICS d TYPE minmax; ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') @@ -70,7 +70,7 @@ ALTER TABLE tab DROP STATISTICS a, b, c, d; SELECT 'Test estimating range condition:'; -ALTER TABLE tab ADD STATISTICS b TYPE min_max; +ALTER TABLE tab ADD STATISTICS b TYPE minmax; ALTER TABLE tab MATERIALIZE STATISTICS b; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*5000*/ and b < 10/*100*/) @@ -100,19 +100,3 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a; DROP TABLE IF EXISTS tab SYNC; - - -SELECT 'Test LowCardinality and Nullable data type:'; -DROP TABLE IF EXISTS tab2 SYNC; -SET allow_suspicious_low_cardinality_types=1; -CREATE TABLE tab2 -( - a LowCardinality(Int64) STATISTICS(count_min), - b Nullable(Int64) STATISTICS(min_max, count_min), - c LowCardinality(Nullable(Int64)) STATISTICS(min_max, count_min), - pk String, -) Engine = MergeTree() ORDER BY pk; - -select name from system.tables where name = 'tab2' and database = currentDatabase(); - -DROP TABLE IF EXISTS tab2 SYNC; From 045551f5098e36822c69cc2bb5852d7d16cfe41b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 5 Aug 2024 11:33:58 +0800 Subject: [PATCH 274/844] fix bugs --- src/Functions/map.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index f49d66c0329..d9643087b11 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -157,6 +157,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { From 7765ff6d5227da58eecc426287c98c333c75402d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 5 Aug 2024 06:46:04 +0000 Subject: [PATCH 275/844] Minor fixups --- .../mergetree-family/mergetree.md | 2 +- src/Storages/Statistics/Statistics.cpp | 22 ++++---- src/Storages/Statistics/Statistics.h | 12 ++-- .../Statistics/StatisticsCountMinSketch.cpp | 52 +++++++++--------- .../Statistics/StatisticsCountMinSketch.h | 11 ++-- src/Storages/Statistics/StatisticsMinMax.cpp | 55 +++++++++---------- src/Storages/Statistics/StatisticsMinMax.h | 11 ++-- src/Storages/Statistics/StatisticsTDigest.cpp | 11 ++-- src/Storages/Statistics/StatisticsTDigest.h | 13 +++-- src/Storages/Statistics/StatisticsUniq.cpp | 10 ++-- src/Storages/Statistics/StatisticsUniq.h | 12 ++-- 11 files changed, 107 insertions(+), 104 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 42968097799..c2438ac593e 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -993,7 +993,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `MinMax` - MinMax statistics allows to estimate selectivity of range condition for numeric columns. + The minimum and maximum column value which allows to estimate the selectivity of range filters on numeric columns. - `TDigest` diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 42b742419f0..1f159211a93 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -62,14 +62,17 @@ Float64 IStatistics::estimateLess(const Field & /*val*/) const } /// ------------------------------------- -/// Implementation of the estimation: -/// Note: Each statistics object supports certain types predicates natively, e.g. -/// - TDigest: '< X' (less-than predicates) -/// - Count-min sketches: '= X' (equal predicates) -/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) -/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. -/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics -/// object that supports it natively. +/// Notes: +/// - Statistics object usually only support estimation for certain types of predicates, e.g. +/// - TDigest: '< X' (less-than predicates) +/// - Count-min sketches: '= X' (equal predicates) +/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) +/// +/// If multiple statistics objects in a column support estimating a predicate, we want to try statistics in order of descending accuracy +/// (e.g. MinMax statistics are simpler than TDigest statistics and thus worse for estimating 'less' predicates). +/// +/// Sometimes, it is possible to combine multiple statistics in a clever way. For that reason, all estimation are performed in a central +/// place (here), and we don't simply pass the predicate to the first statistics object that supports it natively. Float64 ColumnStatistics::estimateLess(const Field & val) const { @@ -102,8 +105,7 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const UInt64 cardinality = stats.at(StatisticsType::Uniq)->estimateCardinality(); if (cardinality == 0) return 0; - /// Assume that the value is uniformly distributed among the unique values. - return 1.0 / cardinality * rows; + return 1.0 / cardinality * rows; /// assume uniform distribution } return rows * ConditionSelectivityEstimator::default_cond_equal_factor; diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 3eeb5cf92a8..1425be89d47 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -25,20 +25,20 @@ public: explicit IStatistics(const SingleStatisticsDescription & stat_); virtual ~IStatistics() = default; + virtual void update(const ColumnPtr & column) = 0; + + virtual void serialize(WriteBuffer & buf) = 0; + virtual void deserialize(ReadBuffer & buf) = 0; + /// Estimate the cardinality of the column. /// Throws if the statistics object is not able to do a meaningful estimation. virtual UInt64 estimateCardinality() const; /// Per-value estimations. - /// Throws if the statistics object is not able to do a meaningful estimation. + /// Throws a LOGICAL_ERROR if the statistics object is not able to do a meaningful estimation. virtual Float64 estimateEqual(const Field & val) const; /// cardinality of val in the column virtual Float64 estimateLess(const Field & val) const; /// summarized cardinality of values < val in the column - virtual void update(const ColumnPtr & column) = 0; - - virtual void serialize(WriteBuffer & buf) = 0; - virtual void deserialize(ReadBuffer & buf) = 0; - protected: SingleStatisticsDescription stat; }; diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.cpp b/src/Storages/Statistics/StatisticsCountMinSketch.cpp index 50d3b6e515c..c0ea7a84ded 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.cpp +++ b/src/Storages/Statistics/StatisticsCountMinSketch.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -25,34 +24,13 @@ extern const int ILLEGAL_STATISTICS; static constexpr auto num_hashes = 7uz; static constexpr auto num_buckets = 2718uz; -StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) - : IStatistics(stat_) +StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type_) + : IStatistics(statistics_description) , sketch(num_hashes, num_buckets) , data_type(data_type_) { } -Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const -{ - /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc. - /// Return null if val larger than the range of data_type - /// - /// For example: if data_type is Int32: - /// 1. For 1.0, 1, '1', return Field(1) - /// 2. For 1.1, max_value_int64, return null - Field val_converted = convertFieldToType(val, *data_type); - if (val_converted.isNull()) - return 0; - - if (data_type->isValueRepresentedByNumber()) - return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); - - if (isStringOrFixedString(data_type)) - return sketch.get_estimate(val.get()); - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); -} - void StatisticsCountMinSketch::update(const ColumnPtr & column) { for (size_t row = 0; row < column->size(); ++row) @@ -83,8 +61,28 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf) sketch = Sketch::deserialize(bytes.data(), size); } +Float64 StatisticsCountMinSketch::estimateEqual(const Field & val) const +{ + /// Try to convert field to data_type. Converting string to proper data types such as: number, date, datetime, IPv4, Decimal etc. + /// Return null if val larger than the range of data_type + /// + /// For example: if data_type is Int32: + /// 1. For 1.0, 1, '1', return Field(1) + /// 2. For 1.1, max_value_int64, return null + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; -void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) + if (data_type->isValueRepresentedByNumber()) + return sketch.get_estimate(&val_converted, data_type->getSizeOfValueInMemory()); + + if (isStringOrFixedString(data_type)) + return sketch.get_estimate(val.get()); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'count_min' does not support estimate data type of {}", data_type->getName()); +} + +void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -92,9 +90,9 @@ void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, Data throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); } -StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(statistics_description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsCountMinSketch.h b/src/Storages/Statistics/StatisticsCountMinSketch.h index d10bc78a88e..452483c8773 100644 --- a/src/Storages/Statistics/StatisticsCountMinSketch.h +++ b/src/Storages/Statistics/StatisticsCountMinSketch.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "config.h" @@ -14,15 +15,15 @@ namespace DB class StatisticsCountMinSketch : public IStatistics { public: - StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); - - Float64 estimateEqual(const Field & val) const override; + StatisticsCountMinSketch(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type_); void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; + Float64 estimateEqual(const Field & val) const override; + private: using Sketch = datasketches::count_min_sketch; Sketch sketch; @@ -31,8 +32,8 @@ private: }; -void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void countMinSketchStatisticsValidator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); +StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index f689f23d75d..11a0704dae7 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -1,10 +1,10 @@ #include -#include -#include -#include #include +#include +#include #include #include +#include #include @@ -17,32 +17,12 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_) - : IStatistics(stat_) +StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & statistics_description, const DataTypePtr & data_type_) + : IStatistics(statistics_description) , data_type(data_type_) { } -Float64 StatisticsMinMax::estimateLess(const Field & val) const -{ - Field val_converted = convertFieldToType(val, *data_type); - if (val_converted.isNull()) - return 0; - - auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); - - if (val_as_float < min) - return 0; - - if (val_as_float > max) - return row_count; - - if (max == min) - return (val_as_float < max) ? 0 : row_count; - - return ((val_as_float - min) / (max - min)) * row_count; -} - void StatisticsMinMax::update(const ColumnPtr & column) { for (size_t row = 0; row < column->size(); ++row) @@ -71,8 +51,27 @@ void StatisticsMinMax::deserialize(ReadBuffer & buf) readFloatBinary(max, buf); } +Float64 StatisticsMinMax::estimateLess(const Field & val) const +{ + Field val_converted = convertFieldToType(val, *data_type); + if (val_converted.isNull()) + return 0; -void minMaxStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) + auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); + + if (val_as_float < min) + return 0; + + if (val_as_float > max) + return row_count; + + if (min == max) + return (val_as_float != max) ? 0 : row_count; + + return ((val_as_float - min) / (max - min)) * row_count; +} + +void minMaxStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -80,9 +79,9 @@ void minMaxStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", data_type->getName()); } -StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(statistics_description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsMinMax.h b/src/Storages/Statistics/StatisticsMinMax.h index 4a67504350b..524494b520b 100644 --- a/src/Storages/Statistics/StatisticsMinMax.h +++ b/src/Storages/Statistics/StatisticsMinMax.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -9,15 +10,15 @@ namespace DB class StatisticsMinMax : public IStatistics { public: - StatisticsMinMax(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type_); - - Float64 estimateLess(const Field & val) const override; + StatisticsMinMax(const SingleStatisticsDescription & statistics_description, const DataTypePtr & data_type_); void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; + Float64 estimateLess(const Field & val) const override; + private: Float64 min = std::numeric_limits::max(); Float64 max = std::numeric_limits::min(); @@ -26,7 +27,7 @@ private: DataTypePtr data_type; }; -void minMaxStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void minMaxStatisticsValidator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index d418a761e7c..cfd626c8ea8 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -11,8 +11,9 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) - : IStatistics(stat_), data_type(data_type_) +StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type_) + : IStatistics(statistics_description) + , data_type(data_type_) { } @@ -58,7 +59,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const return t_digest.getCountEqual(val_as_float); } -void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -66,9 +67,9 @@ void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(statistics_description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index b83132c5e0d..ad2a03836aa 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -9,22 +10,22 @@ namespace DB class StatisticsTDigest : public IStatistics { public: - explicit StatisticsTDigest(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); - - Float64 estimateLess(const Field & val) const override; - Float64 estimateEqual(const Field & val) const override; + explicit StatisticsTDigest(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type_); void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; + Float64 estimateLess(const Field & val) const override; + Float64 estimateEqual(const Field & val) const override; + private: QuantileTDigest t_digest; DataTypePtr data_type; }; -void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestStatisticsValidator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr); } diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 628ba98aba3..e895157635f 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -11,8 +11,8 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) - : IStatistics(stat_) +StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & statistics_description, const DataTypePtr & data_type) + : IStatistics(statistics_description) { arena = std::make_unique(); AggregateFunctionProperties properties; @@ -52,7 +52,7 @@ UInt64 StatisticsUniq::estimateCardinality() const return column->getUInt(0); } -void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void uniqStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) { data_type = removeNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type); @@ -60,9 +60,9 @@ void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr da throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } -StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type) { - return std::make_shared(stat, data_type); + return std::make_shared(statistics_description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsUniq.h b/src/Storages/Statistics/StatisticsUniq.h index 219e2ffbc55..b3000e4895a 100644 --- a/src/Storages/Statistics/StatisticsUniq.h +++ b/src/Storages/Statistics/StatisticsUniq.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -10,24 +11,23 @@ namespace DB class StatisticsUniq : public IStatistics { public: - StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); + StatisticsUniq(const SingleStatisticsDescription & statistics_description, const DataTypePtr & data_type); ~StatisticsUniq() override; - UInt64 estimateCardinality() const override; - void update(const ColumnPtr & column) override; void serialize(WriteBuffer & buf) override; void deserialize(ReadBuffer & buf) override; + UInt64 estimateCardinality() const override; + private: std::unique_ptr arena; AggregateFunctionPtr collector; AggregateDataPtr data; - }; -void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); +void uniqStatisticsValidator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); +StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); } From c61225ede47e37bd773bd249804739f915feefd7 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 5 Aug 2024 18:37:36 +0800 Subject: [PATCH 276/844] Rewrite tests --- ...atistics_create_materialize_drop.reference | 7 +- ...864_statistics_create_materialize_drop.sql | 16 ++-- .../02864_statistics_estimation.reference | 34 +++---- .../02864_statistics_estimation.sql | 92 ++++++++----------- 4 files changed, 62 insertions(+), 87 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference index 5aff1ac63d4..4ca2c5e5f9b 100644 --- a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.reference @@ -1,6 +1,5 @@ Test create statistics: -CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(uniq, count_min, minmax),\n `b` Nullable(Int64),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 -tab +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(tdigest, uniq, count_min, minmax),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, count_min),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(tdigest, uniq, count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 Test materialize and drop statistics: -CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64) STATISTICS(uniq, count_min, minmax),\n `b` Nullable(Int64) STATISTICS(tdigest, uniq, count_min, minmax),\n `c` LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax),\n `d` DateTime STATISTICS(tdigest, uniq, count_min, minmax),\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 -CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` Nullable(Int64),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` LowCardinality(Nullable(String)) STATISTICS(uniq, count_min),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 +CREATE TABLE default.tab\n(\n `a` LowCardinality(Int64),\n `b` LowCardinality(Nullable(String)),\n `c` LowCardinality(Nullable(Int64)),\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql index 442079ab947..90a57c99624 100644 --- a/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql +++ b/tests/queries/0_stateless/02864_statistics_create_materialize_drop.sql @@ -12,26 +12,24 @@ SELECT 'Test create statistics:'; CREATE TABLE tab ( - a LowCardinality(Int64) STATISTICS(count_min, minmax, uniq), - b Nullable(Int64), - c LowCardinality(Nullable(Int64)) STATISTICS(minmax, count_min), + a LowCardinality(Int64) STATISTICS(count_min, minmax, tdigest, uniq), + b LowCardinality(Nullable(String)) STATISTICS(count_min, uniq), + c LowCardinality(Nullable(Int64)) STATISTICS(count_min, minmax, tdigest, uniq), d DateTime STATISTICS(count_min, minmax, tdigest, uniq), pk String, ) Engine = MergeTree() ORDER BY pk; +INSERT INTO tab select number, number, number, toDateTime(number), generateUUIDv4() FROM system.numbers LIMIT 10000; SHOW CREATE TABLE tab; -SELECT name FROM system.tables WHERE name = 'tab' AND database = currentDatabase(); -INSERT INTO tab select number, number, number, toDateTime(number, 'UTC'), generateUUIDv4() FROM system.numbers LIMIT 10000; - SELECT 'Test materialize and drop statistics:'; - -ALTER TABLE tab ADD STATISTICS b TYPE count_min, minmax, tdigest, uniq; +ALTER TABLE tab DROP STATISTICS a, b, c, d; +ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq; ALTER TABLE tab MATERIALIZE STATISTICS b; SHOW CREATE TABLE tab; -ALTER TABLE tab DROP STATISTICS a, b, c, d; +ALTER TABLE tab DROP STATISTICS b; SHOW CREATE TABLE tab; DROP TABLE IF EXISTS tab SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index a5225c6338f..f59f94013da 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -1,30 +1,24 @@ CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics minmax: - Prewhere info - Prewhere filter - Prewhere filter column: and(greater(d, _CAST(9998_DateTime(\'UTC\'), \'DateTime(\'UTC\')\')), less(c, -1), greater(b, 0)) (removed) Test statistics count_min: Prewhere info Prewhere filter Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) +Test statistics minmax: + Prewhere info + Prewhere filter + Prewhere filter column: and(greater(d, _CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) +Test statistics tdigest: + Prewhere info + Prewhere filter + Prewhere filter column: and(greater(d, _CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) +Test statistics uniq: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'0\'), equals(d, _CAST(1, \'DateTime\')), equals(b, 0), equals(c, 0)) (removed) Test statistics multi-types: Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), less(c, -90), greater(b, 900)) (removed) + Prewhere filter column: and(equals(a, \'0\'), equals(d, _CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0)) (removed) -Test estimating range condition: - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(less(b, 10), less(c, 0)) (removed) -Test estimating equals condition: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 10)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 10)) (removed) + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 117e24bd732..65b111c285b 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -19,84 +19,68 @@ SETTINGS min_bytes_for_wide_part = 0; SHOW CREATE TABLE tab; -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), toDateTime(number, 'UTC'), generateUUIDv4() FROM system.numbers LIMIT 10000; +INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 10000; + + +SELECT 'Test statistics count_min:'; + +ALTER TABLE tab ADD STATISTICS a, b, c TYPE count_min; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +ALTER TABLE tab DROP STATISTICS a, b, c; + SELECT 'Test statistics minmax:'; -ALTER TABLE tab ADD STATISTICS b TYPE minmax; -ALTER TABLE tab ADD STATISTICS c TYPE minmax; -ALTER TABLE tab ADD STATISTICS d TYPE minmax; +ALTER TABLE tab ADD STATISTICS b, c, d TYPE minmax; ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > toDateTime(9998, 'UTC')/*1*/) +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS b, c, d; -SELECT 'Test statistics count_min:'; +SELECT 'Test statistics tdigest:'; -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min; -ALTER TABLE tab ADD STATISTICS c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; - -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) +ALTER TABLE tab ADD STATISTICS b, c, d TYPE tdigest; +ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +ALTER TABLE tab DROP STATISTICS b, c, d; -ALTER TABLE tab DROP STATISTICS a, b, c; + +SELECT 'Test statistics uniq:'; + +ALTER TABLE tab ADD STATISTICS a, b, c, d TYPE uniq; +ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*100*/ and c = 0/*1000*/ and b = 0/*100*/ and a = '0'/*100*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +ALTER TABLE tab DROP STATISTICS a, b, c, d; SELECT 'Test statistics multi-types:'; -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS d TYPE count_min, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS a TYPE count_min, uniq; +ALTER TABLE tab ADD STATISTICS b TYPE count_min, minmax, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS c TYPE count_min, minmax, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS d TYPE count_min, minmax, uniq, tdigest; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*1*/ and c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d > cast(1, 'DateTime')/*9999*/ and c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c, d; -SELECT 'Test estimating range condition:'; - -ALTER TABLE tab ADD STATISTICS b TYPE minmax; -ALTER TABLE tab MATERIALIZE STATISTICS b; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*5000*/ and b < 10/*100*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab ADD STATISTICS b TYPE tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS b; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c < 0/*5000*/ and b < 10/*100*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -ALTER TABLE tab DROP STATISTICS b; - - -SELECT 'Test estimating equals condition:'; - -ALTER TABLE tab ADD STATISTICS a TYPE uniq; -ALTER TABLE tab MATERIALIZE STATISTICS a; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b = 10/*100*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b = 10/*100*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -ALTER TABLE tab DROP STATISTICS a; - DROP TABLE IF EXISTS tab SYNC; From a36424fc8c99fd5c20b52d80a10d232c8a61451c Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 5 Aug 2024 18:59:09 +0800 Subject: [PATCH 277/844] Add supported data types to documents --- .../table-engines/mergetree-family/mergetree.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index c2438ac593e..8fd60c36f94 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1007,6 +1007,19 @@ They can be used for prewhere optimization only if we enable `set allow_statisti [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. +#### Supported Data Types {#supported-data-types} + +| Data Type / Statistics | count_min | minmax | tdigest | uniq | +|-------------------------|-----------|--------|---------|------| +| Integer types | ✔ | ✔ | ✔ | ✔ | +| Floating-point numbers | ✔ | ✔ | ✔ | ✔ | +| Boolean | ✔ | ✔ | ✔ | ✔ | +| Strings | ✔ | ✗ | ✗ | ✔ | +| Dates | ✔ | ✔ | ✔ | ✔ | + +You can also add `LowCardinality` and `Nullable` modifiers to the data types. + + ## Column-level Settings {#column-level-settings} Certain MergeTree settings can be override at column level: From 4fa30da1189a3ce8139dad6bce5da9264019d3b3 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 6 Aug 2024 09:56:38 +0800 Subject: [PATCH 278/844] Fix docs --- .../mergetree-family/mergetree.md | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 8fd60c36f94..80e5bf694d4 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -994,30 +994,36 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `MinMax` The minimum and maximum column value which allows to estimate the selectivity of range filters on numeric columns. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean and Date*. - `TDigest` [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean and Date*. - `Uniq` [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean, Date* and (Fixed)String. - `count_min` [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean, Date* and (Fixed)String. -#### Supported Data Types {#supported-data-types} +Note that all statistics types support `LowCardinality` and `Nullable` modifiers to data types. -| Data Type / Statistics | count_min | minmax | tdigest | uniq | -|-------------------------|-----------|--------|---------|------| -| Integer types | ✔ | ✔ | ✔ | ✔ | -| Floating-point numbers | ✔ | ✔ | ✔ | ✔ | -| Boolean | ✔ | ✔ | ✔ | ✔ | -| Strings | ✔ | ✗ | ✗ | ✔ | -| Dates | ✔ | ✔ | ✔ | ✔ | +#### Supported operations of Column Statistics {#supported-operations-of-column-statistics} + +| | Equals | Range | +|-----------|---------|-------| +| count_min | ✔ | ✔ | +| MinMax | ✔ | ✔ | +| TDigest | ✔ | ✔ | +| Uniq | ✔ | ✗ | + +Please note that operation range represents >, >=, <, <=. -You can also add `LowCardinality` and `Nullable` modifiers to the data types. ## Column-level Settings {#column-level-settings} From 46da03030cd6ae115edb7d4b9054704a8312283c Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 6 Aug 2024 10:00:34 +0800 Subject: [PATCH 279/844] Add test for implicitly type conversion --- .../table-engines/mergetree-family/mergetree.md | 10 +++++++--- .../0_stateless/02864_statistics_estimation.sql | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 80e5bf694d4..caf72e39467 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -994,22 +994,26 @@ They can be used for prewhere optimization only if we enable `set allow_statisti - `MinMax` The minimum and maximum column value which allows to estimate the selectivity of range filters on numeric columns. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean and Date*. - `TDigest` [TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns. + Supported data types: (U)Int*, Float*, Decimal(*), Boolean and Date*. - `Uniq` [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains. - Supported data types: (U)Int*, Float*, Decimal(*), Boolean, Date* and (Fixed)String. + + Supported data types: (U)Int*, Float*, Decimal*, Boolean, Date* and (Fixed)String. - `count_min` [Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column. - Supported data types: (U)Int*, Float*, Decimal(*), Boolean, Date* and (Fixed)String. + + Supported data types: (U)Int*, Float*, Decimal*, Boolean, Date* and (Fixed)String. Note that all statistics types support `LowCardinality` and `Nullable` modifiers to data types. @@ -1022,7 +1026,7 @@ Note that all statistics types support `LowCardinality` and `Nullable` modifiers | TDigest | ✔ | ✔ | | Uniq | ✔ | ✗ | -Please note that operation range represents >, >=, <, <=. +Please note that operation `Range` represents >, >=, < or <=. diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 65b111c285b..4a221686069 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -80,6 +80,12 @@ SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTi FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d > cast(1, 'DateTime')/*9999*/ and c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + +SELECT 'Test statistics implicitly type conversion:'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/ and b > 0/*9990*/ and a = '10000'/*0*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; + ALTER TABLE tab DROP STATISTICS a, b, c, d; From 8259a9827e5f1764fffa508f8e721f643c102d15 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 6 Aug 2024 10:29:42 +0800 Subject: [PATCH 280/844] Update reference file for tests --- .../engines/table-engines/mergetree-family/mergetree.md | 9 ++++----- .../0_stateless/02864_statistics_estimation.reference | 4 ++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index caf72e39467..751b07afe91 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1021,18 +1021,17 @@ Note that all statistics types support `LowCardinality` and `Nullable` modifiers | | Equals | Range | |-----------|---------|-------| -| count_min | ✔ | ✔ | -| MinMax | ✔ | ✔ | -| TDigest | ✔ | ✔ | +| count_min | ✔ | ✗ | +| MinMax | ✗ | ✔ | +| TDigest | ✗ | ✔ | | Uniq | ✔ | ✗ | Please note that operation `Range` represents >, >=, < or <=. - ## Column-level Settings {#column-level-settings} -Certain MergeTree settings can be override at column level: +Certain MergeTree settings can be overridden at column level: - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index f59f94013da..cb274b2990e 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -22,3 +22,7 @@ Test statistics multi-types: Prewhere info Prewhere filter Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) +Test statistics implicitly type conversion: + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(a, \'10000\'), equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\'), greater(b, 0)) (removed) From 3769f8a46537bdc2865d13ff948e48328efb3548 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 6 Aug 2024 20:44:42 +0800 Subject: [PATCH 281/844] Fix tests --- .../0_stateless/02864_statistics_estimation.reference | 6 +++--- tests/queries/0_stateless/02864_statistics_estimation.sql | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index cb274b2990e..fc370691830 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -14,15 +14,15 @@ Test statistics tdigest: Test statistics uniq: Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(d, _CAST(1, \'DateTime\')), equals(b, 0), equals(c, 0)) (removed) + Prewhere filter column: and(equals(d, _CAST(1, \'DateTime\')), equals(c, 0)) (removed) Test statistics multi-types: Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(d, _CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) + Prewhere filter column: and(equals(d, _CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) Prewhere info Prewhere filter Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) Test statistics implicitly type conversion: Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\'), greater(b, 0)) (removed) + Prewhere filter column: and(equals(a, \'1\'), equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\'), greater(b, 0)) (removed) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 4a221686069..6436cf68738 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -59,7 +59,7 @@ SELECT 'Test statistics uniq:'; ALTER TABLE tab ADD STATISTICS a, b, c, d TYPE uniq; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*100*/ and c = 0/*1000*/ and b = 0/*100*/ and a = '0'/*100*/) +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*100*/ and c = 0/*1000*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c, d; @@ -73,7 +73,7 @@ ALTER TABLE tab ADD STATISTICS d TYPE count_min, minmax, uniq, tdigest; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*1*/ and c < -90/*900*/ and b > 900/*990*/ and a = '0'/*1*/) +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*1*/ and c < -90/*900*/ and b > 900/*990*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') @@ -83,7 +83,7 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT 'Test statistics implicitly type conversion:'; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/ and b > 0/*9990*/ and a = '10000'/*0*/) +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/ and b > 0/*9990*/ and a = '1'/*1*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c, d; From 0a54ba65756a5789c6e64e78141fe7ad962f2356 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 6 Aug 2024 15:28:45 +0000 Subject: [PATCH 282/844] better on-fly mutations --- .../Optimizations/projectionsCommon.cpp | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 107 +++++++++++------- src/Processors/QueryPlan/ReadFromMergeTree.h | 1 + src/Storages/MergeTree/AlterConversions.h | 13 ++- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 18 +-- src/Storages/MergeTree/MergeTreeData.h | 13 ++- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 + .../MergeTree/MergeTreeDataSelectExecutor.h | 1 + .../MergeTree/MergeTreePrefetchedReadPool.cpp | 1 - .../MergeTree/MergeTreePrefetchedReadPool.h | 2 +- .../MergeTree/MergeTreeReadPoolBase.cpp | 8 +- .../MergeTree/MergeTreeReadPoolBase.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 25 +++- .../MergeTree/ReplicatedMergeTreeQueue.h | 1 + src/Storages/MutationCommands.cpp | 9 ++ src/Storages/MutationCommands.h | 1 + src/Storages/StorageMergeTree.cpp | 22 +++- src/Storages/StorageMergeTree.h | 1 + 20 files changed, 156 insertions(+), 76 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index e5f9c3b23ea..998b606ec57 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -243,6 +243,7 @@ bool analyzeProjectionCandidate( auto projection_result_ptr = reader.estimateNumMarksToRead( std::move(projection_parts), + reading.getMutationsSnapshot()->cloneEmpty(), required_column_names, candidate.projection->metadata, projection_query_info, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 7d6e650cba0..de29e7a9d5a 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1373,6 +1373,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(Merge { return selectRangesToRead( std::move(parts), + mutations_snapshot, metadata_for_reading, query_info, context, @@ -1390,9 +1391,11 @@ static void buildIndexes( const ActionsDAG * filter_actions_dag, const MergeTreeData & data, const MergeTreeData::DataPartsVector & parts, + const MergeTreeData::MutationsSnapshotPtr & mutations_snapshot, const ContextPtr & context, const SelectQueryInfo & query_info, - const StorageMetadataPtr & metadata_snapshot) + const StorageMetadataPtr & metadata_snapshot, + const LoggerPtr & log) { indexes.reset(); @@ -1418,19 +1421,21 @@ static void buildIndexes( indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); } - indexes->part_values - = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context); + indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(metadata_snapshot, data, parts, filter_actions_dag, context); MergeTreeDataSelectExecutor::buildKeyConditionFromPartOffset(indexes->part_offset_condition, filter_actions_dag, context); indexes->use_skip_indexes = settings.use_skip_indexes; - bool final = query_info.isFinal(); - - if (final && !settings.use_skip_indexes_if_final) + if (query_info.isFinal() && !settings.use_skip_indexes_if_final) indexes->use_skip_indexes = false; if (!indexes->use_skip_indexes) return; + const auto & all_indexes = metadata_snapshot->getSecondaryIndices(); + + if (all_indexes.empty()) + return; + std::unordered_set ignored_index_names; if (settings.ignore_data_skipping_indices.changed) @@ -1455,49 +1460,68 @@ static void buildIndexes( throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices); } + auto all_updated_columns = mutations_snapshot->getAllUpdatedColumns(); + UsefulSkipIndexes skip_indexes; using Key = std::pair; std::map merged; - for (const auto & index : metadata_snapshot->getSecondaryIndices()) + for (const auto & index : all_indexes) { - if (!ignored_index_names.contains(index.name)) + if (ignored_index_names.contains(index.name)) + continue; + + auto index_helper = MergeTreeIndexFactory::instance().get(index); + + if (!all_updated_columns.empty()) { - auto index_helper = MergeTreeIndexFactory::instance().get(index); - if (index_helper->isMergeable()) + auto required_columns = index_helper->getColumnsRequiredForIndexCalc(); + auto it = std::ranges::find_if(required_columns, [&](const auto & column_name) { - auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size()); - if (inserted) - { - skip_indexes.merged_indices.emplace_back(); - skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); - } + return all_updated_columns.contains(column_name); + }); - skip_indexes.merged_indices[it->second].addIndex(index_helper); - } - else + if (it != required_columns.end()) { - MergeTreeIndexConditionPtr condition; - if (index_helper->isVectorSearch()) - { -#ifdef ENABLE_ANNOY - if (const auto * annoy = typeid_cast(index_helper.get())) - condition = annoy->createIndexCondition(query_info, context); -#endif -#ifdef ENABLE_USEARCH - if (const auto * usearch = typeid_cast(index_helper.get())) - condition = usearch->createIndexCondition(query_info, context); -#endif - if (!condition) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); - } - else - condition = index_helper->createIndexCondition(filter_actions_dag, context); - - if (!condition->alwaysUnknownOrTrue()) - skip_indexes.useful_indices.emplace_back(index_helper, condition); + LOG_TRACE(log, "Index {} is not used because it depends on column {} which will be updated on fly", index.name, *it); + continue; } } + + if (index_helper->isMergeable()) + { + auto [it, inserted] = merged.emplace(Key{index_helper->index.type, index_helper->getGranularity()}, skip_indexes.merged_indices.size()); + if (inserted) + { + skip_indexes.merged_indices.emplace_back(); + skip_indexes.merged_indices.back().condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); + } + + skip_indexes.merged_indices[it->second].addIndex(index_helper); + continue; + } + + MergeTreeIndexConditionPtr condition; + if (index_helper->isVectorSearch()) + { +#ifdef ENABLE_ANNOY + if (const auto * annoy = typeid_cast(index_helper.get())) + condition = annoy->createIndexCondition(query_info, context); +#endif +#ifdef ENABLE_USEARCH + if (const auto * usearch = typeid_cast(index_helper.get())) + condition = usearch->createIndexCondition(query_info, context); +#endif + if (!condition) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); + } + else + { + condition = index_helper->createIndexCondition(filter_actions_dag, context); + } + + if (!condition->alwaysUnknownOrTrue()) + skip_indexes.useful_indices.emplace_back(index_helper, condition); } // move minmax indices to first positions, so they will be applied first as cheapest ones @@ -1535,14 +1559,17 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) query_info.filter_actions_dag.get(), data, prepared_parts, + mutations_snapshot, context, query_info, - metadata_for_reading); + metadata_for_reading, + log); } } ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( MergeTreeData::DataPartsVector parts, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info_, ContextPtr context_, @@ -1573,7 +1600,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const Names & primary_key_column_names = primary_key.column_names; if (!indexes) - buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, context_, query_info_, metadata_snapshot); + buildIndexes(indexes, query_info_.filter_actions_dag.get(), data, parts, mutations_snapshot, context_, query_info_, metadata_snapshot, log); if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 10b9e92d99b..e441eda7505 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -154,6 +154,7 @@ public: static AnalysisResultPtr selectRangesToRead( MergeTreeData::DataPartsVector parts, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, diff --git a/src/Storages/MergeTree/AlterConversions.h b/src/Storages/MergeTree/AlterConversions.h index cee23bc4efc..046cc1d2491 100644 --- a/src/Storages/MergeTree/AlterConversions.h +++ b/src/Storages/MergeTree/AlterConversions.h @@ -1,8 +1,8 @@ #pragma once #include -#include -#include +#include +#include namespace DB @@ -11,11 +11,17 @@ namespace DB /// Alter conversions which should be applied on-fly for part. /// Built from of the most recent mutation commands for part. /// Now only ALTER RENAME COLUMN is applied. -class AlterConversions : private boost::noncopyable +class AlterConversions : private WithContext, boost::noncopyable { public: AlterConversions() = default; + AlterConversions(StorageMetadataPtr metadata_snapshot_, ContextPtr context_) + : WithContext(context_) + , metadata_snapshot(std::move(metadata_snapshot_)) + { + } + struct RenamePair { std::string rename_to; @@ -40,6 +46,7 @@ public: private: /// Rename map new_name -> old_name. std::vector rename_map; + StorageMetadataPtr metadata_snapshot; }; using AlterConversionsPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e9a48c655e8..ea4b9261af8 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -301,7 +301,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() infos.add(part_infos); } - global_ctx->alter_conversions.push_back(MergeTreeData::getAlterConversionsForPart(part, mutations_snapshot)); + global_ctx->alter_conversions.push_back(MergeTreeData::getAlterConversionsForPart(part, mutations_snapshot, global_ctx->metadata_snapshot, global_ctx->context)); } const auto & local_part_min_ttl = global_ctx->new_data_part->ttl_infos.part_min_ttl; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3852f282f65..32c0251ef53 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7137,11 +7137,11 @@ UInt64 MergeTreeData::estimateNumberOfRowsToRead( ContextPtr query_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const { const auto & snapshot_data = assert_cast(*storage_snapshot->data); - const auto & parts = snapshot_data.parts; MergeTreeDataSelectExecutor reader(*this); auto result_ptr = reader.estimateNumMarksToRead( - parts, + snapshot_data.parts, + snapshot_data.mutations_snapshot, storage_snapshot->getMetadataForQuery()->getColumns().getAll().getNames(), storage_snapshot->metadata, query_info, @@ -8162,10 +8162,12 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S AlterConversionsPtr MergeTreeData::getAlterConversionsForPart( const MergeTreeDataPartPtr & part, - const MutationsSnapshotPtr & snapshot) + const MutationsSnapshotPtr & mutations, + const StorageMetadataPtr & metadata, + const ContextPtr & query_context) { - auto commands = snapshot->getAlterMutationCommandsForPart(part); - auto result = std::make_shared(); + auto commands = mutations->getAlterMutationCommandsForPart(part); + auto result = std::make_shared(metadata, query_context); for (const auto & command : commands | std::views::reverse) result->addMutationCommand(command); @@ -8758,8 +8760,7 @@ static void updateMutationsCounters( void incrementMutationsCounters( Int64 & num_data_mutations_to_apply, Int64 & num_metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & /*lock*/) + const MutationCommands & commands) { return updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, 1); } @@ -8767,8 +8768,7 @@ void incrementMutationsCounters( void decrementMutationsCounters( Int64 & num_data_mutations_to_apply, Int64 & num_metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & /*lock*/) + const MutationCommands & commands) { return updateMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, -1); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b14121e0c78..dc37d5e7dad 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -475,6 +475,8 @@ public: /// @return list of mutation commands, in *reverse* order (newest to oldest) virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; virtual std::shared_ptr cloneEmpty() const = 0; + virtual NameSet getAllUpdatedColumns() const = 0; + bool hasDataMutations() const { return params.need_data_mutations && info.num_data_mutations > 0; } virtual ~IMutationsSnapshot() = default; @@ -975,7 +977,9 @@ public: /// Return alter conversions for part which must be applied on fly. static AlterConversionsPtr getAlterConversionsForPart( const MergeTreeDataPartPtr & part, - const MutationsSnapshotPtr & snapshot); + const MutationsSnapshotPtr & mutations, + const StorageMetadataPtr & metadata, + const ContextPtr & query_context); /// Returns destination disk or volume for the TTL rule according to current storage policy. SpacePtr getDestinationForMoveTTL(const TTLDescription & move_ttl) const; @@ -1769,17 +1773,14 @@ struct CurrentlySubmergingEmergingTagger }; /// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. -/// Return true if the counter had been updated void incrementMutationsCounters( Int64 & num_data_mutations_to_apply, Int64 & num_metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & lock); + const MutationCommands & commands); void decrementMutationsCounters( Int64 & num_data_mutations_to_apply, Int64 & num_metadata_mutations_to_apply, - const MutationCommands & commands, - std::lock_guard & lock); + const MutationCommands & commands); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index e294def040b..2d9a5c6084c 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -884,6 +884,7 @@ std::shared_ptr MergeTreeDataSelectExecutor::checkLimits( ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const Names & column_names_to_return, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, @@ -898,6 +899,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar std::optional indexes; return ReadFromMergeTree::selectRangesToRead( std::move(parts), + mutations_snapshot, metadata_snapshot, query_info, context, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 8e4c55f2c1d..3668eb0ad90 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -56,6 +56,7 @@ public: /// This method is used to select best projection for table. ReadFromMergeTree::AnalysisResultPtr estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + MergeTreeData::MutationsSnapshotPtr mutations_snapshot, const Names & column_names, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index faed543420c..09bbf33ba9b 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -105,7 +105,6 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( column_names_, settings_, context_) - , WithContext(context_) , prefetch_threadpool(getContext()->getPrefetchThreadpool()) , log(getLogger("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) { diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 65a7d62ad2d..1a709250937 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -14,7 +14,7 @@ using MergeTreeReaderPtr = std::unique_ptr; /// A class which is responsible for creating read tasks /// which are later taken by readers via getTask method. /// Does prefetching for the read tasks it creates. -class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithContext +class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase { public: MergeTreePrefetchedReadPool( diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index fce8d649617..021b340d746 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -4,9 +4,6 @@ #include #include -#include - - namespace DB { @@ -26,7 +23,8 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const Names & column_names_, const PoolSettings & pool_settings_, const ContextPtr & context_) - : parts_ranges(std::move(parts_)) + : WithContext(context_) + , parts_ranges(std::move(parts_)) , mutations_snapshot(std::move(mutations_snapshot_)) , shared_virtual_fields(std::move(shared_virtual_fields_)) , storage_snapshot(storage_snapshot_) @@ -121,7 +119,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos(const Settings & settings) } read_task_info.part_index_in_query = part_with_ranges.part_index_in_query; - read_task_info.alter_conversions = MergeTreeData::getAlterConversionsForPart(part_with_ranges.data_part, mutations_snapshot); + read_task_info.alter_conversions = MergeTreeData::getAlterConversionsForPart(part_with_ranges.data_part, mutations_snapshot, storage_snapshot->metadata, getContext()); LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, read_task_info.alter_conversions); diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 7b4e034d892..7f9106d476e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -6,7 +6,7 @@ namespace DB { -class MergeTreeReadPoolBase : public IMergeTreeReadPool +class MergeTreeReadPoolBase : public IMergeTreeReadPool, protected WithContext { public: using MutationsSnapshotPtr = MergeTreeData::MutationsSnapshotPtr; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a95cccdc0d2..86e63782c18 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2125,7 +2125,7 @@ bool MutateTask::prepare() }; auto mutations_snapshot = ctx->data->getMutationsSnapshot(params); - auto alter_conversions = MergeTreeData::getAlterConversionsForPart(ctx->source_part, mutations_snapshot); + auto alter_conversions = MergeTreeData::getAlterConversionsForPart(ctx->source_part, mutations_snapshot, ctx->metadata_snapshot, ctx->context); auto context_for_reading = Context::createCopy(ctx->context); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c3d91ca0705..8661b2aa784 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -950,7 +950,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper { const auto commands = entry.commands; it = mutations_by_znode.erase(it); - decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands, state_lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, commands); } else it = mutations_by_znode.erase(it); @@ -1000,7 +1000,7 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper for (const ReplicatedMergeTreeMutationEntryPtr & entry : new_mutations) { auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)).first->second; - incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands); NOEXCEPT_SCOPE({ for (const auto & pair : entry->block_numbers) @@ -1961,6 +1961,23 @@ MutationCommands ReplicatedMergeTreeQueue::MutationsSnapshot::getAlterMutationCo return result; } +NameSet ReplicatedMergeTreeQueue::MutationsSnapshot::getAllUpdatedColumns() const +{ + if (!params.need_data_mutations) + return {}; + + NameSet res; + for (const auto & [partition_id, mutations] : mutations_by_partition) + { + for (const auto & [version, entry] : mutations) + { + auto names = entry->commands.getAllUpdatedColumns(); + std::move(names.begin(), names.end(), std::inserter(res, res.end())); + } + } + return res; +} + MergeTreeData::MutationsSnapshotPtr ReplicatedMergeTreeQueue::getMutationsSnapshot(const MutationsSnapshot::Params & params) const { std::lock_guard lock(state_mutex); @@ -2122,7 +2139,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep mutation.parts_to_do.clear(); } - decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, mutation.entry->commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, mutation.entry->commands); } else if (mutation.parts_to_do.size() == 0) { @@ -2179,7 +2196,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } - decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, entry->commands); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index af8e2521f81..91a23b6a3b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -426,6 +426,7 @@ public: MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override; std::shared_ptr cloneEmpty() const override { return std::make_shared(); } + NameSet getAllUpdatedColumns() const override; }; /// Return mutation commands for part which could be not applied to diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f736c863eee..1aa9f5e23f8 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -268,4 +268,13 @@ bool MutationCommands::containBarrierCommand() const return false; } +NameSet MutationCommands::getAllUpdatedColumns() const +{ + NameSet res; + for (const auto & command : *this) + for (const auto & [column_name, _] : command.column_to_update_expression) + res.insert(column_name); + return res; +} + } diff --git a/src/Storages/MutationCommands.h b/src/Storages/MutationCommands.h index f999aab1f4d..5ae537bb657 100644 --- a/src/Storages/MutationCommands.h +++ b/src/Storages/MutationCommands.h @@ -92,6 +92,7 @@ public: /// stick with other commands. Commands from one set have already been validated /// to be executed without issues on the creation state. bool containBarrierCommand() const; + NameSet getAllUpdatedColumns() const; }; using MutationCommandsConstPtr = std::shared_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index da782b3634a..19cb2bec2e7 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -519,7 +519,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands); } LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); @@ -556,7 +556,7 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); - decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry.commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry.commands); } } else @@ -838,7 +838,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { bool mutation_finished = *min_version > static_cast(mutation_version); if (!mutation_finished) - decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands, lock); + decrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *it->second.commands); } to_kill.emplace(std::move(it->second)); @@ -923,7 +923,7 @@ void StorageMergeTree::loadMutations() if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); - incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry_it->second.commands, lock); + incrementMutationsCounters(num_data_mutations_to_apply, num_metadata_mutations_to_apply, *entry_it->second.commands); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -2464,6 +2464,20 @@ MutationCommands StorageMergeTree::MutationsSnapshot::getAlterMutationCommandsFo return result; } +NameSet StorageMergeTree::MutationsSnapshot::getAllUpdatedColumns() const +{ + if (!params.need_data_mutations) + return {}; + + NameSet res; + for (const auto & [version, commands] : mutations_by_version) + { + auto names = commands->getAllUpdatedColumns(); + std::move(names.begin(), names.end(), std::inserter(res, res.end())); + } + return res; +} + MergeTreeData::MutationsSnapshotPtr StorageMergeTree::getMutationsSnapshot(const IMutationsSnapshot::Params & params) const { std::lock_guard lock(currently_processing_in_background_mutex); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 8ea0db37528..ef333fe3f18 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -320,6 +320,7 @@ private: MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const override; std::shared_ptr cloneEmpty() const override { return std::make_shared(); } + NameSet getAllUpdatedColumns() const override; }; MutationsSnapshotPtr getMutationsSnapshot(const IMutationsSnapshot::Params & params) const override; From 704ec8dea6b4fa583689508e4362644517321005 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Mon, 5 Aug 2024 14:55:33 -0400 Subject: [PATCH 283/844] Handle incomplete sequences at end of input --- src/IO/WriteBufferValidUTF8.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index d611befac37..426f302cb02 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -149,9 +149,27 @@ void WriteBufferValidUTF8::finalizeImpl() /// Write all complete sequences from buffer. nextImpl(); - /// If unfinished sequence at end, then write replacement. - if (working_buffer.begin() != memory.data()) - putReplacement(); + /// Handle remaining bytes if we have an incomplete sequence + if (working_buffer.begin() != memory.data()) { + char * p = memory.data(); + + while (p < pos) { + UInt8 len = length_of_utf8_sequence[static_cast(*p)]; + if (p + len > pos) { + // Incomplete sequence. Skip one byte. + putReplacement(); + ++p; + } else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) { + // Valid sequence + putValid(p, len); + p += len; + } else { + // Invalid sequence, skip first byte. + putReplacement(); + ++p; + } + } + } } } From 2a1ee419b473350323c5582bcb950ab5630a07e2 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Tue, 6 Aug 2024 15:53:49 -0400 Subject: [PATCH 284/844] Fix style issue --- src/IO/WriteBufferValidUTF8.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index 426f302cb02..8441b4eafa2 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -150,20 +150,27 @@ void WriteBufferValidUTF8::finalizeImpl() nextImpl(); /// Handle remaining bytes if we have an incomplete sequence - if (working_buffer.begin() != memory.data()) { + if (working_buffer.begin() != memory.data()) + { char * p = memory.data(); - while (p < pos) { + while (p < pos) + { UInt8 len = length_of_utf8_sequence[static_cast(*p)]; - if (p + len > pos) { + if (p + len > pos) + { // Incomplete sequence. Skip one byte. putReplacement(); ++p; - } else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) { + } + else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) + { // Valid sequence putValid(p, len); p += len; - } else { + } + else + { // Invalid sequence, skip first byte. putReplacement(); ++p; From f4ed3f5c6d138dc6ac7209393c60562ddf9e8d24 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Tue, 6 Aug 2024 16:14:26 -0400 Subject: [PATCH 285/844] Add test --- .../03221_incomplete-utf8-sequence.reference | 16 ++++++++++++++++ .../03221_incomplete-utf8-sequence.sql | 2 ++ 2 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference create mode 100644 tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql diff --git a/tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference b/tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference new file mode 100644 index 00000000000..4577427251d --- /dev/null +++ b/tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference @@ -0,0 +1,16 @@ +{ + "meta": + [ + { + "name": "unhex('f0')", + "type": "String" + } + ], + + "data": + [ + ["�"] + ], + + "rows": 1 +} diff --git a/tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql b/tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql new file mode 100644 index 00000000000..ee4f25f3b4a --- /dev/null +++ b/tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql @@ -0,0 +1,2 @@ +SET output_format_write_statistics = 0; +SELECT unhex('f0') FORMAT JSONCompact; From f0f1015162ddfb7ef2e8046fa9c952c5c7e6b89a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 7 Aug 2024 10:47:13 +0800 Subject: [PATCH 286/844] fix style --- src/Functions/array/array.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index 21b1841f3ed..f193acafe06 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -172,9 +172,9 @@ private: concrete_columns[i] = concrete_column; } - ColumnString & concreate_out_data = assert_cast(out_data); - auto & out_chars = concreate_out_data.getChars(); - auto & out_offsets = concreate_out_data.getOffsets(); + ColumnString & concrete_out_data = assert_cast(out_data); + auto & out_chars = concrete_out_data.getChars(); + auto & out_offsets = concrete_out_data.getOffsets(); out_chars.resize_exact(total_bytes); out_offsets.resize_exact(input_rows_count * columns.size()); @@ -197,7 +197,6 @@ private: bool executeFixedString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const { - size_t total_bytes = 0; std::vector concrete_columns(columns.size(), nullptr); for (size_t i = 0; i < columns.size(); ++i) { @@ -205,15 +204,16 @@ private: if (!concrete_column) return false; - total_bytes += concrete_column->getChars().size(); concrete_columns[i] = concrete_column; } - ColumnFixedString & concreate_out_data = assert_cast(out_data); - auto & out_chars = concreate_out_data.getChars(); + ColumnFixedString & concrete_out_data = assert_cast(out_data); + auto & out_chars = concrete_out_data.getChars(); + + const size_t n = concrete_out_data.getN(); + size_t total_bytes = concrete_out_data.getN() * columns.size() * input_rows_count; out_chars.resize_exact(total_bytes); - const size_t n = concreate_out_data.getN(); size_t curr_out_offset = 0; for (size_t row_i = 0; row_i < input_rows_count; ++row_i) { @@ -241,9 +241,9 @@ private: nested_columns[i] = &concrete_column->getNestedColumn(); } - ColumnNullable & concreate_out_data = assert_cast(out_data); - auto & out_null_map = concreate_out_data.getNullMapColumn(); - auto & out_nested_column = concreate_out_data.getNestedColumn(); + ColumnNullable & concrete_out_data = assert_cast(out_data); + auto & out_null_map = concrete_out_data.getNullMapColumn(); + auto & out_nested_column = concrete_out_data.getNestedColumn(); executeAny(null_maps, out_null_map, input_rows_count); executeAny(nested_columns, out_nested_column, input_rows_count); return true; @@ -251,11 +251,11 @@ private: bool executeTuple(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const { - ColumnTuple * concreate_out_data = typeid_cast(&out_data); - if (!concreate_out_data) + ColumnTuple * concrete_out_data = typeid_cast(&out_data); + if (!concrete_out_data) return false; - const size_t tuple_size = concreate_out_data->tupleSize(); + const size_t tuple_size = concrete_out_data->tupleSize(); for (size_t i = 0; i < tuple_size; ++i) { ColumnRawPtrs elem_columns(columns.size(), nullptr); @@ -264,7 +264,7 @@ private: const ColumnTuple * concrete_column = assert_cast(columns[j]); elem_columns[j] = &concrete_column->getColumn(i); } - executeAny(elem_columns, concreate_out_data->getColumn(i), input_rows_count); + executeAny(elem_columns, concrete_out_data->getColumn(i), input_rows_count); } return true; } From aafe498b7f6045e75414c17abecad168aa0efb88 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 7 Aug 2024 10:43:49 +0800 Subject: [PATCH 287/844] Handle edge case: col_int32 > 10.6 --- src/Storages/Statistics/Statistics.cpp | 22 +++++++++++++++++++ src/Storages/Statistics/Statistics.h | 6 +++++ src/Storages/Statistics/StatisticsMinMax.cpp | 10 +++------ src/Storages/Statistics/StatisticsTDigest.cpp | 16 +++++--------- .../02864_statistics_estimation.sql | 10 +++++---- 5 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index 1f159211a93..e94d8b67aff 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -10,6 +12,7 @@ #include #include #include +#include #include "config.h" /// USE_DATASKETCHES @@ -28,6 +31,25 @@ enum StatisticsFileVersion : UInt16 V0 = 0, }; +std::optional StatisticsUtils::tryConvertToFloat64(const Field & value, const DataTypePtr & value_data_type) +{ + if (value_data_type->isValueRepresentedByNumber()) + { + Field val_converted; + + /// For case val_int32 < 10.5 or val_int32 < '10.5' we should convert 10.5 to Float64. + if (isInteger(value_data_type) && (value.getType() == Field::Types::Float64 || value.getType() == Field::Types::String)) + val_converted = convertFieldToType(value, *DataTypeFactory::instance().get("Float64")); + + /// We should convert value to the real column data type and then translate it to Float64. + /// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null. + val_converted = convertFieldToType(value, *value_data_type); + if (val_converted.isNull()) + return {}; + return applyVisitor(FieldVisitorConvertToNumber(), val_converted); + } + return {}; +} IStatistics::IStatistics(const SingleStatisticsDescription & stat_) : stat(stat_) diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h index 1425be89d47..9f82f137f1c 100644 --- a/src/Storages/Statistics/Statistics.h +++ b/src/Storages/Statistics/Statistics.h @@ -15,6 +15,12 @@ constexpr auto STATS_FILE_PREFIX = "statistics_"; constexpr auto STATS_FILE_SUFFIX = ".stats"; +struct StatisticsUtils +{ + /// Returns std::nullopt if input Field cannot be converted to a concrete value + static std::optional tryConvertToFloat64(const Field & value, const DataTypePtr & value_data_type); +}; + /// Statistics describe properties of the values in the column, /// e.g. how many unique values exist, /// what are the N most frequent values, diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index 11a0704dae7..88522894427 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -1,10 +1,8 @@ #include -#include #include #include #include #include -#include #include @@ -53,12 +51,10 @@ void StatisticsMinMax::deserialize(ReadBuffer & buf) Float64 StatisticsMinMax::estimateLess(const Field & val) const { - Field val_converted = convertFieldToType(val, *data_type); - if (val_converted.isNull()) + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type); + if (!val_as_float.has_value()) return 0; - auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); - if (val_as_float < min) return 0; @@ -68,7 +64,7 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const if (min == max) return (val_as_float != max) ? 0 : row_count; - return ((val_as_float - min) / (max - min)) * row_count; + return ((*val_as_float - min) / (max - min)) * row_count; } void minMaxStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index cfd626c8ea8..a7ce43d2fe1 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -41,22 +41,18 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf) Float64 StatisticsTDigest::estimateLess(const Field & val) const { - Field val_converted = convertFieldToType(val, *data_type); - if (val_converted.isNull()) + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type); + if (!val_as_float.has_value()) return 0; - - auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); - return t_digest.getCountLessThan(val_as_float); + return t_digest.getCountLessThan(*val_as_float); } Float64 StatisticsTDigest::estimateEqual(const Field & val) const { - Field val_converted = convertFieldToType(val, *data_type); - if (val_converted.isNull()) + auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type); + if (!val_as_float.has_value()) return 0; - - auto val_as_float = applyVisitor(FieldVisitorConvertToNumber(), val_converted); - return t_digest.getCountEqual(val_as_float); + return t_digest.getCountEqual(*val_as_float); } void tdigestStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 6436cf68738..5eb51fe7111 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -67,9 +67,7 @@ ALTER TABLE tab DROP STATISTICS a, b, c, d; SELECT 'Test statistics multi-types:'; ALTER TABLE tab ADD STATISTICS a TYPE count_min, uniq; -ALTER TABLE tab ADD STATISTICS b TYPE count_min, minmax, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS c TYPE count_min, minmax, uniq, tdigest; -ALTER TABLE tab ADD STATISTICS d TYPE count_min, minmax, uniq, tdigest; +ALTER TABLE tab ADD STATISTICS b, c, d TYPE count_min, minmax, uniq, tdigest; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') @@ -82,10 +80,14 @@ WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; SELECT 'Test statistics implicitly type conversion:'; + SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/ and b > 0/*9990*/ and a = '1'/*1*/) +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and b > 50.1/*5000*/) +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; ALTER TABLE tab DROP STATISTICS a, b, c, d; From cfaa852af9e94bd25217bfbb690f2d6ad641f3d6 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 7 Aug 2024 11:40:01 +0800 Subject: [PATCH 288/844] Little fix --- src/Storages/Statistics/Statistics.cpp | 7 ++++--- .../0_stateless/02864_statistics_estimation.reference | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index e94d8b67aff..691cf42aa42 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -40,10 +40,11 @@ std::optional StatisticsUtils::tryConvertToFloat64(const Field & value, /// For case val_int32 < 10.5 or val_int32 < '10.5' we should convert 10.5 to Float64. if (isInteger(value_data_type) && (value.getType() == Field::Types::Float64 || value.getType() == Field::Types::String)) val_converted = convertFieldToType(value, *DataTypeFactory::instance().get("Float64")); + else + /// We should convert value to the real column data type and then translate it to Float64. + /// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null. + val_converted = convertFieldToType(value, *value_data_type); - /// We should convert value to the real column data type and then translate it to Float64. - /// For example for expression col_date > '2024-08-07', if we directly convert '2024-08-07' to Float64, we will get null. - val_converted = convertFieldToType(value, *value_data_type); if (val_converted.isNull()) return {}; return applyVisitor(FieldVisitorConvertToNumber(), val_converted); diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index fc370691830..d475926df8b 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -25,4 +25,7 @@ Test statistics multi-types: Test statistics implicitly type conversion: Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'1\'), equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\'), greater(b, 0)) (removed) + Prewhere filter column: and(equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\')) (removed) + Prewhere info + Prewhere filter + Prewhere filter column: and(equals(d, \'2024-08-06 09:58:09\'), greater(b, 50.1_Float64)) (removed) From f5c07b89383a1401797902a6acb3a5a300efa3fe Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 7 Aug 2024 09:43:13 +0000 Subject: [PATCH 289/844] Add test cases to 03217_datetime64_constant_to_ast --- ...03217_datetime64_constant_to_ast.reference | 3 +++ .../03217_datetime64_constant_to_ast.sql | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/queries/0_stateless/03217_datetime64_constant_to_ast.reference b/tests/queries/0_stateless/03217_datetime64_constant_to_ast.reference index c20baa0d261..90120f85a71 100644 --- a/tests/queries/0_stateless/03217_datetime64_constant_to_ast.reference +++ b/tests/queries/0_stateless/03217_datetime64_constant_to_ast.reference @@ -1,2 +1,5 @@ 1970-01-01 00:00:01.000 1970-01-01 00:00:01.000 +1970-01-01 00:00:01.000 0 +1970-01-01 00:00:01.000 0 +1970-01-01 00:00:01.000 0 localhost diff --git a/tests/queries/0_stateless/03217_datetime64_constant_to_ast.sql b/tests/queries/0_stateless/03217_datetime64_constant_to_ast.sql index 63334a511c7..d01bb8d72f1 100644 --- a/tests/queries/0_stateless/03217_datetime64_constant_to_ast.sql +++ b/tests/queries/0_stateless/03217_datetime64_constant_to_ast.sql @@ -4,3 +4,22 @@ SET session_timezone = 'UTC'; SELECT toDateTime64('1970-01-01 00:00:01', 3) FROM remote('127.0.0.{1,2}', system, one) ; + +SELECT toDateTime64('1970-01-01 00:00:01', 3), dummy +FROM remote('127.0.0.{1,2}', system, one) +GROUP BY dummy +ORDER BY dummy +; + +SELECT materialize(toDateTime64('1970-01-01 00:00:01', 3)), dummy +FROM remote('127.0.0.{1,2}', system, one) +GROUP BY dummy +ORDER BY dummy +; + + +SELECT toDateTime64('1970-01-01 00:00:01', 3), sum(dummy), hostname() +FROM remote('127.0.0.{1,2}', system, one) +GROUP BY hostname() +ORDER BY ALL +; From cff7cbac94b1b6f39dd4585fca5bcbcb2f952747 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Wed, 7 Aug 2024 16:21:16 -0400 Subject: [PATCH 290/844] Style fix --- src/IO/WriteBufferValidUTF8.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index 8441b4eafa2..25c7b3d4820 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -159,19 +159,19 @@ void WriteBufferValidUTF8::finalizeImpl() UInt8 len = length_of_utf8_sequence[static_cast(*p)]; if (p + len > pos) { - // Incomplete sequence. Skip one byte. + /// Incomplete sequence. Skip one byte. putReplacement(); ++p; } else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) { - // Valid sequence + /// Valid sequence putValid(p, len); p += len; } else { - // Invalid sequence, skip first byte. + /// Invalid sequence, skip first byte. putReplacement(); ++p; } From d09531e48a90db637a3e5b42a2fd9569c911ed44 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Wed, 7 Aug 2024 16:25:01 -0400 Subject: [PATCH 291/844] Rename test files --- ...equence.reference => 03221_incomplete_utf8_sequence.reference} | 0 ...plete-utf8-sequence.sql => 03221_incomplete_utf8_sequence.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03221_incomplete-utf8-sequence.reference => 03221_incomplete_utf8_sequence.reference} (100%) rename tests/queries/0_stateless/{03221_incomplete-utf8-sequence.sql => 03221_incomplete_utf8_sequence.sql} (100%) diff --git a/tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference b/tests/queries/0_stateless/03221_incomplete_utf8_sequence.reference similarity index 100% rename from tests/queries/0_stateless/03221_incomplete-utf8-sequence.reference rename to tests/queries/0_stateless/03221_incomplete_utf8_sequence.reference diff --git a/tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql b/tests/queries/0_stateless/03221_incomplete_utf8_sequence.sql similarity index 100% rename from tests/queries/0_stateless/03221_incomplete-utf8-sequence.sql rename to tests/queries/0_stateless/03221_incomplete_utf8_sequence.sql From f8a14e86d8db56e8ad7046b8848c7521d235a777 Mon Sep 17 00:00:00 2001 From: Matt Woenker Date: Wed, 7 Aug 2024 16:32:13 -0400 Subject: [PATCH 292/844] Add const a few places --- src/IO/WriteBufferValidUTF8.cpp | 8 ++++---- src/IO/WriteBufferValidUTF8.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp index 25c7b3d4820..2a86f8c2801 100644 --- a/src/IO/WriteBufferValidUTF8.cpp +++ b/src/IO/WriteBufferValidUTF8.cpp @@ -54,7 +54,7 @@ inline void WriteBufferValidUTF8::putReplacement() } -inline void WriteBufferValidUTF8::putValid(char *data, size_t len) +inline void WriteBufferValidUTF8::putValid(const char *data, size_t len) { if (len == 0) return; @@ -152,18 +152,18 @@ void WriteBufferValidUTF8::finalizeImpl() /// Handle remaining bytes if we have an incomplete sequence if (working_buffer.begin() != memory.data()) { - char * p = memory.data(); + const char * p = memory.data(); while (p < pos) { - UInt8 len = length_of_utf8_sequence[static_cast(*p)]; + UInt8 len = length_of_utf8_sequence[static_cast(*p)]; if (p + len > pos) { /// Incomplete sequence. Skip one byte. putReplacement(); ++p; } - else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) + else if (Poco::UTF8Encoding::isLegal(reinterpret_cast(p), len)) { /// Valid sequence putValid(p, len); diff --git a/src/IO/WriteBufferValidUTF8.h b/src/IO/WriteBufferValidUTF8.h index daaf0427f88..a398b8ded01 100644 --- a/src/IO/WriteBufferValidUTF8.h +++ b/src/IO/WriteBufferValidUTF8.h @@ -26,7 +26,7 @@ public: private: void putReplacement(); - void putValid(char * data, size_t len); + void putValid(const char * data, size_t len); void nextImpl() override; void finalizeImpl() override; From 5f747bc41174cb03c06288393a026d2385d57e8b Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 8 Aug 2024 11:09:59 +0800 Subject: [PATCH 293/844] Add more test for implicitly type conversion --- .../02864_statistics_estimation.reference | 26 +++++++-- .../02864_statistics_estimation.sql | 55 ++++++++++++++++--- 2 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index d475926df8b..077476d28c1 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -23,9 +23,23 @@ Test statistics multi-types: Prewhere filter Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) Test statistics implicitly type conversion: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(d, \'2024-08-06 09:58:09\'), equals(c, \'0\')) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(d, \'2024-08-06 09:58:09\'), greater(b, 50.1_Float64)) (removed) +1 +0 +0 +0 +0 +50 +50 +0 +0 +50 +0 +0 +1 +1 +1 +0 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 5eb51fe7111..1189de46267 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest DROP TABLE IF EXISTS tab SYNC; +DROP TABLE IF EXISTS tab2 SYNC; SET allow_experimental_statistics = 1; SET allow_statistics_optimize = 1; @@ -78,17 +79,55 @@ SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTi FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d > cast(1, 'DateTime')/*9999*/ and c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +DROP TABLE IF EXISTS tab SYNC; SELECT 'Test statistics implicitly type conversion:'; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and c = '0'/*100*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +CREATE TABLE tab2 +( + a String, + b UInt64, + c UInt8, + d DateTime, + e Boolean, + f Float64, + g Decimal32(1), + pk String, +) Engine = MergeTree() ORDER BY pk; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = '2024-08-06 09:58:09'/*0*/ and b > 50.1/*5000*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; -ALTER TABLE tab DROP STATISTICS a, b, c, d; +ALTER TABLE tab2 ADD STATISTICS a TYPE count_min, uniq; +ALTER TABLE tab2 ADD STATISTICS b, c, d, e, f, g TYPE count_min, minmax, uniq, tdigest; +INSERT INTO tab2 select toString(number), number, number, cast(number, 'DateTime'), number % 2, number, toDecimal32(number, 1), toString(number) FROM system.numbers LIMIT 100; -DROP TABLE IF EXISTS tab SYNC; +SELECT count(*) FROM tab2 WHERE a = '0'; +SELECT count(*) FROM tab2 WHERE a = 0; -- { serverError NO_COMMON_TYPE } + +SELECT count(*) FROM tab2 WHERE b = 1.1; + +SELECT count(*) FROM tab2 WHERE c = 1.1; +SELECT count(*) FROM tab2 WHERE c = 1000; -- out of range of UInt16 + +SELECT count(*) FROM tab2 WHERE d = '2024-08-06 09:58:09'; +SELECT count(*) FROM tab2 WHERE d = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } + +SELECT count(*) FROM tab2 WHERE e = true; +SELECT count(*) FROM tab2 WHERE e = 'true'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab2 WHERE e = 1; +SELECT count(*) FROM tab2 WHERE e = 2; +SELECT count(*) FROM tab2 WHERE e = 1.1; +SELECT count(*) FROM tab2 WHERE e = '1'; + +SELECT count(*) FROM tab2 WHERE f = 1.1; +SELECT count(*) FROM tab2 WHERE f = '1.1'; +SELECT count(*) FROM tab2 WHERE f = 1; +SELECT count(*) FROM tab2 WHERE f = '1'; + +SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.0, 1); +SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.10, 1); +SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.0, 2); +SELECT count(*) FROM tab2 WHERE g = 1.0; +SELECT count(*) FROM tab2 WHERE g = 1.0; +SELECT count(*) FROM tab2 WHERE g = '1.0'; + +DROP TABLE IF EXISTS tab2 SYNC; From 21cdbe03f94bd3bdaef8b02e61b09e5e79ab380c Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 8 Aug 2024 11:21:19 +0800 Subject: [PATCH 294/844] Add more test cases --- .../queries/0_stateless/02864_statistics_estimation.reference | 2 ++ tests/queries/0_stateless/02864_statistics_estimation.sql | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index 077476d28c1..ad923d91dda 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -24,6 +24,8 @@ Test statistics multi-types: Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) Test statistics implicitly type conversion: 1 +1 +1 0 0 0 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 1189de46267..63cc2e47c97 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -103,7 +103,10 @@ INSERT INTO tab2 select toString(number), number, number, cast(number, 'DateTime SELECT count(*) FROM tab2 WHERE a = '0'; SELECT count(*) FROM tab2 WHERE a = 0; -- { serverError NO_COMMON_TYPE } +SELECT count(*) FROM tab2 WHERE b = 1; +SELECT count(*) FROM tab2 WHERE b = '1'; SELECT count(*) FROM tab2 WHERE b = 1.1; +SELECT count(*) FROM tab2 WHERE b = '1.1'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab2 WHERE c = 1.1; SELECT count(*) FROM tab2 WHERE c = 1000; -- out of range of UInt16 From 6e7587e9b3dbc791f6970fe23703c1d65aa4b7f0 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 8 Aug 2024 14:44:08 +0800 Subject: [PATCH 295/844] Fix test for old analyzer --- .../02864_statistics_estimation.reference | 10 ++++---- .../02864_statistics_estimation.sql | 25 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference index ad923d91dda..0e6cad6528b 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ b/tests/queries/0_stateless/02864_statistics_estimation.reference @@ -6,22 +6,22 @@ Test statistics count_min: Test statistics minmax: Prewhere info Prewhere filter - Prewhere filter column: and(greater(d, _CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) + Prewhere filter column: and(greater(d, CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) Test statistics tdigest: Prewhere info Prewhere filter - Prewhere filter column: and(greater(d, _CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) + Prewhere filter column: and(greater(d, CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) Test statistics uniq: Prewhere info Prewhere filter - Prewhere filter column: and(equals(d, _CAST(1, \'DateTime\')), equals(c, 0)) (removed) + Prewhere filter column: and(equals(d, CAST(1, \'DateTime\')), equals(c, 0)) (removed) Test statistics multi-types: Prewhere info Prewhere filter - Prewhere filter column: and(equals(d, _CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) + Prewhere filter column: and(equals(d, CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) Prewhere info Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, _CAST(1, \'DateTime\'))) (removed) + Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, CAST(1, \'DateTime\'))) (removed) Test statistics implicitly type conversion: 1 1 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql index 63cc2e47c97..94fd7d12e0a 100644 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ b/tests/queries/0_stateless/02864_statistics_estimation.sql @@ -6,6 +6,7 @@ DROP TABLE IF EXISTS tab2 SYNC; SET allow_experimental_statistics = 1; SET allow_statistics_optimize = 1; SET allow_suspicious_low_cardinality_types=1; +SET allow_experimental_analyzer=1; SET mutations_sync = 2; CREATE TABLE tab @@ -27,9 +28,9 @@ SELECT 'Test statistics count_min:'; ALTER TABLE tab ADD STATISTICS a, b, c TYPE count_min; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); ALTER TABLE tab DROP STATISTICS a, b, c; @@ -38,9 +39,9 @@ SELECT 'Test statistics minmax:'; ALTER TABLE tab ADD STATISTICS b, c, d TYPE minmax; ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); ALTER TABLE tab DROP STATISTICS b, c, d; @@ -49,9 +50,9 @@ SELECT 'Test statistics tdigest:'; ALTER TABLE tab ADD STATISTICS b, c, d TYPE tdigest; ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); ALTER TABLE tab DROP STATISTICS b, c, d; @@ -59,9 +60,9 @@ SELECT 'Test statistics uniq:'; ALTER TABLE tab ADD STATISTICS a, b, c, d TYPE uniq; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*100*/ and c = 0/*1000*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); ALTER TABLE tab DROP STATISTICS a, b, c, d; @@ -71,13 +72,13 @@ ALTER TABLE tab ADD STATISTICS a TYPE count_min, uniq; ALTER TABLE tab ADD STATISTICS b, c, d TYPE count_min, minmax, uniq, tdigest; ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*1*/ and c < -90/*900*/ and b > 900/*990*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); -SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') +SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d > cast(1, 'DateTime')/*9999*/ and c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'; +WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); DROP TABLE IF EXISTS tab SYNC; From 66534e379573875821b9c68c08642465a695c177 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Aug 2024 16:32:40 +0000 Subject: [PATCH 296/844] fix build --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 18cd6f2c1dd..37a59576edd 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1561,12 +1561,8 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) mutations_snapshot, context, query_info, -<<<<<<< HEAD - metadata_for_reading, + storage_snapshot->metadata, log); -======= - storage_snapshot->metadata); ->>>>>>> upstream/master } } From b6f5eb12115ad54ea9cfade80b71fce2052ce6c0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 11 Aug 2024 23:15:19 +0000 Subject: [PATCH 297/844] Fix screwed merge --- src/Core/SettingsChangesHistory.cpp | 261 +----------------- ...allel_replicas_join_algo_and_analyzer_3.sh | 89 +----- ...llel_replicas_joins_and_analyzer.reference | 16 +- 3 files changed, 15 insertions(+), 351 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e81786a902e..7cfc119f734 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,266 +57,6 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, - {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"}, - {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, - {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, - {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, - {"input_format_native_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"}, - {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, - {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, - {"input_format_json_case_insensitive_column_matching", false, false, "Ignore case when matching JSON keys with CH columns."}, - {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, - {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."}, - {"collect_hash_table_stats_during_joins", false, true, "New setting."}, - {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."}, - {"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."}, - {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, - {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, - {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, - {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, - {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, - {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, - {"merge_tree_min_bytes_per_task_for_remote_reading", 4194304, 2097152, "Value is unified with `filesystem_prefetch_min_bytes_for_single_read_task`"}, - {"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."}, - {"backup_restore_s3_retry_attempts", 1000,1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore."}, - {"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."}, - {"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."} - }}, - {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, - {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, - {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, - {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, - {"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."}, - {"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."}, - {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, - {"allow_deprecated_snowflake_conversion_functions", true, false, "Disabled deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake."}, - {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, - {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, - {"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."}, - {"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."}, - {"enable_vertical_final", false, true, "Enable vertical final by default again after fixing bug"}, - {"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"}, - {"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"}, - {"output_format_pretty_display_footer_column_names", 0, 1, "Add a setting to display column names in the footer if there are many rows. Threshold value is controlled by output_format_pretty_display_footer_column_names_min_rows."}, - {"output_format_pretty_display_footer_column_names_min_rows", 0, 50, "Add a setting to control the threshold value for setting output_format_pretty_display_footer_column_names_min_rows. Default 50."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, - {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, - {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, - {"http_max_chunk_size", 0, 0, "Internal limitation"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, - {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, - {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, - }}, - {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, - {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, - {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, - {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, - {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, - {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, - {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, - {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, - }}, - {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, - {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, - {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, - {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, - {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, - {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized view cannot work together with async inserts."}, - {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, - {"log_processors_profiles", false, true, "Enable by default"}, - {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."}, - {"allow_suspicious_primary_key", true, false, "Forbid suspicious PRIMARY KEY/ORDER BY for MergeTree (i.e. SimpleAggregateFunction)"}, - {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, - {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, - {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"}, - {"distributed_insert_skip_read_only_replicas", false, false, "If true, INSERT into Distributed will skip read-only replicas"}, - {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"}, - {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"}, - {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"}, - {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"}, - {"allow_experimental_analyzer", false, true, "Enable analyzer and planner by default."}, - {"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."}, - {"allow_get_client_http_header", false, false, "Introduced a new function."}, - {"output_format_pretty_row_numbers", false, true, "It is better for usability."}, - {"output_format_pretty_max_value_width_apply_for_single_value", true, false, "Single values in Pretty formats won't be cut."}, - {"output_format_parquet_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_orc_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_arrow_string_as_string", false, true, "ClickHouse allows arbitrary binary data in the String data type, which is typically UTF-8. Parquet/ORC/Arrow Strings only support UTF-8. That's why you can choose which Arrow's data type to use for the ClickHouse String data type - String or Binary. While Binary would be more correct and compatible, using String by default will correspond to user expectations in most cases."}, - {"output_format_parquet_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_orc_compression_method", "lz4", "zstd", "Parquet/ORC/Arrow support many compression methods, including lz4 and zstd. ClickHouse supports each and every compression method. Some inferior tools, such as 'duckdb', lack support for the faster `lz4` compression method, that's why we set zstd by default."}, - {"output_format_pretty_highlight_digit_groups", false, true, "If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline."}, - {"geo_distance_returns_float64_on_float64_arguments", false, true, "Increase the default precision."}, - {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited."}, - {"azure_strict_upload_part_size", 0, 0, "The exact size of part to upload during multipart upload to Azure blob storage."}, - {"azure_min_upload_part_size", 16*1024*1024, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, - {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, - {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."}, - {"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."}, - }}, - {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, - {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, - {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, - {"output_format_pretty_single_large_number_tip_threshold", 0, 1'000'000, "Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)"}, - {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, - {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"}, - {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, - {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, - {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"}, - {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, - {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, - {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, - {"format_template_resultset_format", "", "", "Template result set format string can be set in query"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, - {"min_external_table_block_size_rows", DEFAULT_INSERT_BLOCK_SIZE, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to external table to specified size in rows, if blocks are not big enough"}, - {"min_external_table_block_size_bytes", DEFAULT_INSERT_BLOCK_SIZE * 256, DEFAULT_INSERT_BLOCK_SIZE * 256, "Squash blocks passed to external table to specified size in bytes, if blocks are not big enough."}, - {"parallel_replicas_prefer_local_join", true, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN."}, - {"optimize_time_filter_with_preimage", true, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, {"24.12", { } @@ -345,6 +85,7 @@ static std::initializer_list>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" 2>&1 | -======= -SETTINGS enable_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, send_logs_level='trace', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -66,26 +49,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" -======= -SETTINGS enable_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0" 2>&1 | -======= -SETTINGS enable_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, send_logs_level='trace', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -100,26 +71,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" 2>&1 | -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, send_logs_level='trace', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -133,26 +92,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='full_sorting_merge'" 2>&1 | -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, send_logs_level='trace', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' @@ -166,26 +113,14 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='hash'" -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='hash'" $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -<<<<<<< HEAD -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, join_algorithm='hash'" 2>&1 | -======= -SETTINGS enable_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | ->>>>>>> origin/master +SETTINGS enable_analyzer=1, $PARALLEL_REPLICAS_SETTINGS, parallel_replicas_prefer_local_join=0, send_logs_level='trace', join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference index 453caf1cffe..ba67acd7d0b 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.reference @@ -18,8 +18,7 @@ select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x 13 13 0 0 0 0 14 14 14 14 0 0 15 15 0 0 0 0 -<<<<<<< HEAD -explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', enable_analyzer=1; Union Expression Join @@ -34,11 +33,6 @@ Union ReadFromMemoryStorage Expression ReadFromRemoteParallelReplicas -======= -explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', enable_analyzer=1; -Expression - ReadFromRemoteParallelReplicas ->>>>>>> origin/master -- -- The same query with cte; with sub1 as (select x, y from tab1 where x != 2), @@ -477,8 +471,7 @@ select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x 13 13 0 0 0 0 14 14 14 14 0 0 15 15 0 0 0 0 -<<<<<<< HEAD -explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_analyzer=1; +explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', enable_analyzer=1; Union Expression Join @@ -495,11 +488,6 @@ Union ReadFromMergeTree Expression ReadFromRemoteParallelReplicas -======= -explain description=0 select x, y, r.y, z, rr.z, a from (select l.x, l.y, r.y, r.z as z from (select x, y from tab1 where x != 2) l any left join (select y, z from tab2 where y != 4) r on l.y = r.y) ll any left join (select z, a from tab3 where z != 8) rr on ll.z = rr.z SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', enable_analyzer=1; -Expression - ReadFromRemoteParallelReplicas ->>>>>>> origin/master -- -- The same query with cte; with sub1 as (select x, y from tab1 where x != 2), From 2068fbb15759dcc6f6140c84e2e08b596da4f766 Mon Sep 17 00:00:00 2001 From: marco-vb Date: Mon, 12 Aug 2024 10:05:10 +0000 Subject: [PATCH 298/844] Added support for single wildcard usage in x509 SubjectAltName user identification (both DNS and URI). --- .../external-authenticators/ssl-x509.md | 8 +- src/Access/Authentication.cpp | 24 +++++ .../certs/ca-cert.pem | 56 +++++----- .../certs/ca-cert.srl | 2 +- .../certs/ca-key.pem | 100 +++++++++--------- .../certs/client1-cert.pem | 52 ++++----- .../certs/client1-key.pem | 100 +++++++++--------- .../certs/client1-req.pem | 46 ++++---- .../certs/client2-cert.pem | 52 ++++----- .../certs/client2-key.pem | 100 +++++++++--------- .../certs/client2-req.pem | 46 ++++---- .../certs/client3-cert.pem | 52 ++++----- .../certs/client3-key.pem | 100 +++++++++--------- .../certs/client3-req.pem | 46 ++++---- .../certs/client4-cert.pem | 53 +++++----- .../certs/client4-key.pem | 100 +++++++++--------- .../certs/client4-req.pem | 46 ++++---- .../certs/client5-cert.pem | 33 ++++++ .../certs/client5-ext.cnf | 1 + .../certs/client5-key.pem | 52 +++++++++ .../certs/client5-req.pem | 27 +++++ .../certs/client6-cert.pem | 33 ++++++ .../certs/client6-ext.cnf | 1 + .../certs/client6-key.pem | 52 +++++++++ .../certs/client6-req.pem | 27 +++++ .../certs/generate_certs.sh | 4 + .../certs/server-cert.pem | 56 +++++----- .../certs/server-key.pem | 100 +++++++++--------- .../certs/server-req.pem | 46 ++++---- .../certs/wrong-cert.pem | 52 ++++----- .../certs/wrong-key.pem | 100 +++++++++--------- .../configs/ssl_config.xml | 0 .../configs/users_with_ssl_auth.xml | 5 + .../test_ssl_cert_authentication/test.py | 30 ++++++ 34 files changed, 950 insertions(+), 652 deletions(-) create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client5-cert.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client5-ext.cnf create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client5-key.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client5-req.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client6-cert.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client6-ext.cnf create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client6-key.pem create mode 100644 tests/integration/test_ssl_cert_authentication/certs/client6-req.pem mode change 100644 => 100755 tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml mode change 100644 => 100755 tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md index 09fac45d7ae..435dd8227fc 100644 --- a/docs/en/operations/external-authenticators/ssl-x509.md +++ b/docs/en/operations/external-authenticators/ssl-x509.md @@ -6,7 +6,7 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m -[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. +[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. `subjectAltName extension` supports the usage of one wildcard '*' in the server configuration: see [test_x509_san_wildcard_support](../../../../tests/integration/test_ssl_cert_authentication/test.py). This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml `: @@ -30,6 +30,12 @@ To enable SSL certificate authentication, a list of `Common Name`'s or `Subject + + + + URI:spiffe://foo.com/*/bar + + ``` diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 6b9a6e05cf6..994aec57f0c 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,11 +6,13 @@ #include #include #include +#include "Common/LoggingFormatStringHelpers.h" #include #include #include #include +#include "base/types.h" #include "config.h" namespace DB @@ -239,15 +242,36 @@ bool Authentication::areCredentialsValid( throw Authentication::Require(auth_data.getKerberosRealm()); case AuthenticationType::SSL_CERTIFICATE: + { for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN}) { for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type)) { if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject)) return true; + + // Wildcard support (1 only) + if (subject.contains('*')) + { + auto prefix = std::string_view(subject).substr(0, subject.find('*')); + auto suffix = std::string_view(subject).substr(subject.find('*') + 1); + auto slashes = std::count(subject.begin(), subject.end(), '/'); + + for (const auto & certificate_subject : ssl_certificate_credentials->getSSLCertificateSubjects().at(type)) + { + bool matches_wildcard = certificate_subject.starts_with(prefix) && certificate_subject.ends_with(suffix); + + // '*' must not represent a '/' in URI, so check if the number of '/' are equal + bool matches_slashes = slashes == count(certificate_subject.begin(), certificate_subject.end(), '/'); + + if (matches_wildcard && matches_slashes) + return true; + } + } } } return false; + } case AuthenticationType::SSH_KEY: #if USE_SSH diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem index d1e4a3a88d9..bec7f132e23 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFhTCCA22gAwIBAgIUZmPYBB6vdp8uxKlJcS8mI0SArqQwDQYJKoZIhvcNAQEL +MIIFhTCCA22gAwIBAgIULizd839OeWFia09eiBiQGk7m/wYwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 -MTAyNTAwWhcNMzQwNjI0MTAyNTAwWjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwODEy +MTAzOTI4WhcNMzQwODEwMTAzOTI4WjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw -CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALHzp+SR -T8EtrMBYNlgwIUGMZaXomQPwsjOpjt3RUsdE3LCQ15rLuyBwZ0SbLMDOazCDA7Xr -+AXHDYwg/PCJhe2N4NTzfgnWqbkIGwYLhys95Xxq+q+kV7Csk/27JSk1rsK3Nru/ -Iuj/NWgBkAQC8n10qfbJSXHcXm9wDVy/L8t7DXwxkX70LToGWsb6QW6VkiWe/QOW -QYTMVncqjrtLTCTnqGIk0s6ZIifgPLfaYJxtwK+LdIoJioSbAuVfHmVTe10vjonO -YqJ56KfqOn/G7dIf1l9gOLFBAe4jUf3RS1wkQMdk+oEGLbGnShgW+BQMfB48RkrQ -486h7slzwC29jBJLRARI2Oc9p8/zBDVph0pxkjVGka8dfIkZbmTD932h/1gfMgQl -F20G/H5FF1jk37bDcsczns0c24S1F2uJbzOlHjFLhqmH1IaVCWsYawfBs9khModW -VS6+WAv//cqWE3KmmJ2EdtAmzMCJzAQUEyrMZWmrFrBzpACyMq2zFEtyIXsCXpgq -eW4odxIIZlClibo1FGrflqN+hXnAhRyCj7WJBQ0ZPrEdRMRpBYhYdmygPJ+lWfsg -HOtNnshSuJTXGJTtsuJVr4Ioeq6tKfWGofRu4vvT6cILZjbp9VaxxfVLS7bTsjWR -c+5xHp+KbcjSfw2cJHQN85hlWTpMe9jPhgt/AgMBAAGjUzBRMB0GA1UdDgQWBBSJ -Kj1dSYN0jW+Mu0xFQrobZFmQpTAfBgNVHSMEGDAWgBSJKj1dSYN0jW+Mu0xFQrob -ZFmQpTAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCEm/epjNWe -w0A+1M3i0y8adrXWdTeRXuP3Klgtjh3NGFKNm9WGnOAufsqUIpVwKLw1uYNIhiEA -fj5KuD7YIVU9CrWx4Z/+lILUZhyn/br5mACtRiRTF/75/QwLcDz5z7K9MyMzdL99 -DLQ9bd3JCuEgVj6zacPrALwWVhM8u9fNGJxdQANnIC8yTY5+ZE59/fn7UI787JuR -4njOGWSVnDavbTyJCPMPiUkgwqL+QSWBcNbGAPzMaAblvc1SL2Lj/ikFDAETAZs2 -T/3ZqBqHEOuVhFQYTAvMAdMQX3w8bYv/CGL8++W+qHazY+uqPypd9CLnICbnkZmr -P+id9WleGl2F//u1CQ+YA2Q3EazSFhwRLA7IKIVCrYVaBsbe/bpxxZb6+AQVfM/i -+7+fCbr7A5HDe9Fi4dClv6xPI0GZZkarhQPsoLPaDQeqM4OE+K6oPSHJnqfAB8v3 -NgTt1QuVnKhwveX5DDEP4t/Qt4j2n7AFpeoZDEA8aM33K0noXNrwqHz3991O1RWz -t/gd+cFG/Z1jRP8kYtfAV8go2nzt8QvqBhfIcNnMwD8cwuKJ5G7SdqLvDFj3XCCO -YqQAALl4QFs046eVDEWLajSESmj4fCaTmO05sHHLy7U5asoAo/MWGbmGmL+8ExUX -sPO9r12lPJ7IThJ13PSqbJIJnnloL/XCxA== +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAME6XxeV +Nl6RnBKhr1yygLIXYIVFmqgcEyAm/5hWhV2bERP6urmIYLWcokaFpBMCXkH5tumA +2B5PQQgVVTTVv51qGNgCHR7Vfmtf3s0BlAn1XI719TelWrtVP7XzLlDjDl5ubb3I +WB0ZhckU57of0kZrUIfrqJKaSyKqYp/Hgjdczuk2uIgv12eX7r47qhfq6B9rmhVp +a/WRB2LbBoyJyjsY6UdRLld7vO2wVSWdmy9wxVhssbkP9zNY3JbE81HUMhyw4F/W +j+ryTesILSYUGvx4DjxvP8zlMvL4N81/V7q8nVkwiu9A3owECqUz5pAHfVk6U4zF +vhU+7TTLP3eyFwJk9o1ctxjT0zKtxLRv+ss8eER/7loLOD+zvq3Y/5XCHGd5WILv +9cDfz44XS0UMRdE/M2RQnFmTDPDKy1IY2TxpQLEv4txtY9h/edRDwcPPaU17gY8Z +boXZR61d90zOIDGITAr8DD/xrsei57ly/W9cbIKfNj0QfH0T6ESP5lW3fS9GRwjY +djoeOZ8uK0d+pzHGUVTkE/9Hf9Zl4NfnwvvFvvF3X/U2VeBkxw+ZgcEiphI+uUjx +aXsn7leNuAemPB50LEnQD5bWKIT1qIeVIQWOHzP5g+tGxCsfg/YzoUb8MvZSe+n2 +FLDqSYKUPy6mZf0U38teh7eIfrIT6yux5IzXAgMBAAGjUzBRMB0GA1UdDgQWBBRG +jKB4cx0JVNfQH8o4feWwhu6NRDAfBgNVHSMEGDAWgBRGjKB4cx0JVNfQH8o4feWw +hu6NRDAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBtD9HsFU2D +28aJozGf2sr2CL1wjlpuOSB5LYu3rcuMrU8R+6xR7rSSUsxuXGB5drnnur9UQ8gt +9Q/sxWUyErHTycfFSQy3mhsFIUy0bGwebSeEPCK3DJo5q3QsncxidcIpTJMgUHe3 +MfyEA2ow+qnlTKROHitiALA+OzEcDdTbU1+JSUM4w0JPfHSdHcwlZzTQu5BhJOjQ +msS3letdX5iqD8xdfIG7EWn/LRupTJjSMsMwspOq9Cfx9kcgI+n9rPJt9iL9eSE7 +gBhUc6Df3JDuX/qImH8lMzE/F7H/7XReCdWXinApW94SZttTbnEWhMhpsMw1psNo +vfw0HSQnu6G6eRdbZK2TCXgzc9BwGcowfQXbv1UvWAhoNBw75UO5nOzCR2T7SakE +R1fNHEQyjFBmq60r7AYitRjcX+nE7F+T/MatpBKFoEz1aDxWfcRx+kwuJZi0sw+B +US4PWVIGN2SFfShp9JpVa8Q3PQmUFyIWCPJlEbBohTTja/nvppr8/OSTg3A3a1s1 +J2DTscxDmQGCWRBbO2LlQE495ylNMfS7BdTDrp/Xv3h+pM+qi4my8+JOC7uX/9dL +X6b7Kz9d8HlxQGug/HvnaNN6k5sQpRJN7LBGPHUj1RG5/kw0WdEDs12SfXfMle6b +q+kwPJhMAGv7CfFXKWtCjt4M5sLCAagdxA== -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl index cf47b0dc79c..71859de222d 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-cert.srl @@ -1 +1 @@ -05F10C67567FE30795D77AF2540F6AC8D4CF2470 +05F10C67567FE30795D77AF2540F6AC8D4CF247E diff --git a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem index 2dea2ccd837..d4466037e77 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/ca-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQCx86fkkU/BLazA -WDZYMCFBjGWl6JkD8LIzqY7d0VLHRNywkNeay7sgcGdEmyzAzmswgwO16/gFxw2M -IPzwiYXtjeDU834J1qm5CBsGC4crPeV8avqvpFewrJP9uyUpNa7Ctza7vyLo/zVo -AZAEAvJ9dKn2yUlx3F5vcA1cvy/Lew18MZF+9C06BlrG+kFulZIlnv0DlkGEzFZ3 -Ko67S0wk56hiJNLOmSIn4Dy32mCcbcCvi3SKCYqEmwLlXx5lU3tdL46JzmKieein -6jp/xu3SH9ZfYDixQQHuI1H90UtcJEDHZPqBBi2xp0oYFvgUDHwePEZK0OPOoe7J -c8AtvYwSS0QESNjnPafP8wQ1aYdKcZI1RpGvHXyJGW5kw/d9of9YHzIEJRdtBvx+ -RRdY5N+2w3LHM57NHNuEtRdriW8zpR4xS4aph9SGlQlrGGsHwbPZITKHVlUuvlgL -//3KlhNyppidhHbQJszAicwEFBMqzGVpqxawc6QAsjKtsxRLciF7Al6YKnluKHcS -CGZQpYm6NRRq35ajfoV5wIUcgo+1iQUNGT6xHUTEaQWIWHZsoDyfpVn7IBzrTZ7I -UriU1xiU7bLiVa+CKHqurSn1hqH0buL70+nCC2Y26fVWscX1S0u207I1kXPucR6f -im3I0n8NnCR0DfOYZVk6THvYz4YLfwIDAQABAoICABZRI14j5yen7cFVjsMyjgkl -bV4INKBs4DxgaF1jMglxlmfCUnfEUxx3XEwbVdp8SK8VzzJSfJFk7EsFnBMifBxV -rbunKchcFn7xCEqSyYnfwlb/J589cg3jJtAsVzW62MbsqT2Uc/FaiD0Z7RDDuduH -9QTRK5fO9jzthY97HqhbL07C/Kc6Qi3DvEC2A9y1f1WegcagYmkgIzvgp3PPtqXu -M4zTZ2inhcQQeCzqgzE7Bm49hAkHt0p4Ej3n1u0IMjF2lF6t9mq/9TCRzHJX5V1z -xrPBYnrAV1ihL1gwlk3g8grPnCbwOmzMORuaTdRd2HcGQh6B4f/5CPRUwuY5nkY7 -UMcX4rbRCcBDzG8s/NllTRaVC6yvEJPN4B/zOek6DI+tRy4tRj+BZO/1771bindD -nsYAklxbbpTiS7B073b+as3RFZW0xvmoqLyhzRQNW6GqSGj4C0KVigkWQ7Br69b7 -O2++oEurmLqI5Hm3RsJeJJ9obG4vKhPUPdtStF/W0W2TO2i0gpTOm+NeO5uYBRB1 -6OvhJH9dzMi+a0ekCpdQeD2xG4NLzwSe62/Ozz9Oi0rpAqHHhu+SvF+WEepLbkyO -2zx/OYpFK47idBRCAlHLC/9UyXpvw2yU9sleElVuHM41CzMe8Pwj3Qk0YdiHHDzS -Y19XEVHh/riXUufsAHVhAoIBAQDpUe+UJLgAIT6boz+MPLrPIGLeazCySEHEsdnp -jGuAx0M59PIDn5OeXQpMqCIRFDw6zhA/4gji0UegFRPIGA3Qduq+GsjVYRt6SHLC -N/rBRi2xg77yyKOMxv/+nwKFh/3TKIQbUc9EQj63WGBGCHu/EyMV7i9V0j8e7D2u -v/Z23nV/+XqawJXi4u2bHB3M/upOoclKfb3ewZBVouajzZc92kNC8XYfPn10Eofu -Pz7jxDX10IJNmzIYOozO9mlBsds7nFIBXS5wMz3iVg3GEUB05pPEKoEtZGrw474u -0M+gW9d7PV3qYdFgjSogiQf4JrSrOwXJQL/26nyfRX9QVplxAoIBAQDDP+fFT7Zl -eFLvouZu73lr++bV1+LogHgX+GHCSIujBELPyFEAyAeElFKFqnJ/NEOuPLG9X7tL -PDhh9NUijcBTPhVvwbH2/CRBdSX7Yf6RHh5fY+2Ik3hTF81L4bQcf0fgyX4roJY9 -YqpjQWFYGmSk4niCqWd+re/ZrYx/zpF+qgN21v37BAZNOCI+KidFScrq29p8kpLj -MdBWa0m7bhJcv4MPO46s2EJZVdczBU7iK86v5NVrGz7fPVm+tGxEDpwhyfYiu961 -U05XzT+trAaBa88KlSKdmUFq3qDdC4bFb6D+Ra4g+mhqNGtfBYXsavnENZgt0N99 -9M/dgaAEa/vvAoIBAQCm4YcFo9nDpgOc2H/Mc2d+XIC661oyOkJoSHk/dcMyodNw -scUkWFACdjO2ro9nPdzyho7S0n5elSew1UKH3MSMtXGjNY8uJ726kfUa+2UsNmDa -VgwOpPlt6KwTV3I7RhCDprgOvk4MWYF4LAr4LHsuKKbwuaM7tByXpotb4UuMrALI -3Q0XgOX0GTGvvsWF6VJ3mXpbAGL839+3kMN8p8EkaWewivpc0Jp0mgiFnNEDokSi -JFf+4CFNeRtbsJ2KcocHNQDmntpnQA9kQv6pC4/ZzU4lge1RJUDkOVC/NXU8ElSm -fjcdPIfAklduW/TKRgz1aEr0Lo7fMcqfNNsiAD7RAoIBAQCaeplZ13OsXMLhrrU6 -2GXtNeSxFJoG8n4SGQbfvJ4eYGSsGQVd5OVt1BxmfTERy7wwwvytpGx/XioN9rQb -HqQoOFqljU7M5zmYQKPIfQP4tSe6uUlaYbM1qwNXIkBqu5mXFFSrF+dGsiW1Wik2 -l8tBWZ2XY4jrBZtbUqBzDnC3ErSi9f8E92408lDFdnyTqYrOvxviq+VjtCnt9fzk -OnZ0w2FiT/DWeFQmcnBNgcmj0J07NYZVs7zOy6+R3xY50oVdhnkjihjuxfaaKV5U -fmK3SyEIcm5s2rCTaYlE2rXKyENMar0WgojSXp8FE02efBUZVH4O4c+xzFwaGVEN -rpIpAoIBAQDnAmhVy85n9oX6Bi+bXg9aZFa/teiCX33lCjNvNeJ5GljGqO0X6423 -6FVg91BYvnCbHQ0frqyKimVxNO/XYxCnid+Or48Cb9LwD31Wyip3UZABljg5sTb0 -fiNK0pxx6G8x1MsX0u97LogGwd5bMuff2lMi4xpinkz6ydB+fYsmM0UXGKsNkB/d -zR1izlqm87TMeQVi+pVZuOLmg/+hXVgISI2M7TlSoytODmpnSg5SZbaT7ut1IIwm -hJdWMTPHLv0X7NwsvhV4Knu27BJBw4470+okrOLfDuZ9+LZ6JHvYg22MywjCJx9s -MJuwAZJiZb+dQc0uwMkAEPMEfOQ1BI1+ +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDBOl8XlTZekZwS +oa9csoCyF2CFRZqoHBMgJv+YVoVdmxET+rq5iGC1nKJGhaQTAl5B+bbpgNgeT0EI +FVU01b+dahjYAh0e1X5rX97NAZQJ9VyO9fU3pVq7VT+18y5Q4w5ebm29yFgdGYXJ +FOe6H9JGa1CH66iSmksiqmKfx4I3XM7pNriIL9dnl+6+O6oX6ugfa5oVaWv1kQdi +2waMico7GOlHUS5Xe7ztsFUlnZsvcMVYbLG5D/czWNyWxPNR1DIcsOBf1o/q8k3r +CC0mFBr8eA48bz/M5TLy+DfNf1e6vJ1ZMIrvQN6MBAqlM+aQB31ZOlOMxb4VPu00 +yz93shcCZPaNXLcY09MyrcS0b/rLPHhEf+5aCzg/s76t2P+VwhxneViC7/XA38+O +F0tFDEXRPzNkUJxZkwzwystSGNk8aUCxL+LcbWPYf3nUQ8HDz2lNe4GPGW6F2Uet +XfdMziAxiEwK/Aw/8a7Houe5cv1vXGyCnzY9EHx9E+hEj+ZVt30vRkcI2HY6Hjmf +LitHfqcxxlFU5BP/R3/WZeDX58L7xb7xd1/1NlXgZMcPmYHBIqYSPrlI8Wl7J+5X +jbgHpjwedCxJ0A+W1iiE9aiHlSEFjh8z+YPrRsQrH4P2M6FG/DL2Unvp9hSw6kmC +lD8upmX9FN/LXoe3iH6yE+srseSM1wIDAQABAoICAESmzQdPc9A4srdp66inIRNl +O91N5AtrYh4vzOrefqbJw9ER6Yqmmhmca4lbzPYWBtAw/RxMMzzdkrDhB3NsqBYx +Dl+crKT6XF6g4sN6lpLNMpP2bifVn31kZezq2B9T9JR3bpUZm7jHCyHsxAH8cVSk +pymLgEqJ+Dz6RW0YULsDxd3VG84DFWiQcfnzr9SsmMklDUsjOAC5BONKBzMid4/0 +o1k7zhe19mOKnU3uh6bczbjDcYdbVUxQe5szoXGGz0EjVKyoSlzCtw1Uy9m2Ffpy +Wzh8nDHbkvUBXK4t2skVX2BDX3Fu3kLn4rJcVMQCbTYpA2tGDGl3AIcb2VJZG1QX +lWlrdnWIxsn69Yzqp9f4XE7PByJRcjLVRTROjzh8Zq2ygthAV3t9LOLUb34HehSz +56fENAIihwUZXHJoJVIcN4/1fn64TNxzeXGHc3CzFeeV/NhZb2ERBXT2SO5p3m65 +jX+vuqxsBphR5eHhXATIZnEOUwHGk4FcFu/Zv907ssxF5RqGNG2UZv8MNc6O8dmd +/uRHuCaNg0LpESS5Y6yu3lLfuVjxmZd5k364gf2U9bxedKMED8LLyzLWMAVLJ0wP +P4GuJlAKldoyhOK1R1QkIS84fWn+1YZbtorEvVtauN6fJeZzxjwEGE4TCTN89tjp +7Q6HvXzo+QUvYBzThiM9AoIBAQDtM8AkkLKEl4w/8pz1kkYXID8u9A7u9ltzfqEX +gn+StuhhOb50EkMP5LkguvHS9u4tHGl/pAfaaugdR2/m0658YZqSxFuMRQk9JUYl +fkL2zl5kQh1xC3dRhB7Wv/LGh701hFKnn54qpkzBbeMTQEkLNxkmeCEBVcJTwl83 +TnDnIchvC97CbHO5z7cnkF3glSVSVW4+EO4Qywly4n7A8yU9RFG/d20bnc/OwCah +Qxt2rHzesrJF2T5tQyqIpZNTMdyLQ7qwXNSCiLWjyF8jbah/czqE1E6vLsxi+JCA +MLc/5JEWERFJA+ck25m5EjWy7dkyAR5zBZbkVavgEHy3HR3DAoIBAQDQin5bjETG +HMC/N7hc7eijRs02qFvqHI/6nGszXPt+q4kTOMFnSFFcJEnshx4dlZ+MaPJoN6vD +6Vp6TA0F8UcgTTCxIpGrjfdTG+UwmOU8bZXjfJ0C4TDAaZhOGe2qSZRKM7ilZ8mH ++tZtEIj8qeFfOsWzeWYVdYFem4xLaJzHE8/O2s311gP5VSCeDhv9wCfoajz1h69G +AGYxCkHs7WEYdkuAHr+2/ln2CUgVIsI/Mq3zeNqkH2W6EcvoPaE33y81L+ccT+O2 +Xzm0E7gqyT5KMpS73MyKqMdUWIgdrXVolMRe5Uu8hY6RaqdD3JeEOriZdz2CklqI +bAtqQcaxb39dAoIBAQDNddkGEyv+e8KpFX5tnBEIMQ9pnU9eH5ihVts0oP3d2H86 +0CivI65hEu5kcmnLIca23nhnbm66+4Uo71r3Zv4pkOWyIVFnE9ln1GSJT2+zlpfe +YgJci+EDg9yuiT9CRgtXUtHBjhoTuU6c8ZESeMmMZGJtlvqHzH/xy1hscI6NNg8G +WRLP997Z4Rk1tPAL+0SDp6pBKqr7ctMxZw0CFtp30ji5Dzo8BJse7APKBXXBerEC +uWh/5igTvm0WRc1x76uoGbeY0h/lHYOwZXw7FN4x316d9cReK7wwMjwAPbJNLq+Z +nJkLVM0fePQTgblvzmQm0x9L5FiKWcbx43YTqq2/AoIBAGgyGopRXneCHEguZf6A +nHEgSh6aJjqmBMZZ6zIhrkMPIBErlW6ucoBQXWNB66kqfipgsWWjEz5y1NMwNn2W +nUwlZUPMGu6Vo1KyKGSMsFTuCCs+sxCiRYKOABxd1iL0WpRsnUB/x/EaQpMB5QAh +PUsgiXFwIbJtILONkp6G6XaKjpUzPB8tzJSClSaDtDaoEQiONGEVZ1zZjDXKg0AH +JzFawcSFGJMtKPPGlW6CDH4ZK1LwOoYwbdp3U+N5D5lj+SlhAt9hh0wy7hjslWND +Y80c0rW+z7AkPlwFVBVH+tbDJg6oEJxkOuSrXmGI0WevE1cSffhEedmZ6mYv8T/s +jr0CggEBAI9xrRumnEM2Xc5Ui58xnbI7/29DUx4bZRQgGsplXGzaS7puI7YKBcSI +sq3wW/jhFzDJZVZLa5RUOyLJJbLkSiFshD2gdFmlwbn3mEwDeUyInCOcTpWQjCTJ +4CS5Jq0aUOMwYxgnXKFgQOZoXAF5BfeKrBKwYYVz9BM+vi7ytKyZLjzuE4P0ov1Y +It2xCffK5EvpegjeTfv45UguDNOKn9yEnofv20v20q7MvPsLhPOEHJoLjlH+Vwu7 +ITmkZ+Cww5qLuRuHO2/y0VcRGxqcpk578QlNssO9dLkq0Bs8ohvHdFBna/PfCC2a +JLQFvYyizF5x7A50l4gv6PKcsyHFEyQ= -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem index 068855d96ea..c8fd22c9e40 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRtMA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyR5MA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw -NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDgxMjEwMzk0 +NFoXDTM0MDgxMDEwMzk0NFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgT -PPqKnBNHrxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0 -PV6Io3KzmkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0S -gBgKUDfSxnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9 -atyhYgwuQHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ -1CrlZswIzR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyr -AsLf2GAbU5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1 -tMgydCPqpxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6P -c0dFRWi0VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7Kmw -yLFlSyqtXkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1 -ZUkB030FYpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJ -uyOnfYaQpUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAJ8XAILsGKVXyCBMazuq3k2rup8kkNU/hVg1RuYGmy4bNClNN6havzu/S1SO -/g00+tlXGBlSiPlRXq/p/oZgOI/efqIWSBfcmHuR9STdELfa7zugdFpscgaTOhTq -Ko5o1B81pZKw6wzVfn2JlGxnEy9e+lCC7ptMdFiBqc7SGrapkjCjLReszn1Jctk/ -9lsSvhWZ/7GhvRO/L93X3/ZM51K7VZxEwFnibULApObDZQBFipYdfKlrrVrqtdj2 -M7Plx2Hh+Ivt16Kj/DqRcRLcWVlaM8rp4QAtjn4bDYArFEGGi8ElWFRNjs5ztE12 -f0Iu+yqGmvDn0lHEocNf8fgxHIN1uJ2sYImS11Yn7xHp5FPb7efvYh8Ih6voCaTg -NojHi61q26YIU112A1ylStV4xMKgxt2rqRvmc6UTnWDtzNO9jp3NscQVHtUEJpv2 -Jd+JsDf1c/w42KTwTyOAz5j+D0acRmw1YRsv2BpO5tcly8nvdMX9k7JesdiQL9bx -ik863yOLG0AOMdWtZPVG1BAuiUUlbBS2RRUp3qsP4OuJ+eVKlXFieX+2NuzqyddV -CywaA+R05nutX5R34h3Cm2MmQOERAk9FUeHFX7cZMAXQRcmoBZKtUfKOGUKF0zOT -ZEs7xmHSqAOTx8ufDU26pnbyCxOBYwn1DVX9nYSskMGMSfGU +AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALygXybg +ZQVWMyk3lYQS3DNZ+1Cs4qEgCHREtEKR2MgcE71ShJeWNzZEtPjjG18pCCpc2g3+ +TDM6fHlSmCx1mH0QsiEXEZsq6zbQVzA5a8ts1tHNOQKBK3BxDKDS38MScYTOo8nz +27zvVIJGSOPiuZKzr/aZ72yUlbhDEUEuf90vruAnZz3CbcD7COQ8E6XdlJfkCBMR +8MGXd4onHyWcfWU89dRvV/bfw4FU66MQkQlHa/z5DTQIH3Tst9Sdj8lp+w6sS8nP +p53yJbAOJ9hq4zUNyUjuNoeO+KFnJ2yETIHVGFPUuXqpMPzSk7AofEwxRHc1xtYa +tYM7KXKKZ2L50PMxRHEMbp/xgaSB+xOW/725uB6EqlUNQZZiuyhsk6c5G8alTc1k +nYhoNIleyfn+tmG3jLPgV+gIZnIorVtoZKh8bio3KAuqusONnWFHyhLhr4Y9tEMR +1lNK4vGCdoqVEPRB+Jm4oY7UQ75Dz1d3ivKktGzA4oSNwxfc3tBccKVV6j7qqPmq +IcqkWBZ4M8/6ulAvUB+9zEQ/HzajWZCW+PjDXHTSnSVybelzFv+ixBAFt+IGFGNW +SJRmu4zWOj8ZBMYJ834Ft7/kqfiTmGQzuZ4kQg+4qM5PQpkDnA+EwjrJ/OYeKbIc +9kB4WY02tKTiziW1hMr0Yk2gdWewmyo29bplAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAK/dfx3lQt/ySYQ3POrAxZETvSk+Z7Fw2G8nJ3CNPtrA2nWJFvOS11iUjR3n +nsF6e7CfApHNE91CE8x+eCm7nukkuJTq8Vt0MszMQXyEIOKuVzTMYcIm7o3MtiHy +lGVkueOfYTV0d14aZyY9SKSVodtP3aJDzTTdcj4r/zVeSqWAD3qwfTxHTWoWEBLG +7/6yV5fWVN0ntzlf3EwADaV4qtazXmn1/a7HokJ0PlWRkB5LqqhU6c1dSAYvQIdG +fmZYZXAQKuhfBaOk4mLM8uNc3WzbdFeOMhVa4JpYdqVjboD5r+dWXyIrZPkHa+oQ +rZAe9GTBrZjZ7dfi7g/kdDCsLiRC4it3+cH2M1wwOcO/j8ZGvHil0bXDD4ZeRNzi +rgj7/U4mtaUn+LPZaAn5Re9fMTXE6VWADMim2aQnYz+OULlLu1O+tAcwGteUpuuJ +IESit3T2DTlC7ohHhDo9aq/LblsIZ1fPsITI83RIovYArdBqz52ULPYkkXf3Gsu7 +bhSU/XoEu7245AkOWr+dqcGcspU7bS7bBErXTI+G2uwNxY6oSBhhZNeSvIdvWPlQ +wIpUUZFd3rqTHQn8b7KHXoMbpZoTXBWrKxMgITSHWsxyK6htbGpqN/nlGf/U1tcF ++Ww7lXzJSA0HGJvgJcSwBSs7vaLBkAZpBW2rzoWGip64zrCL -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem index 8d9b887b033..b4186a06ba4 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQC2iYA4Ezz6ipwT -R68bv41cDKXz2vRDb2h27UovPI0K77tjvp0VfxKDgW/6Jd6UJ+0b3jMo9D1eiKNy -s5pDEEXF9meR9ATTwDNFoo16c38VknUvcmfwSm6cqt0Hxz1/RUwHJ559EoAYClA3 -0sZ0LeLZ99pDb9PavJ5NB9SV848SC8CgIZZR8HiZclQbdWugBv8jXvSvvWrcoWIM -LkB7+89DnWxfDXwVoVjgyz5GeC4+C5LVWpGEouV0ubHU7WF8IvJhA5LnGdQq5WbM -CM0eOJ5ZpCVaGbLDGxqYS88VsP14g59mbTuEDVNXCWFP1myYjGPklIyMqwLC39hg -G1OW1Ipbhvl2bGClI7681VeoGB1p3Fzw1Vr48/ejw0bHOjNfa7K4TE/TdbTIMnQj -6qcSZMHgvPyEbexmYU5cUDS94qBbI5LD5r5tddtNdl2B3fjYrlSGbnGOj3NHRUVo -tFSmVI7y1UW7cVjJUmsE5NEvwurGy9q+eehsMSU8JJvhCLiupxgyoOypsMixZUsq -rV5L1Dp5wJyDsxB91XY1MlJ4lG3xU+Dg7Oky8yJBUkiQze1uJJT+qdTrdWVJAdN9 -BWKRbda9/QH1iz1i48MyrzFnp1mk0qWSETt4fLF24eoLe81WSjTK3/ABSbsjp32G -kKVMK8b1r7iXVmRE0u5RYKEaycRg7wIDAQABAoICACgRktW8U1xj5NLOn3+l0q/s -DtmyrH/JCtNgTzKDRiqqaSYCB5VaaYP4e84bVfqLsR627eAFjRsdP1PEXQ5vmgFU -j3OYbx7UR+z3O7svcywXFCYwJOS4UgON9iro73Tqjz/a0I1/7CJa0TUPzYRfNjbG -k2DOQWD4mn8qQt4Pss4xSj1cYhTmhnKYiCHm6pMcNhFbnLafC8AWpOErnfgZVGvx -OIK9AQn2ev4NX0Q0yWHRRJAU63CEGX4/7OtimE2Zlj75e9vC7bHk3WXYYL5Li2b+ -Azz9+yGc53+a1IBcc6dqrSjcvX3FNxAZ/QR7eycZWiwo95lBSL/iRysBlJ29VglW -YScc2Il/xWNp97PORwsJEDpeWq5UYdinARFK6PAGjnxmADZNAeZHP+r1C5CQaw72 -y31aIrhL2s9wRPZ2DytIlkSmvffIoNpZJW2AyVdJn8L37Aot0Hwr0SsU8/zibvZ7 -4d+7/6rnPnE1jTZlpnDgyH5e5Mtn3YUYDlPAEQudfYyvh0QrNfSOMnetWSYTh/Oi -40iQM2vaKDiK91deTR50g90A88eSgxWMGG6WUzqNoE5CwwiNQxHPhrmFi4H1V6y2 -uaF3s0Gx6aF6j+ws1ImbgrkpAbvgTCENoDtmS8MYbZjXgTzwnG4UtIwqdc5bK2B5 -i9mdb5w1v2v6XLUxVvKhAoIBAQDhVgmw/ssCscde91dWLMylm5Wf+Q7Ry32eYSr0 -UXqYUbChHkYNK5HpVY5b6Br7C13FrhboDk0hsz3agOFexcrua1N2huyJ8gGjlAzz -i+286WuwbhsX9rYgqTvAZmQYpklAfWLZH8nlwtt3iafNhgSVaa//A2m4hhZagElT -pctVakSyG3OYaNDTXBDOnZi9xagc3eWmxkS8PWFaYC0DJCw9yf+9ynH6+FRZg75x -t7nnDd/eSxtW9QUALUCheOO+yIp/uJUiIyWR69cfojQ2vNx5t8FzpK6EqHFCujhq -e+kJB81BAc2P59O8oGqw9fvc9pzCQXyFbx7dtl/Xu/JyMEqnAoIBAQDPYH0afED6 -qyvtQ1le6bjLW3jGCaymflgGwc0sm/pm/3XY4WXhrmqeSIF3tbhP2HnANGinW0wP -nFd0vAi8RU9UxB7iIUAZ6wXRS8/YQmv5zIouPzSCpmvW0l0IcnqjDUS0IZPRo+UA -FTzS2KIQ/yOsHSZoVNQe/Tsdk7Z8XVAJlq+YZ7o7pGR25yGPleUUbVwbIhoEiBPq -EFA+4BErok4CFQB9J8jLRdzmTEQFjQ/w4w066ZkplcIy009a4+LXIvL+MCPG3qMD -+2K/HlTYfMd+CyozaF0ZGTECtakrK+PWbbTj+VV30SD9Sckk8ZqIFUq18Fb574gF -K2KSq5SkYSh5AoIBAQDdenJ2HEkvcctzJQsbsVbII58yKFsPi8IBjKHql7c2xXwl -MJtL0JpOzH/rB7yVKXvWk6ECHyRizkkqXeil/STTqHvVkRInF83SmO8N5mgaeRcW -x3Ir4JrsiUoodrtFmxN+pn8kx+DqytZprMxY7rPMo5+PuCwOaQTJmTP5Woj7gELb -CK5ajBNM2z3Nxwrc48yz6soRXOksV+w7JzK21rQBW2zZf4T+V1yYyyvBnALF/lYe -qJXLp3Jt1QykaSz4VSYEGUnDzuXbggHknspRTtopbJpg7ul1jBYeruhKiVXoQVnV -3k7MdeEgkk+rdWtDqMU1Daa1hB3Db8DOS3YmFB8bAoIBAQDPDD476F0UKTzlWf3r -9pzLZNuTlmsrnC+VJ4ALjvwWQ+7MiFapWfQXbrrc47FO/wqoLWtj1JJ/b5Ad+/MY -znajYmCXU61lczLOwcuV1tNph59bBz4NR82ZoVTDr1DkZMX4tyGYCPQF/i5JMYO2 -Rpa+LCiBuFhFTH3uTOHBD4Vu3WUaXE4jaEHqOWBXtMgQehOg/45MgfSoGHuWGy7p -itYp3AAt9T/UPD+OLA0qIaoNzxQRgtOqIlzPVA0B6U89jyZfRX8i+nx16FKyEL2T -nBmtrcYHp6Zz/aPiWa+6a8rB96zIhNOhmko+uaG7YgHw5pk+R+T/C/mZd7SmTetN -p7e5AoIBAQDXqOVl33+eRw3CxNCJZsfglrD/Jz8VuZv5MZwRolEB4IQwm/whXdnT -34y0UUpUQHnVepzAP3QjsLKANPUY2rKO+f8NAX4Uakzn43QLI+hZcxx6hVkV6OkJ -Hi9fwSEBZzx5DWEbxmYMIGlaRL1yVff8wevQci7WA4rrztb9D5B0c49ItCrMkLNs -X6+9Bh4zafL/FxJSkTahQLe+KGNXSGGGrYB9M31oLSKKM955ZTRnICPxuyA2hffx -8lmHZ/5hmP+eMKoAJ9khilX4LmnkdXJEZ2w5lQTPUTNP8ggaXvFWpijjUsaXEdkR -NMnXQHpKE2RaT22UJ6z3W+biQqNlhlVW +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC8oF8m4GUFVjMp +N5WEEtwzWftQrOKhIAh0RLRCkdjIHBO9UoSXljc2RLT44xtfKQgqXNoN/kwzOnx5 +UpgsdZh9ELIhFxGbKus20FcwOWvLbNbRzTkCgStwcQyg0t/DEnGEzqPJ89u871SC +Rkjj4rmSs6/2me9slJW4QxFBLn/dL67gJ2c9wm3A+wjkPBOl3ZSX5AgTEfDBl3eK +Jx8lnH1lPPXUb1f238OBVOujEJEJR2v8+Q00CB907LfUnY/JafsOrEvJz6ed8iWw +DifYauM1DclI7jaHjvihZydshEyB1RhT1Ll6qTD80pOwKHxMMUR3NcbWGrWDOyly +imdi+dDzMURxDG6f8YGkgfsTlv+9ubgehKpVDUGWYrsobJOnORvGpU3NZJ2IaDSJ +Xsn5/rZht4yz4FfoCGZyKK1baGSofG4qNygLqrrDjZ1hR8oS4a+GPbRDEdZTSuLx +gnaKlRD0QfiZuKGO1EO+Q89Xd4rypLRswOKEjcMX3N7QXHClVeo+6qj5qiHKpFgW +eDPP+rpQL1AfvcxEPx82o1mQlvj4w1x00p0lcm3pcxb/osQQBbfiBhRjVkiUZruM +1jo/GQTGCfN+Bbe/5Kn4k5hkM7meJEIPuKjOT0KZA5wPhMI6yfzmHimyHPZAeFmN +NrSk4s4ltYTK9GJNoHVnsJsqNvW6ZQIDAQABAoICAB+qwt65fGw9cQBh8wmFyw8Z +862zIKcRRCoU4LhuWNkbkZrOsDDsyJ8lQMaqBg3fSx4lkXwY7b0J/By4J5ELOwsA +Ew+oGk4VEgEVUBXqBfVUc+osDH6lpd/OMuMLFALhZUp5EJlWhdT9zw/B8fnENghx +f0CkzkPJ3Crfo5VwU4oUN8UtCFGfgyKLYo5Cob/OZ+RqJYriD6vInIqGj92WHJ0y +80hzWu642U8srRSmstq+cVw6iaNrG7DbDu+0dg/H6Zyog17BrG/BV99rOPhF1Zgy +FNFYILJ4Z7hdI4q92URQvLFSPMNhhTZkueOwoFGAUfC7iAKo4EFEXOx8XOgKbbAk +gumbWjDMuLcl5NINpnh1yZz5vvDph7rkBjGBpja4FDMyxbCMjsLoTXtM0jCr/59r +hhriPavGZTQ5YhaF8dv/WaoOjXD0PDo8ujS925CoUJteuS/gNHe28chy9rI+dFVR +5XpO7kVQsoAb/54NdeLd0AKD/dQHfStMcxMucRRtZ9muYsXvIsXaSw9HnoG9RAyF ++VuHIUEs76+EAwYrKroUqzPwe6FD/LjmoQCiD6lVudpJQkY507eoCkWMrERXgGlF +y33YFVUmQ8+Uh1Tfq6XWGNGCxU3e1b36s557Lixbc4n45I6Zhso5lBTokch7dPHb +Gj1Tlx8fCSKmV8Gekqc/AoIBAQDlUEBcKtGK7l08jQyTMLfz8XMazbSuguVkIyU2 +aSW/qEl17F8BV2BgRnqCneIZER490RtooOpnAbTZ2AMw9Xx2ZCarCbYv/2ipp+fq +TwzCm96LipLXQCeu/Khwsez+04J3xwg3ziRGCOCwDT3aI1yvuTA4+DN5qa7gJc5I +qTyzQ7FkPcq0JOVhpqqNgDf2bHxteGUQz4Dnh6KVJ5WktVRS/x1fN7l/InUz4pWR +UmRGEmHcY4m5OS0mTDioRq6IoMDktjX7b/hicFC0lKBNGsYKW5FZgMZawseF6B9M +J4UBS1kuCacktumDROhxFvKym5n+qF8Yxe8D7usedg05pKrPAoIBAQDSk/cGkYoL +AYXG0h0Pu5GCw52MpgmQF4FKU82nUnWCFdHJQDKklBplD3e7IfjgjgVz51omPhq5 +jqcqvQJP+PaM1dPAP83Qg8IVJccrhKr8Uhm+yLsM/G6/zR/Jh+fvOXuOimYQptSB +LmMV9R/WS4GJAAWbrEkFRfXnV8U7hnjsQyb92fy3i32FZyB9wbhvA2830rEW/7X+ +bEFnqFliamxj3QfiKMH3pWKmBps1HiejgwScMzrAshD+SYdegNIsBU25M9fduBj0 +P4vHijfpikCW7KI8g/Ua7zomNwvPMAcrqf2K12D50g7sppPLeZVyiNxVupeU0iK9 +mwz7EllTckSLAoIBAQCrYXL4bzf0MXmVS4VebEbPFmEEY0DqPAHc+TdSlvsTZl6g +t2u5wOfSjFn7TKwyN8z2PlXRuJH3MhtLxFl+ObovcWJE6hkzfhtm0IpPX40V1RBa +EdL09Im0CjiJITESslkL08CclIAfp3rlmL/lTBB1dF5J9ZXjE6IS+65UN8eZ4rmt +RVE9mWWlub41yuIIVcQYvKYsDik04PXtq4v3cV34OZpmE2ubTNFzWaCW35D5nbtt +zyabO9Q2jzi46zp9+kavqE5oAKMOTwrCK2IyvhPMoerbjZaxNmmP9o+Nhuusr/eS +pLw6gEm9Rb7C5ne0bP6GiKeqCCQ+OE2YknoH3r2ZAoIBABMZSHZYDOUipjd6Jxie +MnxhsZBoHsXq29cZ8rUSTYYW5IgUQoOayyoPVRSr6B61wtjYStzdFR9vSMofim4m +bnYnjqUJJGmdCaVRX9camUd5Ssx6+DwdSQ4sVI26vdZ8qDAtbURlXTC6PFJpgRG2 +l3ENh3SvE5Be4AazdzgG4kDLdmXft4QOKrxGtjOxtkCbVyMBAP7NAnyo5OW55yF9 +9Ya5LVXdzHnhjijwwHL44S97WpewLcv8Wn41vQldNbK4aGw7v4Bkih7dBJh8eMW9 +vE3z+GL0tO5UdDmzFGcePAhByrUlA66k1qWwFgeGZ3+T2igMXGMrC+uWfH7pEyD2 +ZDECggEAT0o1NvWbsnms65w1oF8/2RFsvlTVeb0tpS9D3ijzCoeRv72fThiZ/diF +WtQPEW7WdYPWLG5UMOggake/GS4zF4+TBDNqZDcdVgu0esk738CUvLHzBl7n1Los +7CIOH0dT2Qook3XoOe2lLUD1arfqrpj/ygUW0MoHo9ihSPOact1jusNNyOlMAcma +OzSY6jJnRDE7vJr9h4+1fDJYd4PbSq83KNHJyVayaSk6RBxijPEjmkBZ6bWNdDt7 +/kihyxT3+ZBGvuFcvnBGFgRtUV6ZGsz/74MGDvKnmIyvt+PuvZueMJIqMKytUva5 +CG/KQT7wu3i0IjmX91IiDXTzH8oa4w== -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem index d5cd522bc8f..37288c0a23f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client1-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALaJgDgTPPqKnBNH -rxu/jVwMpfPa9ENvaHbtSi88jQrvu2O+nRV/EoOBb/ol3pQn7RveMyj0PV6Io3Kz -mkMQRcX2Z5H0BNPAM0WijXpzfxWSdS9yZ/BKbpyq3QfHPX9FTAcnnn0SgBgKUDfS -xnQt4tn32kNv09q8nk0H1JXzjxILwKAhllHweJlyVBt1a6AG/yNe9K+9atyhYgwu -QHv7z0OdbF8NfBWhWODLPkZ4Lj4LktVakYSi5XS5sdTtYXwi8mEDkucZ1CrlZswI -zR44nlmkJVoZssMbGphLzxWw/XiDn2ZtO4QNU1cJYU/WbJiMY+SUjIyrAsLf2GAb -U5bUiluG+XZsYKUjvrzVV6gYHWncXPDVWvjz96PDRsc6M19rsrhMT9N1tMgydCPq -pxJkweC8/IRt7GZhTlxQNL3ioFsjksPmvm112012XYHd+NiuVIZucY6Pc0dFRWi0 -VKZUjvLVRbtxWMlSawTk0S/C6sbL2r556GwxJTwkm+EIuK6nGDKg7KmwyLFlSyqt -XkvUOnnAnIOzEH3VdjUyUniUbfFT4ODs6TLzIkFSSJDN7W4klP6p1Ot1ZUkB030F -YpFt1r39AfWLPWLjwzKvMWenWaTSpZIRO3h8sXbh6gt7zVZKNMrf8AFJuyOnfYaQ -pUwrxvWvuJdWZETS7lFgoRrJxGDvAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -L2KVSFlGtuYjmV6sTYF0GlA4V0RvUTbLM7qnd321957ReDR8iRqj9ZnqKDEHeH9g -jfuW+TV/BeQFjTTAXM5Gy+LRz23xOwqcvxKh0WYjGLhDXIjWx1zzzGBaAcyWMllq -+o6LAUadST3yhfncP0A79hPLxOsPgMGd3OXGajKskDNmU3MTTsbtQ065HsBwo07P -leMx2jDkapesUaNaTmXERg6MT86hsknGsbO/tU3dGNy4hBuOX5O6bEkijF6eESLd -U4Xc54yScVvxpWqTSEAz9xHjIOpOZNfW+enbLdpApxq6IZkeVM8z7yy8DNjTJ2SD -aS/xKexqrWjWFxNa/CtKezkaZgmLs9jGGan+hmlNBeuixvJEekPliv6Syj3wvLp/ -L3/PmLgBzZj6iRdw5fky0swCn1qwpgwYRjBSN+SL0E8yG6BGKFWByQfwWbdOu9DS -lN/CPBe73yi8kYY5gBvBmPsrt3VMVRbXBLNM16jO6lkyYzyC48jTdicqpLsHazZn -Z4I6GZoUQKc9WPzSdu6tEXjM6e/2lkT8kaPmrae3JOKnP+lzjZjfplV1NylICNQY -whPWBVGaRg0dy8dZSTGtzygTNMoHS3zYsBGE4MuGZtm/4+x/XLkz32n1k58wAKxJ -JKafNaOReYFxJKd+ML5XnYOVICuw3nxQY+CeVZlz1Bc= +ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALygXybgZQVWMyk3 +lYQS3DNZ+1Cs4qEgCHREtEKR2MgcE71ShJeWNzZEtPjjG18pCCpc2g3+TDM6fHlS +mCx1mH0QsiEXEZsq6zbQVzA5a8ts1tHNOQKBK3BxDKDS38MScYTOo8nz27zvVIJG +SOPiuZKzr/aZ72yUlbhDEUEuf90vruAnZz3CbcD7COQ8E6XdlJfkCBMR8MGXd4on +HyWcfWU89dRvV/bfw4FU66MQkQlHa/z5DTQIH3Tst9Sdj8lp+w6sS8nPp53yJbAO +J9hq4zUNyUjuNoeO+KFnJ2yETIHVGFPUuXqpMPzSk7AofEwxRHc1xtYatYM7KXKK +Z2L50PMxRHEMbp/xgaSB+xOW/725uB6EqlUNQZZiuyhsk6c5G8alTc1knYhoNIle +yfn+tmG3jLPgV+gIZnIorVtoZKh8bio3KAuqusONnWFHyhLhr4Y9tEMR1lNK4vGC +doqVEPRB+Jm4oY7UQ75Dz1d3ivKktGzA4oSNwxfc3tBccKVV6j7qqPmqIcqkWBZ4 +M8/6ulAvUB+9zEQ/HzajWZCW+PjDXHTSnSVybelzFv+ixBAFt+IGFGNWSJRmu4zW +Oj8ZBMYJ834Ft7/kqfiTmGQzuZ4kQg+4qM5PQpkDnA+EwjrJ/OYeKbIc9kB4WY02 +tKTiziW1hMr0Yk2gdWewmyo29bplAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +BQWFyVKS6BQF+5jNHtyV1mozn39/+YVfHKN+QkNIF6EBBLE69swO4spVCyT0vkdg +5Dok6qomBf1bvqySyfvRNNNbFaUE3ySbUS0Egwtn9NonPZRS1oUNUM7O5f/hmRAL +wJI2nbsvc5XxeWPRpm7VKw9z3Rq9oFeZy6iMbQyRlYKDbUMcR+4x4o0p5GuFqXDD +NWJMAC1iilYiid3vE0Odjsy77yzLnTdzCuelxxOWNVQccF/Dv5vACpRkX7A6GHJS +Cc/hwuBzJxuz9K+oq4bsGDtuFOV4SyIbfW7OfYtVwfDNPjkPQmZbt4aSmpxtD8s4 +00qfYxCq7ZPU/yCnX3Qrz+VEj5vzVpzeeAKaHAeBFtJXzZdI1o+fUJVFmJ4pvNiP +Q4kJbkhTn+NuVQ2GycsGcg2rutcbNYVypP5wrujbWJll1DDB2O6AbCMgZvYR/uIP +SHB1kXvrY0rEGfbWoEQ66mhR+hrLQtcgkEwEJGQ0US1TcDi7T6imeSZQxvIqOuUW +Z4w72gaet20sSczKPKHIokJsEPgM+5dVCsWwZ92bJdADEVcf9jDMtUYRJw0wL8cm +3df2dY2mlioDm/Rj990jbdM4K453vJODZmuSfCX0yFh3Kxa6oAbe8GmuB5hiV9B6 +WxZvadyxvop6y1wUnHYv/q1ZfgufsJ3GaejPEm897bg= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem index 9d317f07963..5eea9eedccd 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRuMA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyR6MA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw -NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDgxMjEwMzk0 +NFoXDTM0MDgxMDEwMzk0NFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZ -EjiW3LmjGlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHR -fog8InraKCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29 -HHaglJwY0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5re -J/m6E36dFyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+r -wqgSeE/bGcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3Hpv -jYyj4YagdWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQ -bp5Zjz7Ojo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7 -lnff4nEU9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h -81yMaqwbQDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQ -UrwSU7L8RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn -/jBfdFHfT6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAF9fs1tF/yL+eBTf7F/RMdDrb1q9/YZCZ6btJH6CnxCuj4M3o4EkTW2PPSY5 -AeTX0zYaNXvlHT54vjdO+9H3ocyY0HfjVSzttw7qiTkvsssRLLW0PMpc8QMRBpz4 -CmD8vfjY63hKzE2cF5GyP1RveCuFVf7//wM2dfPwrQkIOtKrctejYjn1tOAfgJtX -It+RWvJ8T9t4e3KxYgKSa6eyYxyNMZV67X91C3jIJLgTTLwXXGQF5G8hH3KsclSl -RDE3CAYoyDTtaMlI6A3qDtmvfFKzeltKZc8w7uIbjgHvF49p+n4oh1WwDc/C8SUy -1QAx6DSSW1f470Egtfp0hJKT9yJh7C+/EdeAq8Oh1vMxYKBrtjswCsrFQ+bayEcl -2SzMLez2S/bIFSF0WaDqqIOZDzcjpXjbFlm/px01qoPDk5lkTPGA18Zq8mVc0y2N -R3vYzvfpigjkjXgMcOIfP1Jnlrx1x/4+txR723hUkHQd38nKENepsoEoLrcpmbIl -VAKYTALTle6jJKGf6oZf1TIs09Bc1Qs8Oo4IymubOXD+FlUSmggVwMiST15O5vQu -zdvidRHhAE581DKK04GLmWn0UE0Ko4uaNHAgl2gzZsuJQ5oZynOxmh/z6t+mgA7L -l2qS1WOq29Cq2qWrrfvqbl21LWLrf2X75UyTd3GAlQ19aqLV +AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALU3+Z07 +LANG4lgyVe9pjWQEHyAZn4iadESZqpkcVKFpeHXmEmCvDkt4f+xs/tJle2YmzmVI +OgTrhOeY+3Et9ydfkNM8N1LUiem6i0LQq4yhg8/xM9jeKbOzol1CNM9TbRzZXHKy +PX1Ykics3iRGE3wqwK9quQqYIvBnUT367N7w9kvsgvMr+E825BaQ2hmmefi61vXb +CRO7dyRPIt4z9ERMeAHY1Y4ahM1OwWcGNfhqPnMoscWrWarwIE78SoiUMXd3mVUE +27m/opmLbvk59qh8o9bu5sje+4iKW7P5LVEXWhVnbUptht786f6zTFzWEeG8XUF6 +TFQIAtO5P+QxVmKhsliEt/vCJdTWVt4TKT7c6T+S6ivFl9Fn3/KWkscKLITsILLC ++DKxBracohoROMfzYSGgDmE2d6jnQZ8F4U2SxtGelk1BVYTOLP2y3Rg/xlPB7FgH +iq9OJitIuUIN8yAtjEedQN47OpUt8fMMZHukcslTjXCDgzFpZVh/J+oIULBoRBah +NWsroT45rrDhum/n1W8jc8R7VfLVXhb/YK84unxxGduElbNwW3T0mtDMqHBdvwj2 +pfNmGj/kgtSSuD9kREnIuywyiQCLMM8N3UacMwPZadjFxeugBuKCXX0U4j/llnas +w31ljfEL9EMSuk1a1PpLyPM3VM9guOzX6FKXAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAC369EfL0SqOqGSleQfoQS88UDazzyysbTsNeqKqieSkK7Uf09dtXflg33mi +W+dEvEdcMbhouQT1nIi/nfeFktlhqtrFav5lUDQljVHbNteBQPMWgvMSGbuYlDoo +pxEmvCcof7C3IAzj9btEtMdo99V6Ecp6PfDAhDJWX9VQqvmKiwbpdVhl0Z32o9lV +XoQZvIYGMi15Ny/j6/8m9xeTql7fLq3o7xTg1fOVSJweT8MNYu8zYeohwDOIcLx1 +6mJLdx0DS7ok/7swMECdyd4zwCTgn7wDworS3MyPty5nVOIinlr51NXOKjbpNBTI +41z2NPfX793Qx43Er6zRBKhrRmF10DwiMgtrxJdF0khFjVuMczHZuOL3gVn573X/ +FTVHW/A5SyDmilMZRn+VYbnad1JAy/RqIb7e2EkkPzNMFMbUT8f++HQF6Uo5XALi +EsBiabf5KZ+uF9N9XN/7fOR+mPltc466etUqFCP+MjHRiLyisDYwcAay0V1Q350h +GEWWQj53akIVi3cEV73dW9co391xQ33EGRkqY4+kY6Uh7fKN+roRoyu62e8kKV60 +p3FjCtjDoOFSHknGITSofERfc7TMwBmhFBU1anooMZs2+zs352TAzk/qcnKfmSzV +PQoZSDuUzt+aaj+w4XisguJEq2Hdbt+41a8Sb4XA8+hzXUo8 -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem index ed0d179712c..26cdf77769f 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDVIAS4GRI4lty5 -oxpQrte4dhEWHO+WF/u3rG8P4jFjK3K137zTbrSeB+4TrFfNWaTBEKLh0X6IPCJ6 -2igrsJm9vAy4SWSZaROPYWcXlCcLOMjfFQgk4K/7S7U7L7EkvkxgOfAdvRx2oJSc -GNMNpjpTlXFMTpI53uFR6JCzqJoc86nKkywo8FLJWakZ/kg4F8kcPT+a3if5uhN+ -nRcgXlYJWb3mpVaeLiv006lVRSWAZYw9tjPcKznqPQCL667uVZYAzR7vq8KoEnhP -2xnMIV9ifSfvpcDsbBQ0eziC2N98qA+/+jTetUiRKEYe8WU0JK/w3Nx6b42Mo+GG -oHViqtsQ2cyMR/kHxDELLLhb7pauS++kyaVwd3HsgrtjRL2/+et6dRL10G6eWY8+ -zo6NXpaG7PXqb32wpogDZk8RvBiyrXi1343KYePdoHSMNJ7Fff2yMxOQu5Z33+Jx -FPRP7hCc9BU8OtVY3GT7aahMyxP/GmwVOPKU4+R6iqPDbI8fYT5erazuIfNcjGqs -G0A5s2tSrFPgjyBYsnMr/B5+MOiRI8J1HNPKzqEN3ux46izaZ/F+kYq1UFK8ElOy -/EUynjgjgS4OgDF3x/36z0ZzUI1Bv2e7QU40yYZt/KrnMchyNyeY8F+r5/4wX3RR -30+uddFdEPX84M6dx938h6Zy1tasQwIDAQABAoICAQDIuNYY+OvTRipt37IaCQF8 -Zh4jgG8ZIk9dJlaXVAYFi1cG+chiLSKIr5lHCArNiT8E4gE1wtNzxYcHw00QEMxL -CL/GFMFdRrw4TpkEePDovbtZdvprmP3FJAF00673lw5hlk+SApi7FPPBrBOiCEto -ixfgsSNAw6vcM7eMrR8wY0AnXMK7b9PYdMwxge5MfgJXyUuNNOvbY6eWmKa+Qnqv -ZcjXYCKa6YtWkr4pY+005u7U9DQViNSLypYoMXlYWFzlNkqLmW3EU1jihMzgFxI5 -tPwW1TpEsGm7H84SVeTuB26F9UUz9vJ4W8DmxZz2JhNaOvifi056BaKS466KlbWo -iZgt57ajj0VmYxB0ZL7QgQqb2xDZL12kU1AU09QAXJnpy/RqvV2HloKbqrOd5h4L -oME6j8vT6Q8o1vsh2zJuLXHAsMr30XK8x1HhLDDzln49gq/d3GrZkNrPDjcumiwI -o6PYR91Q4QI11kdqR/3005wV50g847uURFNF6J4ziDeDGsHqj2phmOIt6d8vWcFo -XBEovCZkXQUSx+4NgAAy1GRBjK6tLRRQnS9bGgkELS8+Jx84NlgKkH3m6+lKNJQ1 -o5SpUqmk1dYnpTv99U2+5qvA/o9SVy56wlfuo+u0GlbMjs3OmItXErg46UBPhd4d -HipFZrBItpw0DYAF+voLQQKCAQEA9ePjYyy53VGLq2+vRx02IZOMQOLaaBDfybtP -51ksHfCPg+XZ4jWsDH4EPn5DeUmjZ2nG8GYSuv8dAQ4v9kArToLDyy/qhXHzaING -uSd6KlTGrVrPK1Dyu2p69xYrnduD6Vm06sJ4olDq792rEj4/hdzVwrtgw+d1ZLXG -3ropWgrHQT8z7+B9CAIAOXhYlKrV7+UdbAod+j8OpCIHk5X3+NkT4Ht7biqzQvbo -pJJILFA1qHi230N9YR8ng3PHQYObYJ6NFBrxhpXIfXwbuPyrEApY3zaL3HbkYC52 -aAI3zy7WOqZSqRZ6aDzXdf2EMGusNSxj9/TAZhTAiJvwHdwBowKCAQEA3eNC/iMt -kmy4R3FQgti0Zq+CBUErMn46pQhBCcQreI/a5U4UT/iY5WGutKXp45d/BM2ztyQL -T/8p+85RkasVF/rJB2PwlzUZKAAq29nGXuV0I6N6EiMYa2LfFLzrrleNamPQ9Ubn -atp0kiyLiPZ6P0+Y5wZMirHlMyup+fzG6xsS7KVy0Z5Fy4YetP63r6xCVQ+Rdu3l -dvXqGb2Bdc9g4OxES1Zj7MKHg0b3ce2lYaL0cq0z3kJ52MAVbL9tQQOstJX41VYv -/QSVIjC5VACSa2qsqzquqxYOyT1U0l/8innHfD/uY/8907/q/JqoO1hU5LtvZ7OO -ZF/e/ycZCM2U4QKCAQAXUIJQ9v6wk3jQyoguEAD/8gOMa3YWA/OUJySOZRAfzp1s -/jBImJo1nQU9/67aIzdRKOBqDuObw3C2lufJS5BPo2p5K5PrD0DrGfdsuueEeAFW -kpOuIcDCMHh0US/ViejaCV10HPhfO5jrIXOFCU3wnV3PVwD30kx5PhsbJz+ggAEg -mKOODRUN21K2IEkV35TlaC3//n2VKsFyop9hSQj4GW0fDdZIPdg9czff0tbxDLHp -xXhhdv6+ZLvUZPfxqE7lPGNYEq3v+ufFrizav2pg3PpMP9nHD6bbz8v+VKeCB4jc -isSvr6fvlkU/tMgB51OuvwTDj/tmMnWG/nIoAqJNAoIBAQDWiLYsS8zzJwUhplDm -winiosz+0Zy3jE6dZBamH7K8NbK6RLzk+YKzPbgSV9yFPeQEu/KIH2SEqzxnh3tc -cWLKtaKK77keKaux/j9yI+RlukqJbrVHNgGVSppQTb096s8DT5Eopa54pNFSx5j+ -Cvn1nrtCm9eDvi7SQ+RrnVii1qF8hxc1z2bCOmIUM7dcNhxIa+4EZE2ZsHjw/EZg -puqPbkE16khhEVC+v+3djJ17gngBLK/atMFkrYvJgmhbFPd1/w8BDf0GENk0npGB -w6/OBez+/ZUGPCR9tDv/z+i35rjWzGVs78tSodvM8qe4AVbLdOJpDLWfHQbaAm51 -EXhhAoIBAQDmZVXAS4dTDAp/cGwPoXOyFxu+UNnGnAKO0S3aJW9qV6E5N7jsLqzI -4eD2Mk6chkBrO4Upmwgx4sLVnDMlHQGvoqpWUZES9k6oIgNZ6N7KXnvFm5GI7mlR -ySA2LftCeSb4BzQmwyX5wcVjOzfB6bkSgEkvuMFrRStSL6+79XZCoh54jBm+fW6g -up6oXa0+lJbyO4Qrx+oWoe2G9nrUJzsjV1Gj1njnxDECCMrmB+X5P4D02Ac2FgxP -rN+bxs0TpvO4fXsvBN4B+/dtF2Hjgo3rQm5FQ/NmpoO5lAs1VZPjVUiFCjhm3Fyk -Xe2nzT23gDTuPny4yivLAMHPZPfGLLg4 +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC1N/mdOywDRuJY +MlXvaY1kBB8gGZ+ImnREmaqZHFShaXh15hJgrw5LeH/sbP7SZXtmJs5lSDoE64Tn +mPtxLfcnX5DTPDdS1InpuotC0KuMoYPP8TPY3imzs6JdQjTPU20c2Vxysj19WJIn +LN4kRhN8KsCvarkKmCLwZ1E9+uze8PZL7ILzK/hPNuQWkNoZpnn4utb12wkTu3ck +TyLeM/RETHgB2NWOGoTNTsFnBjX4aj5zKLHFq1mq8CBO/EqIlDF3d5lVBNu5v6KZ +i275OfaofKPW7ubI3vuIiluz+S1RF1oVZ21KbYbe/On+s0xc1hHhvF1BekxUCALT +uT/kMVZiobJYhLf7wiXU1lbeEyk+3Ok/kuorxZfRZ9/ylpLHCiyE7CCywvgysQa2 +nKIaETjH82EhoA5hNneo50GfBeFNksbRnpZNQVWEziz9st0YP8ZTwexYB4qvTiYr +SLlCDfMgLYxHnUDeOzqVLfHzDGR7pHLJU41wg4MxaWVYfyfqCFCwaEQWoTVrK6E+ +Oa6w4bpv59VvI3PEe1Xy1V4W/2CvOLp8cRnbhJWzcFt09JrQzKhwXb8I9qXzZho/ +5ILUkrg/ZERJyLssMokAizDPDd1GnDMD2WnYxcXroAbigl19FOI/5ZZ2rMN9ZY3x +C/RDErpNWtT6S8jzN1TPYLjs1+hSlwIDAQABAoICACQBPBH2OLdp9Pyq+5H2ucXe +X8TD8oN0lJcwpMmI7HOpfeqGzK3y+fcauOfje/mXeh4Apc9Pu19Q3/YX2hVRmaJ0 +BVVCyIcnfUM38gVTalIloQfGl5OGCGkQzriSt+VdYPdzZD/RlA/8cgVgj75LQgHQ +iLoRjTRCp+Z10Jls6nUdEQJiul3QJaSvdj/ZhcvE3MUDckQkwbfu7iwDoRze27Ba +NRnA2CVEtZAJDroGbOCvUzsUIcXxVn50+SZYUAK01uMymv9eWL2eCpNRNbpEvvIY +2YCRtL7CNt2ZB8FJR6yt5BMOmpNIEZzqSR92xjxHuAPOpgSlZNbpRI6GZuBSwSO/ +04O/xGGCw7H3iOZdUurUd9+liVpNGjCk8vcgRDcacc7ibJTuXPnbNqcrpTU+Ohhw +PDgwOyxAhcpFREvofwHAVG0OlDngEb5AM6E/ocYB/nOPYvkgmVb39gOzoDHhEreG +KWzbs9w+gbt+UZHMnvjZe2LMOpovBZMFwReyuYjljeq8yt8MAi4uaB82I2XDGMhA +W3NcdF0BSNeVYQkxCF5Qrleogu84i+ldxwlPBr3Y2ve9kE59dditRZYY0ghlRi4b +p5ytq00cSog572tKiKOWTwR6HTDqKeS2xiqPtIcuRvI3F5ArautsyPJdZ/oF28UD +tY0iX6gg4Bxylkj3nmfxAoIBAQDljokxPggXH/4q1yAwVC7UHN7PsVjNUoU/oEzU +Kt6QiUog0TbFgY2km0xD+qSueK94IQrywdvBjpWU4GL2musPjGRx4g1BzIgBmLk1 +dQgCvaZNsZHL3K/c9CENb0U7iW5QSnf3l/Ua9sXHSFVXVxFaYZBJYB4JPjonZQZ4 +uAQUN7wgypdYFNzjmOEJ5lXesFyU5uGwr+Ek+3VDnQBqXD9OAVM2dS0hvPW71IG0 +kjoBdTvCKXBqVUQDcn0uv/K4wzP4mfd0RlFXKavfSrfXeijOyqccV7Niui7xHsOF +1ePN36Aaf35xy2PbD+7+oVBgeFLDQkewYFgkZAilTxPsFRE/AoIBAQDKF/uJgEHk +/2oyuhOaBhLHfekUrPZPfrENa24YiDP/fQXS+AwBM2a9kh6WneLT1Mj8wteyAqWq +qVwVmREvIG9iNvVnCAjJGkDESOZZmq1Ai0qVlUMB411z98raYltTLYCJ0Zab/gq3 +/F0c/kWk1kITF30w12rJEtmpiAxF6ptwMtJe1sonx0YEbwn1W/L/c0/B+wDRebgr +cABx/YN1+2LwoNCLEByX/Itra9s0PHnIX178RiWE0WFet9DlnMumCsMSr+sWOzHh +7SbabDDZyZCuE5cWpp83Yi/XcvoWDBzyqfwZTcIqCia4KAeM0OKT8fuLyQXZ7uqw +UYoGt/esFBCpAoIBAH8m1Y0mC/srKHD4WgcLCilJmI04cbTBKbeVhhjbag5rTVki +KZF68kckEjlxQvi1NpR20QaoJJ5w6R/erlJkeyFQpW6DRM6WiRtBSg13Nqm1/MD6 +Q4TFR9A+lXRIlvOkayskJJFLZWO7HtAEm+jV/HGwquhJyjlac4HQrqX9X50HRjhv +nycV30heLQB7ykKgM5fyzz48HZyLgGekNk+zqwj9KOd4Pjh63y5cRkAL4v9dvW0N +4Qu1EQhLyVU00zBOR6JDPlL0hCegmGgFjhFXw/TPQYMsfNuJv2ilnq2qTRWC8b7b +seE8RKJZkAsQtWHKJCWtt+HVa900X6mZRLK29KcCggEAS4jBNDhi6KNqXwFbeDMS +68ssxdSLz1SL+ncP1E18Gd7xwVOCsSQ0H2Cw3Byw/fnHEWoF0nvYzbu1NkFaweF+ +oBfoixTeq8OLN5IDvrPoU3p8awI55EfF0yVnhU5D0gfsxhY3E6DVAsVpS//hqeWH +FrmtygaJ3BOWlR3LcIpqhHoKVPY04r9BdUDHWRR/82h3BAq72I+E/X9EO1bEvuEx +rrx8la108n2q4oCSkqiT4Ad9ag8R0N2tZgiQJHIBT9Mv7BkiZdHEKCxKXFMZwxzq +XZXm38lxMvMY7I8F2B246B4OoqqEis/2ftg/Uit0tFU6bpm4/SamQzaGv1IdWQy6 +2QKCAQEA1Nr2xcoHH1XoSMI9gB3TNWXSEUvT4Rs1jFLabPNjJ2s4y3kEm+Ii1/Uv +KAyL12k7nb5uCpkWO//Seu3LcyUeiOIzUqlqcP1RtpqWsOz+5tJ9okN90x526FuH +vNsv7uzCQC3qXZ48u3P6bEZONc8QMx8a32pHNvvvPjjUHai07EoyMOH1WQc71m10 +3DTqHwTN1drOY/wkomzzCYojQ45KN84noLUJdLxUb8IBBwe3RipCJ/no25zBUdhV +tWMOVjg7f/7iOjygM8EAbR2MsgdmKiLjVUGCML1vtE78PC03OlFCKx6wOL9/pM9A +VXHl/tHRg27YjKQT1VZIyhcSLcxS2A== -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem index f36eb94205b..225a6eb1212 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client2-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANUgBLgZEjiW3Lmj -GlCu17h2ERYc75YX+7esbw/iMWMrcrXfvNNutJ4H7hOsV81ZpMEQouHRfog8Inra -KCuwmb28DLhJZJlpE49hZxeUJws4yN8VCCTgr/tLtTsvsSS+TGA58B29HHaglJwY -0w2mOlOVcUxOkjne4VHokLOomhzzqcqTLCjwUslZqRn+SDgXyRw9P5reJ/m6E36d -FyBeVglZvealVp4uK/TTqVVFJYBljD22M9wrOeo9AIvrru5VlgDNHu+rwqgSeE/b -GcwhX2J9J++lwOxsFDR7OILY33yoD7/6NN61SJEoRh7xZTQkr/Dc3HpvjYyj4Yag -dWKq2xDZzIxH+QfEMQssuFvulq5L76TJpXB3ceyCu2NEvb/563p1EvXQbp5Zjz7O -jo1elobs9epvfbCmiANmTxG8GLKteLXfjcph492gdIw0nsV9/bIzE5C7lnff4nEU -9E/uEJz0FTw61VjcZPtpqEzLE/8abBU48pTj5HqKo8Nsjx9hPl6trO4h81yMaqwb -QDmza1KsU+CPIFiycyv8Hn4w6JEjwnUc08rOoQ3e7HjqLNpn8X6RirVQUrwSU7L8 -RTKeOCOBLg6AMXfH/frPRnNQjUG/Z7tBTjTJhm38qucxyHI3J5jwX6vn/jBfdFHf -T6510V0Q9fzgzp3H3fyHpnLW1qxDAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -dr0LKtpOa+Xu9PKwnlsM48/ltph4q9+tsu4CeC8XGoLFNbVIALuZZsKZDehTf+/d -bgEtjW8vVnBGAvVodo1MgCHnhPPensDLfyggAULT2X400cly+suGbKeu3kIOlKCs -TQsFdNKOPm17NcpuM1wTik2UT2EWLdzZ25Wy3Coid+ILrf5YZ75djqtxZlYbRiw4 -4IndIjN0bYsn8l6Z8Pt5HdJ1nQnbDZhQrx6FXWZ3eSSmpklfl4O07z0KlXi1Nmaf -OiVcOMvZUnM8pYmNvul8Jus/XmP8x3jSbYzJDNOJ3YV8+OD8DVG3pLM8U1FmjCZ7 -KiR5DNSxZFpHGXhUqDpTrhLgoqGK9chOqPdzU7Mp4taEO9FV8Goc7BCeOKB3Znxb -XDIszs0oBIHO/tsqUwEcWBI0vjyC2pBYQAYK++qwwmvbfWg5lrb7eH1ZO42DU9QD -AVR/5luxImAA11AmSsGf8i+FJ3F63PzSr0uUG7BnTLC03xna7dPdKXS/pGojNVBT -Q5A5J0rB3+4L2mZLE3mjst3t1xHfLW/0RVRqGwz0QUIloZkO6wPN6Jz6l5Q+TgCY -uEks1YN/qlwjHwI3ycT+Hr/sY5igT0OAySo7qa7lN13qTiO2z7eAMDgafNnq34kJ -4OQDCE28Bni0fFRIaqVCqTU31Kei5jbORif2wK81Zmw= +ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALU3+Z07LANG4lgy +Ve9pjWQEHyAZn4iadESZqpkcVKFpeHXmEmCvDkt4f+xs/tJle2YmzmVIOgTrhOeY ++3Et9ydfkNM8N1LUiem6i0LQq4yhg8/xM9jeKbOzol1CNM9TbRzZXHKyPX1Ykics +3iRGE3wqwK9quQqYIvBnUT367N7w9kvsgvMr+E825BaQ2hmmefi61vXbCRO7dyRP +It4z9ERMeAHY1Y4ahM1OwWcGNfhqPnMoscWrWarwIE78SoiUMXd3mVUE27m/opmL +bvk59qh8o9bu5sje+4iKW7P5LVEXWhVnbUptht786f6zTFzWEeG8XUF6TFQIAtO5 +P+QxVmKhsliEt/vCJdTWVt4TKT7c6T+S6ivFl9Fn3/KWkscKLITsILLC+DKxBrac +ohoROMfzYSGgDmE2d6jnQZ8F4U2SxtGelk1BVYTOLP2y3Rg/xlPB7FgHiq9OJitI +uUIN8yAtjEedQN47OpUt8fMMZHukcslTjXCDgzFpZVh/J+oIULBoRBahNWsroT45 +rrDhum/n1W8jc8R7VfLVXhb/YK84unxxGduElbNwW3T0mtDMqHBdvwj2pfNmGj/k +gtSSuD9kREnIuywyiQCLMM8N3UacMwPZadjFxeugBuKCXX0U4j/llnasw31ljfEL +9EMSuk1a1PpLyPM3VM9guOzX6FKXAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +b1qAUlTncymAKrwibT26XK8txk2ldN290bqpMAAGIYxQqn6ogjKAt6s2Tz3LOnZr +rKbLBVB6RJgevu8OEJdk+6HlqzCRLFwk10w4+aU+FIkLPMUsqyD0I4cfXoW2kSkP +LOMfjXjuxFE5wjiMgzKf9oIdeKcFaiMH7ThkxbWt/HXnI1VSbLH1Ff/vciA7tShu +Y8drVmps7+yGeNW2fzHkx//vIRLR6cufEZpAnvn0HyHiDHBmjCEPwihu7g+H2s57 +m2B17Pd4/kcuYEvv4pExjlslewBjqAfe5peN5/Lo2rful9oGB7OPwPCO3Ndb1kY9 +VQjjYkw8Ie3bQa/v6MQSfpBjuKDEkpG47mNQXy3n+pfl8Bo0E+FQQx0pE3cWrifZ +Rk+PmNqlaERuJvNpDmXhq22IWuUs5BwtttqKPJTox6e6wggIMXCukXxWE9ovSzu6 +VlNM9QbyfrACvLuP+4YPboXFiBHK4afZbpqWl1uT48XJxg6/caJvazKSx9KmV6PN +zaMFCtyGYhY/G14NWDfG3lW4VBRhwXHxHA3p8Qci3ldVgxCsakMtpnwCe/BLo1vP +aCdMrnUEwjs31wldiRFtOA5dP9TaWXtm57nsOSfL9tbE702wfT4oO8JehvbD1iLu +ozapJfOOc4nC0vUqVXQ+TZb7MWGdsV/t8Lly4qi8Ydg= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem index 376c85ab8f7..b674ad4bb5b 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-cert.pem @@ -1,30 +1,30 @@ -----BEGIN CERTIFICATE----- -MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRvMA0GCSqGSIb3DQEBCwUAMFIx +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyR7MA0GCSqGSIb3DQEBCwUAMFIx CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl -cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDYyNjEwMjUw -NFoXDTM0MDYyNDEwMjUwNFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTI0MDgxMjEwMzk0 +NFoXDTM0MDgxMDEwMzk0NFowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE -AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZ -HRhd4RPfa89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9Dk -Cdd17Xqn4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rO -yIf2Oo3Tq58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKv -NLYRgExctHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lI -zvnaD7+IIiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIl -xG5nCGbQS77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1i -gMqUSbiTF1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMw -kPZSr4W+QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq -2cVZLnY98IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7W -wUBepmc1bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3Eesz -TBXOSCqNBBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAEwDQYJKoZIhvcNAQELBQAD -ggIBAADJZ+I3CJs6E9U2RjIzi1lMo2sYgdcKJS5+yWW8CNjB+DibKfkWjgWvq2K0 -i3hT0Uc6y+ter4OOeIkGtofOiUPekaZsQkPpi73sabwhDVnlki9QL9Ayrd1qDX82 -fMM5roL7w/a+YdKzTQE9hiwPoQhrpj/2mhu7LeYhidSqwzH1anU5YtTKHq3ZrdGN -imhnklcmbqfcNQU0K2l2bu5vuJXFs/v5FCp72ux2p6QDPWwMbwvr413wibt8o7ZT -bBGsQ1MtfJynRVwLGLosn+2t3NPJTfjd4dMEsZhkDY0EX4vbE1/X+K09EN7jPOHe -aJ2AOt3cO3A2EHCR3Dbmt055C6Lb/YR6s05dX4lBT8zY0knsWSL3R77kQoa3+7oR -hU46ydU6K/Kt67nO938WBvFgI81IatRVKVRsXfTIP2oEa0TkwzuvS7nzj3czNU8o -EOa9ixawVYRlEkcuE4KE7x3TcLEGa1gYJDGbsXAfJct1Hur1SJ/rTDwZvlc+qp3o -wWOLtN0mVHEH1OaGlWmeeTuRG16CuTcku2DYiqeuRNy5eZddSuMOag/DKnIN5ZqV -s1GNrpnxPCxd/KFKtdGl+l++Bc9dBmkd+r1dJ/kRGvhul77Zm2xEnGdyybIs64iQ -gvXq8d8ohbZOPxswiFo3p8fbBjWjv0qm3UnlU3P4B3RDMrrC +AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOpzzJkJ +jTA4iBVvh4G+z4QNXoOyFsco6zFllwNNWyIGa6kemP057eSJLnacNNPboHEIK1zl +NinF9Sfj1R9PfdCrVRLnopX/6Vkj3L/pxPS/J+NpAZPWW2FosqkrFm8Ud8/p1AOm +dNNubgjmHaq4xAXPWeb/CQglTxYbsswHJtt3G+0ll5V6oJkGR89TYmko8Hqlgdw1 +3LDuJwlIfjxllLUsKB1cBQFxbeCPbG76F+tW3WatJXtEP7foKC+PZjeh4ZyLwSZr +AhbebrYdSyIwG8hCnAS9Dif1W4xPbyhZZuMe73t9APrdxZLHzmLaSzSOzEqFhEND +ab9fJ+goBWEt5p3qY+aL74jnJwwcneJAcsu8Jwsf5T8XOg6deNfpFTD1vax/A086 +8LzIeNTigoRo0Nt/Df0+s1qAr966TKcxkdBAVamNbbGR7Bj5OQO4GjKWPc3pNLEc +Xh/f441ZLoBoCRaWxIawEl5ZBJ5pwWuZdEVZFZj5oc4OPkKIfG6WkA6gy0kluuvm +3ArC8e2/6tQ7GytPIGot3HDz7QKIAvCWXS7uMb5/WBhw4R7pQ9rSYXLKdh3hPOg/ +GscwskBjQJTQk4XVtj3UQQFX3KgBXKOHg5XmNQRmDYNcNZs45PkgfwXM5copJEZN +TTwo1TDMF76CNA6V8akP2VsBM9hOFcHlHqlBAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAA1n8oB//2uheGDoRmRWXM4o7uPLcjN1JkRoCkj6ysQ79X4qM0y9hkH8S6fF +k/sz3zSnpB4MpA+eNbZPMZQzcFMSVdM6iau0QN6gnDA6gy3f24J251AHLC0itTGb +2uh3f7ENgO/oMwqTR46ATh15yS9RnL5Fi+/coLvvUPpT/rChPbQUgzKOCpV2OoEg +2IDpp0UD7GmS3Xq/CxS/Q3+dKtXlnlSodr+pwx93ZD/ViHltv5p74pkJOXtadQ5Z +qF3+gMB/7qoPsbarkPz+UiLjefqhCD4WLWBkg3WEouejrvrpHL61Wl50nryx0IM1 +funVy0H+DEzLJD7i5l8IvAIZTIZC1Ewsh66bavYpyCySemapTsEnnVxWLG2LkTlo +iRchS5W3S8Bm/b3z6XY19uhOhguYpHBOnRhoykbR+jV8oyrt23r/RegqfMVSPqbz +1rmetb0qCYhhoFwJ2/kattLas1lHEbD86e1WwJ5Hc3P4Vd7JdU5zrNd/YGhV5hmF +1xDCoFzkS80yvnjHrsUiz6r1fgM1YcpsfYuDLhOGVHO8CQywrScGoWe+CUPD0f63 +azylUumnexFZVG9CMCxrePwVktbBOJvMN1IY47P5h+UyStHg4PJ3gHBbTIJJxVdP +W7LqqSw3gynzT3UMLWuYCc/KH/87Cf355hERFLetmTeIY6i3 -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem index f88456215fe..a5552f19c0a 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQD63ZNmGR0YXeET -32vPc9gMv0zm8ueOTeZ/UfpKUL8oZwm6+NWn8yFOQ6wV+8vHCnL6O8vQ5AnXde16 -p+KGJYkr1TSPAdp8JAc/OwVGE/KTVQ/kFHNyX9j+Fo+l0DOKXfTbWsO6zsiH9jqN -06ufEko212K6gGwmLl+HLP1Vi1sqvT2qQcMRD3kO+ehmUt176tSHwjSCrzS2EYBM -XLR85FFj3+lgEtJSsnQXAqG2L0jHxMz5GwZSIbg0XTJs7o4uBXnzg2OJSM752g+/ -iCIkQuPiGPcTYXR+rNFKG6UiKiwksTiTWMKoVELeoDQDtZnSYVIq1f1yJcRuZwhm -0Eu+8nqASyARC9be7aaN/wHRM5Efv8EtdXUqXJZUo/uwSWigYUylWa19YoDKlEm4 -kxdRLfT5wJEtY7VMtcNyWuxcj1nzPJ7fyur3zUbnYP8cGNImNURXQl7jMJD2Uq+F -vkK8hmj6ptEZaQqSx2QTsQQ6X9GXAN+7pZYeYY17J+QkcP9GWKWp6q5K6tnFWS52 -PfCKySNUH0IR5Q6lD+6a0eDppPAu3nVwakKhP9Jqi+wBIaaTsi3nSxQu1sFAXqZn -NWzR/h6NdQmXknU2XyIMAHA97+jIjAOZDE0XFuhwBzUt9PczSXg39xHrM0wVzkgq -jQQW5PqFvG8ZEy0XR0YHZS0+zeqsywIDAQABAoICAQDAMTs48CqTPXEvyW6OS+EM -uw7OrO/r3RCnIIYRo1UgPfh9byA5AJLWpA/V88eF4SJ/RYp7qglEMcvTuYVZYq55 -j2kp2rCphOysa6o5qxSf/X4kLerYiEf1OhGpZh3mdt8doqbrmnqVd3YarD0CrH+B -DnhMDBFPGx4CsNwRSqd40ezJYIJyspj7eUisA/Y9doaGz6ltKY/HoRba6fc4667T -RntEKIdL5f38lv6PViB7M/IZMrQf/kdijrgQLp9s8LMiddmvFsHDN2XzRfdqMnjm -AlxgU7xtRDc/gHh9+TNClSeT81+GmK92YeQXp2yGehr6SGFYr0iTkIomQpSVYK2p -0haIIjQMHlc7E6WVkDELdpAxERgvV4uDN9iEkd4t9oNDPPRioPJQ4bhbMSxCO+CP -NdFHTxIbaDr39OdgqNNE14j7WJsFaCsYXH2NFF8jvwIkPQ3QVMQT/JPGStkyF+9P -5IjFfQ9aEF2i4mAVYiG0DE3NyD/OOI9/uF05POn15H9U+bA9hfBE0Rtm9nMqfVy+ -zgmajXkVb0jTHdL2t/UKv0YdgaglvDcWGFdEUskjJoB00NJwBGorSvcMZiSTxpLD -cGRqywRHOEqNIAbKv0Dt2AX5ZdBSQu7/z1/5Jcdmx8vp9lVhQKeMzYxsFKE4V7fr -ztDuPOlFGyffxpRenBIxUQKCAQEA/XVyoOW1cSFqeG46mjw+dbwjqRmLtEVhAMsG -TtW8pnMJHZ8u7lfM/UJyMN4NQEPJElrABns6I3dqPOwaKOy1a2leHMg5Vvd0/uqp -s5a2fduP6l9PXvhhWDN2sChbeKhl0jJDVnaTO7tiye8ZGMYOM/AlfQX/+PY4QNgd -O7UwcLKhoytxtPtHFZTOZp+cECdTvlmX9lZoNEzFp0nfzFaLVwDsy0B9e6KGt1xJ -fV3Drw7p7PeUyYBNKkyCRVee5S/pn5fT7pkIxMHvaL9BBnWVpwiH3Vi0hfTfFZk4 -8tLcVZgf3n0Y4dMVP2VQRF+kKBTL0coLne36HksQEJyk/4KZEwKCAQEA/WF4z6kc -YXwsU5847+ywq4ipq9efadkMDaGzI6Ez06TQjRYNsZGplCV9fiGxKX2YmZyFzjTf -4joqOmI6UANk+JZKW0Eyyak/TnxugrjMFq8WnK64cIz1TK054tAM/bHGkavaYb8K -bCfbKmaSkwkTbb/OasbQqsC7jbALdbM6Ae0PMrpPmI90YYIMYLRogIaBqCkB43vp -GEZN2VeNS7blhRMiq7YBDXn807aSMQ0+skNSQ7MA8F5i4BFvWyPb1nKZWux1RWLZ -O23IxGWmoGho1CAaEk55LXbqLygU5ZYlBSqkrP9N/elJykOp0LwpjoYBgjMPmanz -o6jy8XIUP78MaQKCAQEAi8+YjqaHosMTDyGG1AN9VMaWSTYdOTC4JI7ZiO0f5hU4 -pw1i/viRy/Y2NTyXxKZfqO9EU47v8BZ0FO0MNRz1qi1yS6Aq+Q0BjYh2Wek9+0j9 -JwSyLKoIUHX694sbggAqQnuVZ4F7EAz6nnd0uZSuyvmiREfl/jgbqbFM1t3IvbHb -tb1GONYPTRlLjZJnrQV0jWCwkaLyUj8zHGeEuxvWOwT4mdmWHnf1pfmTVEM/qTYp -1Zxwh4JtjnKrvYJq1PPMBEvlDQ1/p8FuxbISNXTxOzVadL/0vJvp3ukpX9Du14xV -sA4DhrZAVzsUvtKfI7jtAWlZZSGbwdAYKYGvBn7M3wKCAQAHZWX6YcxTSCWfF0G5 -NyZ9C1Mwke20UEKaz0KEYrs5jVENHTyvFzpk+actHFyogmMG8NuzBjYWy23aIG3l -UgQLgY+QFFogKtGPP/CV3kEO1HOLhUoa9vJeF5xd84a9jQfnzqVkPwhV2d/6392d -byFjDbs/wKfspA2VeDMNb3rc/Yd5CpkyMdXK1tn3pKx8O/Di8Ld+ZWqLa9nv4y9b -q24NsV5MttZXB12K7IRd7C4NVAu9sCbx3T9znO6sMWLEYrn5Pne528XNh0nZ+cGg -YwvUTU+VgzbkTdlOIRRjEzvnZ7RA3H7xT3L49XqqfiOUZnL60vS8nopfF5pn09Wl -erUpAoIBAQDWHJQT+Jvj+dXPC42oIRQKCiYtp1buM8YyL+dJNi7p73brF+2Oqx3k -XNT5eP9GthGqpVGJ732FWJDbPViuZB12zlx9tpGF3ghQTq3p/95KOhGEb2fG7mnl -bEcPqOoFEsAlc4DZYqsDDUvmsifimKm20ZWi4VjTqQJUHYCegJsjrA7D4obGbOxX -FujRMq7/idXRjEoWLloQTMPAQ0Uu4Omwnea25daRzrrrJ34uYrTO1sNgOKk9JAem -rGgrOzsRVG1aNwddcZT/t/icLKS25G7AszLnrNFxJB7DRAfgzpHkJBwNLpcPVtfR -KB6GTGRi7uqGHYScU6+wRMHjdVzdKGNM +MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQDqc8yZCY0wOIgV +b4eBvs+EDV6DshbHKOsxZZcDTVsiBmupHpj9Oe3kiS52nDTT26BxCCtc5TYpxfUn +49UfT33Qq1US56KV/+lZI9y/6cT0vyfjaQGT1lthaLKpKxZvFHfP6dQDpnTTbm4I +5h2quMQFz1nm/wkIJU8WG7LMBybbdxvtJZeVeqCZBkfPU2JpKPB6pYHcNdyw7icJ +SH48ZZS1LCgdXAUBcW3gj2xu+hfrVt1mrSV7RD+36Cgvj2Y3oeGci8EmawIW3m62 +HUsiMBvIQpwEvQ4n9VuMT28oWWbjHu97fQD63cWSx85i2ks0jsxKhYRDQ2m/Xyfo +KAVhLead6mPmi++I5ycMHJ3iQHLLvCcLH+U/FzoOnXjX6RUw9b2sfwNPOvC8yHjU +4oKEaNDbfw39PrNagK/eukynMZHQQFWpjW2xkewY+TkDuBoylj3N6TSxHF4f3+ON +WS6AaAkWlsSGsBJeWQSeacFrmXRFWRWY+aHODj5CiHxulpAOoMtJJbrr5twKwvHt +v+rUOxsrTyBqLdxw8+0CiALwll0u7jG+f1gYcOEe6UPa0mFyynYd4TzoPxrHMLJA +Y0CU0JOF1bY91EEBV9yoAVyjh4OV5jUEZg2DXDWbOOT5IH8FzOXKKSRGTU08KNUw +zBe+gjQOlfGpD9lbATPYThXB5R6pQQIDAQABAoICAAMZGKEFEUqL1LCD0sSSIufZ +zV/sIITtM35pmz897HBcqVuIvfjKpSwZ6/VBRyltg4c61mfZ14dhyEWIqy5IvJ7f +RLaFPQ7CXPECmk4m5qVdSUemAZFUicyVt5aorRk2qgajTlvl/TE2ClovwECbRGvX +O5bj09i5tXvTTd+IUKkhv8q4bnJZNnoPLS++KFS/Z74XJcolJA9qdjWXMaPWq8ph +FP6eUqqcNxl6i7JDt8EyWqaarx4b3sOtW6qVOIKPrw2Egz7gtxxaQBhD9tQy0oso +5irh3KgGg/ksq4la9RMXO47kLfkiqROxdDa2L7w4DtcFQKQq5eDTfxGAReHs21lA +XzBgd2/D9X10HQc+t37DVej60kJ0I8G2YqCcWlmcmoWcRN67LSg0Brje+m4ohYEQ +rEz2tPWDIKQBRecsfegP1p07/kFd24PgEU60MSp9D6tOunIeiI66ix6UDxoFrSA9 +wzyBNBw/12IJESJX5ggFA2RrkAA/ZX88oWGlXJHl9SxSq0MFO6t7BtgeInuUigTr +YYexKAhrkz/M6PRSH1fyNd4PYdJjb6TYT7ITarfBU0YR7x6yjTgbLGQrnddbMc5K +THF5KuyImPw0/ewCw063zyC1Ze4XVNJ/i11Gx1knqYsN1XvE/OnbeAu0EyyNjpe+ +GSHXxfd6nF4B/tKFaL4pAoIBAQD9bqupNI5Yo+q4PrlGAjoqvhovmKdjlfbi2Rbr +Zqz9ooOnLp/5E8dwEQy4ofMlRUOe4yH0Cv/DVZ2tR5Z0GVmHbIBKn289cnJw80HC +ljeth7x6k9VUVX2m/2srkjAmPy+fY/iqxsR8Sei0uKAS3v8TTq9BXuM9Woh9W8WM +z4FJtRICCAqnhLewopvxgb3wHoMm28Tug8fW5+WQhAxD0fWNs9lUe628EFD1acqp +SJ4SVAa8pMXl8k9EmLEInDtImJcLFo3DEH+WF/IJF/WXYaNTDbzb6uvw7jYW5zYp +GRDDeReErcrO6nTaRdKUwv6Kjz9H2tZAmJ5JJ09QgpmQVT6TAoIBAQDs0+ZxRUGu +qI0csPBLz1F4YSl63tbYPBS7HWPhBNg7d1PExQRhOyV51OShgtFtB99cgw5uJ2ch +OWbqKi4vQehQEHTCEdkPCpRT5Sj64AKHHNUCgd1D/Z5ZdA+Aef+e3NiDptTWaCQ5 +Eaf53KCm/VqoGBQBctzOAkh9pGvsIIyXzgDLI03i5/aiePVJNSTFfRzOROtmcsyz +4Tup0ntbgu09qigylPJQ46P9Js7MCkuK5V+Bp+kYvUMv8Yz8oknaaHYmo01yn77L +XshN9Si1zY5T8CSAfJvGpgqRLpcsbHryKRYm9uUu9b5TUYi/pPiCPiTUCdV/fnIN +0mmMZ9hykMlbAoIBAGDdYP0+Wj3lAPzE9Jmelk7p9CZHCYuVsRSJdyooR2x2Ji3L +M1fHSI475gnX1JBlRfA/ziVx4sntOmQVnZroaYDKZsqe36yzxqwHPhY6xjMsU0zi +nkIqnukqbPLtYDvuIKyiUFQtvsHmmewhOQWdeA8QHwo0U1SK+uo3Hm2wjjD25Vgj +bLcUkgUQUdxgA5H4h5Zdn4qukb5BSkwPPITbhihQGnwg/YmJDviOI+jKGajVtvF5 +ZS57i/KjDd9Fn58iu3CAgVSSRMHAi0EQiE0BA6Tl9k50HxQqaEAexWO18eNUsDmF +F6Q4lssqrs8vLI0XLU+wg/2Sl8VMIhOap0k0W8MCggEAdyCn/DZAMeErClGeriOc +8Za+TMYnACJIs063XQsY0eDWTFZmO5qK4VvLncq2Gcgp/NkXuyUq5TWApS2Oicr3 +Vr7QXIapzr0dm43cLUXdP6WrKFW2vWqn6otM+O1lPb4NUzvqN+euwN42xqLKVPWA +Uqm59niWxTG00S6R8vb0ga/oCka5+PeBwnxhte97jbO/d6qHVsNMYPddEbGEx6V2 +PNyI19jAgxve84o/37cWBMoYXpnd0MnzL/yrVLE1wR1mwUzHum7MhHQrAbvePIUn +oOVdFxyXJzeCfUEYXgo7VfCA6hyrcBHBKRwMU4piTvF/iQFhWX57VKSQ30WlRQu7 +nQKCAQB0dBfxvhJVMIdWE5IkiXj8ui+86LjYiBrdb0QBGSesYjZ38eChi57X6UF5 +Z0h0zvb95P6D9ZOFwEJan3pd9nwS9ud4/kXfnx8k4KtimOpw/41bjjHja3oliY0t +wgrSvRvJtfAqaUf+2gbbFy+eoiNdtCmu9btnwPBV1h1uVLK7wi53mkrufP0X9Cx9 +wjz4TkDtckjxzxYp1vuGa8SKZsxS+uXK9vaut9cme0gpVqwNeTq9yQD+GvIcpGs3 +eOXqHRMuyClyt2/jInwVSAJPq3hgGL+Zn+DAa++VhWbHASh9m//ZdwEtUeIiyHhM +Tea8lz7hwFEIIR1F88FY6UjN1dIn -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem index 7c679b4b367..0163a020c5d 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client3-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPrdk2YZHRhd4RPf -a89z2Ay/TOby545N5n9R+kpQvyhnCbr41afzIU5DrBX7y8cKcvo7y9DkCdd17Xqn -4oYliSvVNI8B2nwkBz87BUYT8pNVD+QUc3Jf2P4Wj6XQM4pd9Ntaw7rOyIf2Oo3T -q58SSjbXYrqAbCYuX4cs/VWLWyq9PapBwxEPeQ756GZS3Xvq1IfCNIKvNLYRgExc -tHzkUWPf6WAS0lKydBcCobYvSMfEzPkbBlIhuDRdMmzuji4FefODY4lIzvnaD7+I -IiRC4+IY9xNhdH6s0UobpSIqLCSxOJNYwqhUQt6gNAO1mdJhUirV/XIlxG5nCGbQ -S77yeoBLIBEL1t7tpo3/AdEzkR+/wS11dSpcllSj+7BJaKBhTKVZrX1igMqUSbiT -F1Et9PnAkS1jtUy1w3Ja7FyPWfM8nt/K6vfNRudg/xwY0iY1RFdCXuMwkPZSr4W+ -QryGaPqm0RlpCpLHZBOxBDpf0ZcA37ullh5hjXsn5CRw/0ZYpanqrkrq2cVZLnY9 -8IrJI1QfQhHlDqUP7prR4Omk8C7edXBqQqE/0mqL7AEhppOyLedLFC7WwUBepmc1 -bNH+Ho11CZeSdTZfIgwAcD3v6MiMA5kMTRcW6HAHNS309zNJeDf3EeszTBXOSCqN -BBbk+oW8bxkTLRdHRgdlLT7N6qzLAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -WLhSuFZ6pnoDe8LQx6eMPXzRkQb1qsJpyjpegUxFe71o2e23V/1yMnTfFiO+DsBQ -PP8RkLWUKAvkAvqPyttJBx9U5ZYspsSsTVhPsCjUFZ4IG+fc/dVP1ZRid5HQJz2+ -bFf4KPgErZkJZR02Q2q6ZpKq9clRzbDkho56OZXLYI/o2Z4xADbhzpa0xt8sx533 -bm0rKvz85WxH3cimRjKaGKzuKg38ZaXmmUbsigV3dzImT00KDWmMmaW9SB8lIm2R -JToms0Qs+mOr9qD2NiRoiUd1wmgG2QpFDViIqAZKJjjeesmeV2CAcPfLztOZBim4 -6bRIOIXDhYYOyDgs52XuijXUr4BR8aQmqBrjnccCMcGE8Ol5ZH/IDg4pCRSduCWe -T7ThhH7BpAWYdgF3ITcp5oEcpXK8IdAMAst1/6vk7Z1JHIOejxksbLsGDYkaLM6w -yTn4X3Ak0X6bVmLAY+xAL/WjAJhVtDPqGYAmpx4iQ6QjYG/8gRdOiUI8H7MCK8+h -P0auhyyMmO+kdhNnzwuX/eeLXZfNvnyK4n2uHWYgwV5I+Kv282zw94UIQgwVQ2DN -/IbXD7K57s7+ff9Eff8L/B8rt1i1cmv01mEgQ4kMsLOClGaceGcz/ivfzDCosmsk -Xg/zVmdunUY0lswYL4SQM3BhWB3xJ4likHikfQHklM4= +ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOpzzJkJjTA4iBVv +h4G+z4QNXoOyFsco6zFllwNNWyIGa6kemP057eSJLnacNNPboHEIK1zlNinF9Sfj +1R9PfdCrVRLnopX/6Vkj3L/pxPS/J+NpAZPWW2FosqkrFm8Ud8/p1AOmdNNubgjm +Haq4xAXPWeb/CQglTxYbsswHJtt3G+0ll5V6oJkGR89TYmko8Hqlgdw13LDuJwlI +fjxllLUsKB1cBQFxbeCPbG76F+tW3WatJXtEP7foKC+PZjeh4ZyLwSZrAhbebrYd +SyIwG8hCnAS9Dif1W4xPbyhZZuMe73t9APrdxZLHzmLaSzSOzEqFhENDab9fJ+go +BWEt5p3qY+aL74jnJwwcneJAcsu8Jwsf5T8XOg6deNfpFTD1vax/A0868LzIeNTi +goRo0Nt/Df0+s1qAr966TKcxkdBAVamNbbGR7Bj5OQO4GjKWPc3pNLEcXh/f441Z +LoBoCRaWxIawEl5ZBJ5pwWuZdEVZFZj5oc4OPkKIfG6WkA6gy0kluuvm3ArC8e2/ +6tQ7GytPIGot3HDz7QKIAvCWXS7uMb5/WBhw4R7pQ9rSYXLKdh3hPOg/GscwskBj +QJTQk4XVtj3UQQFX3KgBXKOHg5XmNQRmDYNcNZs45PkgfwXM5copJEZNTTwo1TDM +F76CNA6V8akP2VsBM9hOFcHlHqlBAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +m+9LdrSuXxt1vtz+sIqotYTfWOlEO5yDvxYIV5jS9BaCGrexbj7ku/kaA7AvnosA +ztEb4ZOsiFFyETSYRR6+br+A3BHDBgyiKdilEhH7AzZYPxDmkUmmYWRXeia5o1tu +qQWWoB4pncEs303a6dGY4IFjbYTCu1UPP1A46da4xuDmsTVf1OE93T3KClEyyAZ9 +DCbAzEj0Mc0iiJtEumP71nl5xr8xeq7sQ0Qf2YQHG4svV8TKNwQSRd3taaFQ82H3 +gxqL7u2wS9lk2Xg5BOvcpzGVXjxw5qwNGgL9G0g8FiAT6fWi2Wxl707WOOUx4uKn +wLJJfO6q0ojSlsd8JXlPXfvoVKIUmDUKIwuz6/UgLpDdm43X62aFyO9htSVVK56L +OawMXFHLSkf1SDHirqhhTIDVlOCM4o6KjbnEQhjU3l+kFmKbAUQ+wfDOO16i/emS +tkuu1FRouZVwwTMXeEUciVjadUSrBJr+balX9fjJdqTDSTc1/9ytkQ66h1JlJfy9 +U4T2hZTzUlMKXm+x99qxQyI4fG97zgoL0vU2tKJf/5dkqTki5b66RLtiPbuduJtH +Sdz7GyuGQ/H8qzgAQx55qWA9j3AQVCZZUr7mA02Cuv71YnwpqW1uHM4re48+pKKJ +Uys9m1yS1AEloa9P+2qwLKylbsEzO+UxFXZYImfCLRA= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem index 5eae58da627..175bca39128 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-cert.pem @@ -1,31 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFWjCCA0KgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJHAwDQYJKoZIhvcNAQEL +MIIFmjCCA4KgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJHwwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 -MTAyNTA0WhcNMzQwNjI0MTAyNTA0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwODEy +MTAzOTQ0WhcNMzQwODEwMTAzOTQ0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRAw DgYDVQQDDAdjbGllbnQ0MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA -353z74lGXkwEc1n6r/M0FS1XTXhoVGMYIqK7HPCBOEGLeyyGwGfSQ7lRfT1xSkii -zBGG0Nod9cRT1CAewOSJ6BjVfkQcGEjlnVYm42nD6PMd9iFJj9Y5atPeFNvvr+wF -OFX+E8FRu8u9aEu7MIj+KCqoqBukFhFgJYX8sMbRROfLOPaCq0cSC+Vod4qR83+W -ITrQ5n8+/CC39uLY/oKgAKdVnmff595Uy76BVdYzuit1IRKwJxqIWMRrfNI+szmS -hdj0AHwgmwEGCaTNcOQyqvBLxW6qB5tc1FyV4LYv4iNftroqNQvlUbJ4UqVr55Fh -vZ38C1BQ4sWgo6FSS/B6u13clwpRzDh3H8tOMTTz1inUtg61Y49p2G8k3kNVH+QU -fRM4xvCkhFzIArgSiJ+/YUKboltSG5K28pegkk8RRMsaQK8g+NScKKu7/8ddRGE8 -454AqxPpzASij+djM0vxzgad6BB4e+iIVdj77NSjAxVAfg9GIjNHG1DZ87jLLgtk -SN2jaYsBRBRnmenslEGDtwO1SeWrzzicVfP9GRdiMLJwCkwv5P5hCzwrCB5eUPhm -tGHm4K8eXDAd+Ol9pKMySC79E5/W372wdbaO1fcAUKvpHhcRZnSusNAJkLGJYCkV -2gzTWlaeX4rGqjNVs4MSmNuMT+a0IafZeZivptxdgLkCAwEAAaMjMCEwHwYDVR0R -BBgwFoYUc3BpZmZlOi8vZm9vLmNvbS9iYXIwDQYJKoZIhvcNAQELBQADggIBAFox -3myVDr9yJkrF5+vB9gUlTv14JIPRd0OFCLcPOlHpvYKEwjRjTwT9oL3zU5PoRPX0 -AiD9sL5TOo0zraiFPUi1k5X6SoW/qU/kOJ/j5CgfChyyyit/V773LitM/cVXZGui -YX32V1zV9+RaCowC/16oHvfjMA8xNOYoYW83FgQ3GrKgRuqqVMT7JAHoDebVSqyb -w5W0G7RH3hHM1nCv51tnT1SZDn+qRBcX5faPUVARzdcRrZ/VSU2RoVIU/fPPiet8 -5TRioZFslZaFDWOLOuP0ZcOj5MsY3vQZtx2/NRgNc+iLF593YBUhRJYqfT5ePW3H -LwbZp/Rvd2kLucYd/W9WhKEzJKKvzm1V2hCDnh5dl32sZgdBzrdKzgyNB723cLR2 -cHFTIEj1Q/scay+iiSoV+VNfMSDQ71vkHqFHNhEqPFUpdF/SeooDFeQaDvYkomgr -Z9BJFtbp4kZRIEuPX+niTi0S/zwi7htiUn17wOIBcydcgG2GXBer5H3JyFnCXM1N -0jFQsuBFRj8xP71xzhN8YjA2Pe+MGYrMWiwaVMLTz8mdQ+Y2aEvOkfXFSaeNUqW3 -GYxAjEkhVCvzhOd6sD3QjLRX2qhwh8NJCJDbkTok66hno8QsHWASbaDiCMG9z7le -ci4dOHzu/buwqS4LVTmFWTn7mkd+FAlSY9Hj0WVI +2fVodgIdjzIVOxWDmdmdC56f3JOhzYXrYPP8JHyrSUQqL3puJM3/J1JBQvQF1Nbm +fVR/FmnI1FT2HlqWUh8M+G6d2a1iP1I3S08glSghPzuD6rb95o+e0f8ghWfpNt6I +wpf08wWVGWS8ONBm0BVIiLJ8rKb2ftI2H8VmCGIj+xGNXtNNdsOmkpIQiIHtzgw4 +b9yH0onBA4F13TWxOUExgcyXIeZICxR3CHHHJEyDKzGb2iKFaIzBeXxOrZs2aoYF ++A7F1J8O8CYaDU/p22g15hYPbvZ+K3idemzkkZVX35usiaa4Flo3rkHRMvFNGij0 +vvJbQsbMBAdRKsQwt5pP9LbGPywFDqxDCVx8FdFbtBlWpXfZLDSKNOUNp+Bojh+O +luJF+9qRq0ISj41nAf3tAB7djn6bvVpoLJ4Yxz90IDm4KoR3L2Lmd4xscghRffza +apcHKGqjy2G7nkGpxD5itqErzMZ/JgSN1Ue91afOtHO7dLm1nSHnriLCtW4v3NXD +FZmqz2APsbRWIazgOay+5Ai2FLRljN6ieZwMELcIAofzGY/BeaDv5hk6Rj8OBi1f +eIntvDJOXj9le/lOB8P4eW8WxiArLYktaD4BAJTIkEUvA0ckjPqkYN0AN8ICuPQ1 +Jhc0Q3bMSA5FEZs/Zj7Pw828q0/2PEABakLsgIS/A6sCAwEAAaNjMGEwHwYDVR0R +BBgwFoYUc3BpZmZlOi8vZm9vLmNvbS9iYXIwHQYDVR0OBBYEFITmpe10c7Is+D2L +GCRFfgf88duQMB8GA1UdIwQYMBaAFEaMoHhzHQlU19Afyjh95bCG7o1EMA0GCSqG +SIb3DQEBCwUAA4ICAQAiklhYl9ak9up3UBXyeayXVYsX5WJRP3xpYVBN2LFRYsTj +gWv/Q4N8ChegS1xSploVH8jkQorBep+zRj+s1u2k1NCxCAaIIakG1BNN2/W2qBBl +8JLnW4hJwy/XTLitmobqcFE9WTsi/d0V0GrsT9Ojh+Zmx3GCNGS2VWHJqM5S62d5 +iJycUIKEcWy9h2vjyKb2cAEQ9+OOtl8ODonfBIu7JwpUFAqWiBGG/r9fQCwS/9Yz +OCljZqaPi6w/w+04dA+0xRO86kd685QSvkD+hmtm8DzYLdI07gAiSxXD2sMszTF2 +C8JDKVxaIW0SV2mP2LcPdKEAKpZnW51rG4QiXjWRrDYluRAWjhlSHFVHau9vJOSM +6rCHahhGtUldaIGvruEN1TuEAuxQXupeBgQDMq/OBndrHSHwTRSEHzprrEvdFzuS +rn6MVx9f6VyEAsku23nhFuidXKRObF30b8T6t7T5JOE/KmeYcE0YGczob1YEbTR6 +rSDcOn41ymOQ4Jo0NRpe3N+Zz8USh2xzkFTgx4sxwKfyhh4peGya18klkc+fZqho +CMLtvkdoK4w6/bXxxGviakIe8OJJvKqKbSv43LoIDYgnYoiFVl1TRhvVe0uVothR +Yh3/laO/+p4WzPjFUHUtFo1yw+vOOsfA1++FvODgZa/p3DaTH7VKgRw6PEZQ6Q== -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem index f1f17525a51..d3587f78e63 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQDfnfPviUZeTARz -Wfqv8zQVLVdNeGhUYxgiorsc8IE4QYt7LIbAZ9JDuVF9PXFKSKLMEYbQ2h31xFPU -IB7A5InoGNV+RBwYSOWdVibjacPo8x32IUmP1jlq094U2++v7AU4Vf4TwVG7y71o -S7swiP4oKqioG6QWEWAlhfywxtFE58s49oKrRxIL5Wh3ipHzf5YhOtDmfz78ILf2 -4tj+gqAAp1WeZ9/n3lTLvoFV1jO6K3UhErAnGohYxGt80j6zOZKF2PQAfCCbAQYJ -pM1w5DKq8EvFbqoHm1zUXJXgti/iI1+2uio1C+VRsnhSpWvnkWG9nfwLUFDixaCj -oVJL8Hq7XdyXClHMOHcfy04xNPPWKdS2DrVjj2nYbyTeQ1Uf5BR9EzjG8KSEXMgC -uBKIn79hQpuiW1Ibkrbyl6CSTxFEyxpAryD41Jwoq7v/x11EYTzjngCrE+nMBKKP -52MzS/HOBp3oEHh76IhV2Pvs1KMDFUB+D0YiM0cbUNnzuMsuC2RI3aNpiwFEFGeZ -6eyUQYO3A7VJ5avPOJxV8/0ZF2IwsnAKTC/k/mELPCsIHl5Q+Ga0Yebgrx5cMB34 -6X2kozJILv0Tn9bfvbB1to7V9wBQq+keFxFmdK6w0AmQsYlgKRXaDNNaVp5fisaq -M1WzgxKY24xP5rQhp9l5mK+m3F2AuQIDAQABAoICAAQfLTflF971F7/okK5dlUAu -rcVHyuSDTxaUWU6XQEqBKskCcRlq0H1fFRlx4Hy2Cgoo6ItA+fxlugXW8bosfD5C -9ux05O+tqE3WILFgabQJhyvaQTjdggFuFlHcG/bqKs53B0/l6FPF1Z/uhWzHmaez -4Zf3qnadq2AFsDqx73mNrDlIkfAGR1bgy6QocbhDSckjBGa7QbX0BHAQjl9imQBq -FTHuSDpF5to6kLe8UwfDdU0+wvB1lL3OIQ0T8wPqs8Cz1wuLPi6dPjc/SmoiSqzL -8RmaiJfLTVK8wiZ6NTe93y3HELAZoAh5ea5MTkjebSbJmrO6r0L+0Y8ykgnETP7O -Ug9PWeDDE15sNXIQCKtRe3QpHtJaoAJU1MGhNqwm9oKMcuSvBOV8XRuZORinTYRL -Q2ZD7czaT5VZXCQI4uHwE+KIlQF+658c9M9WETxClgUlhbzqig3ilUz3QUweaPvE -tqArjiYLsT2KtrgmsZ2DaDn2IlGSIRjXMZJ3kn6i49C0uhH2YkSZgU9/7kH2myse -3opxE1EbT4ARFWUbGgqXTOc/OSb9DAsxUK2u0eR/4yOMLQJkvxNJWQgwmi6N18iU -WdvTphNtMtmdsAhst9luwNaeJItzTDm7JeWx+MPs8f7PVOOkTz8HcBAvZnISH1Md -0i+0lBrBXbAcRK5X7tvhAoIBAQDwKPQA0uNk4Hemt5yke1jrg4B03OLimHeQ/1PY -I99hThh/RLncYaMxqsd5WkXXbjsWyGidKHYh3/cG9akmgU6D2Z16CFNMRhgBgRX2 -+LJkdS2QSuHPJlB9ERtOOiWFt7IDafB+tMKHE/VRQdxFRtvLe6pQMzP4veVXZsq8 -NNJGAQ8egUa6HDvkXzR2VDf2Kc61t4ZwT4JT6C12GnCfvXobaVkU1aWhcguoX8vI -o3UOkeracEKc/80ZRdFhA/nvPPXCobyjFjLi8WGp6PUySrVhN9eAdZaUBNeFHLdg -8urNvy5Q6mBAByEfHZeJNZbBeAEAw7S5YAxgL96blj2IPOerAoIBAQDuXazpCDXD -dG6XDZ9FS7MKBWriHUa54UgbX4JfQsOOFA8uxglIe5E4IKFSEbetlDORnKZjcmGa -SKTm0MLLi/kKrkhoDgi6HNbmbo9ZmKIhmEwws5L1TLeUdLrWj8GLgNphotOBKs1V -vQQkfh6rzovyFsMj44Xea8Kgx5ONVlB1L5pEepKdIyDRiQfxhwFox719HACSqCEa -06eFNGtUOLLqNMZhpur59bqgiVQtIZKis9juwzZID0svXBElpDrNvbWS1V5MCBOT -6AStW66YkmVWFCn7qQmNqMh4x19GveW8ajgrBSr/8GP/WXiACBDEsunWRORW57iS -KiPmC0uHlMUrAoIBAQCYTrCokRZLlJvtbIb4PY3gFw7xjmCJqn4xw+wNqHpzgI7C -r/hbjsRrrE5DZP/kJ3Fr+n92JAH/a8WDcWrsE5eSwQFBMmR5e/6ffZlLft/MHBBg -cU0SDc9/8chqbS/8xMotphMymDrCZeLvvKAQg2bDftM9d6ufNfdr3bH3eFxery9C -fmQ3hc5qAAMKhFDVWiBRWGn3ckVKJ3Ylb5E7jXQSTFaFgxU+9U/1YYOg5CFJszrJ -e+aTIRuWypOGPnpUwkluPRqgJ2TwTntMwYQ3d+/eDwcp3ek4SHXSYqrd3lERWQzr -niiakqrry92d1BGe8xdXv8Yuxn4yxkkcTUUK0O1vAoIBAQDLY0LW1BqL3B1A5m6w -QhdSxaydoz1l/cP5F1W20tDpulP6JSBmqIkQy0bbMCL6CSq3ZGLVGBQQAUwzZo3Q -AG9PncZKgy8PHux/UncejA5LfBgGtjL++6bpFXEXAzKyRhAQn065ODxcnBucx8CD -+ImQ17tKNClVz70SUzijsLKWSzfmlm/jhMXMBJCyle+t6EDXL72NZchZi5+1GTU7 -d+Wx0bY0PKji/7luob8hgzQLgEnp8MewVNxiXLyE0c0bIHR+BXGgjoOmAKN9CG3B -4ah1+l6YTXPJW+syo2u4gPA2BKxIiPBX0laA22bmV/t22vKL0dzECpSCo1JeR+T6 -mwZhAoIBAQDpLqRLxfZk2TK3wJ/bloXXsRg4TjSQ4m2Y3htVRiOQF83iERVRlAwg -9yKlyd99ux8tlYAK368Q+FFAwYTvUyghmfVTPARFTmeX0F5u+MX00WOa2FhPs3du -+ImYeQH3hg2O7qyVDMCwtqgIIuGLNwsVPqUUF5bx0He7wTwwzQmx3EVCOu6yZXG7 -Aw3qpOM2VhrtWgGP1mTONiUg5dh4sGbXX70gjG9cUpo/Owr69Q4Y8/OEyx9bzqSW -5BeVN0vONzQC+LHG5EvgNF6yOU7iCkuoDirZUrVchuAf+IDapK85TLIH8bm57LKN -Etg/x+MCoqlEQBVgnY7f3suMB89XerER +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDZ9Wh2Ah2PMhU7 +FYOZ2Z0Lnp/ck6HNhetg8/wkfKtJRCovem4kzf8nUkFC9AXU1uZ9VH8WacjUVPYe +WpZSHwz4bp3ZrWI/UjdLTyCVKCE/O4Pqtv3mj57R/yCFZ+k23ojCl/TzBZUZZLw4 +0GbQFUiIsnyspvZ+0jYfxWYIYiP7EY1e0012w6aSkhCIge3ODDhv3IfSicEDgXXd +NbE5QTGBzJch5kgLFHcIccckTIMrMZvaIoVojMF5fE6tmzZqhgX4DsXUnw7wJhoN +T+nbaDXmFg9u9n4reJ16bOSRlVffm6yJprgWWjeuQdEy8U0aKPS+8ltCxswEB1Eq +xDC3mk/0tsY/LAUOrEMJXHwV0Vu0GVald9ksNIo05Q2n4GiOH46W4kX72pGrQhKP +jWcB/e0AHt2Ofpu9WmgsnhjHP3QgObgqhHcvYuZ3jGxyCFF9/NpqlwcoaqPLYbue +QanEPmK2oSvMxn8mBI3VR73Vp860c7t0ubWdIeeuIsK1bi/c1cMVmarPYA+xtFYh +rOA5rL7kCLYUtGWM3qJ5nAwQtwgCh/MZj8F5oO/mGTpGPw4GLV94ie28Mk5eP2V7 ++U4Hw/h5bxbGICstiS1oPgEAlMiQRS8DRySM+qRg3QA3wgK49DUmFzRDdsxIDkUR +mz9mPs/DzbyrT/Y8QAFqQuyAhL8DqwIDAQABAoICABRNlMcACHKJLErI2hhFRKig +meyeq0utULvtzmGJuXJFkWvDIFUjrpkiQ5EHO9nT5jxA2DvQFn9CUNDgCFuq/SF7 +5blcD32th21norXc5fKcH9GH7UaCahLOt1/7Siase8SAtOEZy9tF3nX+Ym1ku3vR +qzpxV8nVhBQukQQb5up20UVG3VLrA8O91nYY1abuzX0MvRw4F8zfTN7n0alEhjuR +7CF3cDjpVZ3pl1mCxRSLdk9PYupWc5lWTS/vzZWxBAeo50sYlqLOhyO7+3akPPfA +u1GkpXkgLwSIoJFifivcC6pLkkoGv7lm886mhVOirWByIV0q4+CbhoyQzPxKuW16 +vHSjqwQDp0EhMjk1NZ1+emTOA37E8vtE2jVCLZITPwEiRtbwdl9MjgGjQKedS/oI +BJ5sGFupfwzmY1rAjkC5U014LQVOPhvXzPxwLAH6szlrXlUOJYtG/D3qlEEnW3oT +c/DfJYhEmyhaWDnHC72c74e9E4JjpMp8cbJt9uCQd+rxd/FPtzaC7zi9+b38aktH +gp9UazWOjNkBYdxk1ULn/iVf+5hxsGYuOGfdeu/62bJnNev3YTiV2IKeQRREjRgD +AG560k/IMN/aYwJ+M7hAYfUwHA1oW5CwuzdVl1bBLe6iXRdZTfqFvR1dHTJI3xRP +6Qbbo1pSZGpsGTT84Xk1AoIBAQD0C5c8A3Nnf5j/dqRS6OVUo+aTZvztyydwo/ae +AT+Ju+InmZGvBd8qpyd0pJlGPgM1MJ+74kYzLvmFrUdlCX91PfgG+1PEaK1Yh9r/ +uMTOp62K7x65/Cp6LFXfddIywB8RCDn1EfVmA6mw/Q8lAaI5B764WHGwcdncVzhH +1omYBPXz2QzOAsGh9Z86RvMvkQI7pb1oEpp5n98MxGXZq4wqvqhIYcXfFfFSAkGg +DyMm4DRKUpUMp+fE4hjD0DFAtcgOFCog/kcMsIR1W24+/R5mtwt4zet8m7EXh3yD +xtcggYT2IPqdvYYpNqRs6lewawH/jnni5oaJr9ZJWJ7rKF5/AoIBAQDkoq6V+0KF +r9uckkaSAQ2fnMDfx15icK7J6s87pTY+RoonhzsvM8YZFYgfmKtaR1y/A+8oMzBj +g/5C2nqUjBuIbwmT1cWpoODGI0yZ4tltDHf8LSehvjvws1RCKH6nBBr3VK+BFcSj +9ZX9QeKTCSYiMUvC2A08wXOipWCF6R2X1q3/VtjGYEsDcE+vbn9PVzMneHvZT7nX +u0kinRbg5el0ifclzMhT8SawSMf0UZF+Xzt89nwV8CgA2EGmkn8aVWD7AhYBz99X +6jPt/Ny89RZSpRtiWSrH7gO7U9c6Z1bWA6IfitKMqwuutg2lUUJT77KXPVynLhXu +9FwMYS+57ZzVAoIBAC2qKY9SOfro1lEe+xqEd70Bujhksfkw5V1fYNsp8qCsJmIw +iyEL4TlS88AQuJl7KxB927XFXktfg9MunEhcoZvnj4yG5KLAz4bcTO9CvJjKqV9w +5Pf6JlycBSl+G2nhpy6Bso6X4KpJDyyN/ZJ4Hsm7GEXYTtWsSA7Bx0cx7qcibn9F +Yu79dwv+MOAvvHtvD9LnbWyW5o7RimQSL9iuD9lDuatpmCPpMDGfOHZhvrscgsxR +bZBjrquZfvbM8xpfqWJxBX0M93B5ax+mjxd8N38SyzO8nE7Btz5m3Lavszti4OVJ +QHvZpcv6bIPwyZ26ODKAj3AUoKIM4/TvfzUTZbMCggEAeL35GTex78vpXIxbPGvV +9/7eVM+C5tW/WN7qRgN/2++AslNWTB05drHKS3ViuLCLKE05la9C1Ks2MbQTnDNJ +rwmEs0/nrSdKzRiDMZl9XXuCOejWzzIZkYoC61VbXXRrkUZl0PPf/v6JEVAkCaaB +Pvltmx5iDz3ShYh0qwNrH26+QORqYokj1FMRogHmtc3FNAkKzr6t1GIRw3fWRzrj +ySm7HXlCKiJmXh5xinMxNN2yGtJIwYUsexW5xfADs7J3HtgBKz/OIS2L8xjgseF0 +wnxzM8MHNsJZsioatbsxPqB4k9PKUYNbDg8SscVyE8W4OOqs7ZUG/ESL//WrMLNt +aQKCAQEAx9ZBfjVVqmMVPp1GV61+B3LySueyI7UTrFDxKEGPQAzcLxQr41iWnkCy +MNDn1E9fkvNND2pVqKC88NZ+GDk62W7j96pUP2AqxIE3lKjB3vFAWdH8izWQwUWp +P+/yKYscaqo5wAgMNV2W4lqZUmQ6QaK0rJU2SKaXtjlnUD6fbHhJaYUWdBc8VmTb +FkYGJnpOPuP0eo/ijThdDpCVTUnekSCEdoinuUmprBv04ypn2RCjx4dpHHXSK13M +X8Gnn2API9//cDuuv8TjV0UG+yOz1aHz6ciLpQX8AxBbgetkdf/jJTQFWMiFHMxU +LBnc/Bj54Ydh01rOez6w3PkOCAugFQ== -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem index 224484f6611..5180a8697d7 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/client4-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp -ZW50NDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN+d8++JRl5MBHNZ -+q/zNBUtV014aFRjGCKiuxzwgThBi3sshsBn0kO5UX09cUpIoswRhtDaHfXEU9Qg -HsDkiegY1X5EHBhI5Z1WJuNpw+jzHfYhSY/WOWrT3hTb76/sBThV/hPBUbvLvWhL -uzCI/igqqKgbpBYRYCWF/LDG0UTnyzj2gqtHEgvlaHeKkfN/liE60OZ/Pvwgt/bi -2P6CoACnVZ5n3+feVMu+gVXWM7ordSESsCcaiFjEa3zSPrM5koXY9AB8IJsBBgmk -zXDkMqrwS8VuqgebXNRcleC2L+IjX7a6KjUL5VGyeFKla+eRYb2d/AtQUOLFoKOh -Ukvwertd3JcKUcw4dx/LTjE089Yp1LYOtWOPadhvJN5DVR/kFH0TOMbwpIRcyAK4 -Eoifv2FCm6JbUhuStvKXoJJPEUTLGkCvIPjUnCiru//HXURhPOOeAKsT6cwEoo/n -YzNL8c4GnegQeHvoiFXY++zUowMVQH4PRiIzRxtQ2fO4yy4LZEjdo2mLAUQUZ5np -7JRBg7cDtUnlq884nFXz/RkXYjCycApML+T+YQs8KwgeXlD4ZrRh5uCvHlwwHfjp -faSjMkgu/ROf1t+9sHW2jtX3AFCr6R4XEWZ0rrDQCZCxiWApFdoM01pWnl+Kxqoz -VbODEpjbjE/mtCGn2XmYr6bcXYC5AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA -2JVul/xcJ+YlepOHxJ9dczIcXEjjMOBxWuyK+G9/6wASgHg9e2SB+WS1VSeUARC6 -VkID3Jlwr1gEw4gR3lW2h5I21kdCaBfCHshUoOr9rV5uE76r9kxgyEsMUZMvSClC -eQd8VK4fUT9JEKigIJeCFT9IE9PyxrdH1xpp89jOLy40t3PkubDi8WR8dvPckg3N -juLU/6EtbrtgFMnCqB2TmH4mc6YSCeENUTvt+nSiBKZUblGDuIxu/edX3SscS0Yv -qPM5LPcNHEGeeMC5ZSfotaSzRP+x3OlV9VJNROG4brbaI+3kECtegBgFvKIiK+JY -m7dkt8oIpQc8CKZkM8Kk6e3JXHzKf4vAiWHf0Wyag3gqCukxdas/PMx/3ROi7iDm -XQN713lxhIjtqfXQZjZcRmYQwdnkaSY+H7hgAyhnavqkBmPLeMU5hffdBswrjH+0 -fD0FOIDOWNM9e2Q/qdtHxtglNUmox0ETvl/3gYRkN1I56zNan6FNzGMubilntt2z -xXQwxP4Jn+RoAwb5U9mIlaLJ73FDbl6KAvFSJHlZl34R/o1nKOOAiFSv4V+RcTMd -x49P7cyAcW+eSsIgDzqabhx1OcrFEFRtBy342w5m5Qdq62TpFmeALgRYhAarA9UZ -YY/XOglN88K/3iR+A5LO7Hdv0Q/wShokghcSdAE3JOo= +ZW50NDCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANn1aHYCHY8yFTsV +g5nZnQuen9yToc2F62Dz/CR8q0lEKi96biTN/ydSQUL0BdTW5n1UfxZpyNRU9h5a +llIfDPhundmtYj9SN0tPIJUoIT87g+q2/eaPntH/IIVn6TbeiMKX9PMFlRlkvDjQ +ZtAVSIiyfKym9n7SNh/FZghiI/sRjV7TTXbDppKSEIiB7c4MOG/ch9KJwQOBdd01 +sTlBMYHMlyHmSAsUdwhxxyRMgysxm9oihWiMwXl8Tq2bNmqGBfgOxdSfDvAmGg1P +6dtoNeYWD272fit4nXps5JGVV9+brImmuBZaN65B0TLxTRoo9L7yW0LGzAQHUSrE +MLeaT/S2xj8sBQ6sQwlcfBXRW7QZVqV32Sw0ijTlDafgaI4fjpbiRfvakatCEo+N +ZwH97QAe3Y5+m71aaCyeGMc/dCA5uCqEdy9i5neMbHIIUX382mqXByhqo8thu55B +qcQ+YrahK8zGfyYEjdVHvdWnzrRzu3S5tZ0h564iwrVuL9zVwxWZqs9gD7G0ViGs +4DmsvuQIthS0ZYzeonmcDBC3CAKH8xmPwXmg7+YZOkY/DgYtX3iJ7bwyTl4/ZXv5 +TgfD+HlvFsYgKy2JLWg+AQCUyJBFLwNHJIz6pGDdADfCArj0NSYXNEN2zEgORRGb +P2Y+z8PNvKtP9jxAAWpC7ICEvwOrAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +liXJjRTg9O4AEadeTV7BMMFtRIKXdUvj7WBbUgim1UsbDYjCvURLnNR1dAhn3QrE +c8CHrqF58uegW540UWfqDXcvGPUYgLPPweK/j2eyQmhUL9jab0Fnwo1ALgB3VZe8 +vP6sleQLlswhl5kpJgyGFUsf14zzRA7WW0UggFZVTbvO81ymgz81uGQl760FfqtC +5dczp4Y5FeUFlfgFj/nK1y39ijYrxWM+otCvRvxSKbi0JT/FB/w9B+rHaIlwIU32 +Xg8KJKwHI2ykGw7tTTfcdiSYP24rO0Trc97fJLHggJqeC69htjwtNCgrMnkDJAsc +N5BKXB/zUXnbCrjE0NIhrbcDlYgRaD0+VAyxdbUvLtAy6pWhf4f0bkAg1b8+V7fE +Fam9iHxC8NcKmwtrt3D5/me4CX4x9j7YFvrjVY0iJefqemI0jxD/Nm3v28xiH+/E +ys7PaEyKdVZrTVvKzt3H0p9HJqwwp9ETBOdqE3Zeu24eqs59qbMnAoFaW2iG6h1T +XaAAkJyBH8ax0KVdmuc3igL+uf1Oy+F3eoPX8Ds2svre/KABfgeObAr93NW9e3t9 +u8rL0yKEzG8crx+W0KCLDKtghIumWgSrB8ezjpNrqhiOzYyE0Or65WfXpUpG1Qhj +CsUJlXESSOSAGotLPYClXYaJqZG7ttGJ26drRtlyWxg= -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client5-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client5-cert.pem new file mode 100644 index 00000000000..890ff5b209d --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client5-cert.pem @@ -0,0 +1,33 @@ +-----BEGIN CERTIFICATE----- +MIIFojCCA4qgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJH0wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwODEy +MTAzOTQ0WhcNMzQwODEwMTAzOTQ0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRAw +DgYDVQQDDAdjbGllbnQ1MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA +t7MEWyM5qCSWFfKIxMAL/Z6GvQtoBY2GmNBDwx9FSBHAXm38jRm1fxIm3xVoujse +zxxZwya1enE8Z/qKKVmElRQypnX0K/TqV4VM2qomfDsv6/f+ueMWgAnPrbK0+Ggb +O1Yw2VB6b3qsGYgETZ9TJjKdHbuqy/hLosdLvZu/+qd6Rl8iCqgIa7jnLu1HFiMc +ZiCN4qAJ39virYvog3t57jS3awsF072kO+HagDmOoFxNhLq+Q7m1DNs6jBiU3Nii +0bhKDEZHER2Fvz5oE9re+gGde/B9mZk2sktZY3Uiw4eT0JagruGgduInvqKAtTo4 +pU23C+zKzSVHXyuhzNjaK/RtgSCTmOvFgXN4uWthcjd3tZOTMY7iapRqRe3JM0Ck +gHEAomR79a3KkYSfuFBdIew3sR04+r3lm/kB+++xSL1ypDFznbI0l2ur0e6+2r9f +whR0d2zA4pNIyl6itUA0FuECVd8Hs0cV/6IdqbCmRz+qWgyDeBhMW3miqy2KXSQm +00HT2Yqf/XWq5bfmQ1sq0oY6+DvsYTW9haVirFb1lsvFKlcAPHke23jKY3pMSkqj +oz+mJd1XpIxqzprpLjd5ayNu9FtTWSt2U8Ys693IVTne9Axh0knmGnLdIwhYTfJC +8lM3lNCqqYQ3eLy12qTPUclObh6dZtjaiSP3ohdXL0ECAwEAAaNrMGkwJwYDVR0R +BCAwHoYcc3BpZmZlOi8vYmFyLmNvbS9mb28vYm9vL2ZhcjAdBgNVHQ4EFgQUwRwX +0H/tt7jtTS0knaDOucaxGKQwHwYDVR0jBBgwFoAURoygeHMdCVTX0B/KOH3lsIbu +jUQwDQYJKoZIhvcNAQELBQADggIBACettwt0CjSG4u1QpjZhgpVI0EIJ97QKld7I +ahSaM4G8SHmU1ROf8kzam1lHVMEZU3YOIkqhyPgZPtcMlg9/CFvgpgHsPTkQu15l +e01Inf7lb3OIj/+PdX3e+P8wu0dRGjH4u3891V+yjkUUQJ8EbUw1DMI5iRiNTGuY +oLi1Oyizpgzu+dsI66QM9ntQPVrpMY27gDHgOSGqW5Mx1pTFL3/Rr9AKtMeh9i01 +YtTJ6LlmjhTIvzVmbSLVDgnR39Oq5LOw0kwK56AErEsubIeds0V+9T4M6wBjrReK +5ubolfDo/Y8Q2ZZONmIPPCh0M7gNdn14tfzgIKXoGWPU0m62PXZv4Syt+McK5twr +V7kXAcMp6MVSZCMqP/FXsUO13NLQLdU/a6BRakzCnrL9ulkB+RWmPKWQYvA4wSEO +9U00a2HVTfNSogMFcABxoRAkJEb951X+rn8L1EKJ1b4HTASbVEE7iYyktl9wWH1j +JXjqRTSgCNRKeezbEkXjJ90PzvCVfQmNxTipKuJjxVzLZpklZzb5uEV0Nx99bOde +QSexYccwADvCcptm6K77BQNcK9jhqfDxpFi4y61dHurFNIQQOQotHg9cKM6obmIG +2WzBBy7e3zs4yI6/jyx1Zh+dZbIi2+m8c8xSnZZDgVTKszAdIDFs6kH+j8zkLwlv +qvounDzC +-----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client5-ext.cnf b/tests/integration/test_ssl_cert_authentication/certs/client5-ext.cnf new file mode 100644 index 00000000000..8cb20e70290 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client5-ext.cnf @@ -0,0 +1 @@ +subjectAltName=URI:spiffe://bar.com/foo/boo/far diff --git a/tests/integration/test_ssl_cert_authentication/certs/client5-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client5-key.pem new file mode 100644 index 00000000000..4abf9d1d980 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client5-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC3swRbIzmoJJYV +8ojEwAv9noa9C2gFjYaY0EPDH0VIEcBebfyNGbV/EibfFWi6Ox7PHFnDJrV6cTxn ++oopWYSVFDKmdfQr9OpXhUzaqiZ8Oy/r9/654xaACc+tsrT4aBs7VjDZUHpveqwZ +iARNn1MmMp0du6rL+Euix0u9m7/6p3pGXyIKqAhruOcu7UcWIxxmII3ioAnf2+Kt +i+iDe3nuNLdrCwXTvaQ74dqAOY6gXE2Eur5DubUM2zqMGJTc2KLRuEoMRkcRHYW/ +PmgT2t76AZ178H2ZmTayS1ljdSLDh5PQlqCu4aB24ie+ooC1OjilTbcL7MrNJUdf +K6HM2Nor9G2BIJOY68WBc3i5a2FyN3e1k5MxjuJqlGpF7ckzQKSAcQCiZHv1rcqR +hJ+4UF0h7DexHTj6veWb+QH777FIvXKkMXOdsjSXa6vR7r7av1/CFHR3bMDik0jK +XqK1QDQW4QJV3wezRxX/oh2psKZHP6paDIN4GExbeaKrLYpdJCbTQdPZip/9darl +t+ZDWyrShjr4O+xhNb2FpWKsVvWWy8UqVwA8eR7beMpjekxKSqOjP6Yl3VekjGrO +mukuN3lrI270W1NZK3ZTxizr3chVOd70DGHSSeYact0jCFhN8kLyUzeU0KqphDd4 +vLXapM9RyU5uHp1m2NqJI/eiF1cvQQIDAQABAoICABA9VC+WZUd3DImeKkxydIeU +QccT62XqRVwxtSaFF4PaheNoh8DAOr+kDtRT49vq1BDCfzD1tWoV+Ugb6aY5fV4M +g2Rx3bHGcge7XLhL/cunvoHMJ7RZM1OttQgJr16dHXiDvcfIXzqHDBB/FMT5RnQv +BJ1sn28ZYTO1Df4SIcj6kKBd/U1siL9f1kLbbm2yPEAkX/Fel+DhYyBX06ij/CXR +z+aM+pPlxPoXlPsW8Nk+WmAB05t7HwVRmtMTF+iPgj1WCb3HYgr3otZC9xfM6+oK +xdhUwgBtKgezCjufpFYddd96dslfCueCRg81SyQ6TxMAqXj1aFb1v7crIp+ltWEH +rpOrp7Z+VH1vWJHZQHRdlNKeM+JEnzwegsf40+L92jItTrNzQqtZwHjdwC+LvtNu +0iXPg5qGku/0rRJpIp6lBeinuFivnm1YFow2Z/fZT3kbai7aokQyavJixTwIH+Ze +hjkBOhMnv37Mn8EhtrlJYPO2m8VqaISFZOmbx+lGb4eQ2i5dVm/rZ0j2y37bMLEK +xe6NcXprMTlVTChVCBGSEJ1J7M0RFOi4I6YIw0A6mcZcEtS1MX7PonMLzhQjlyQt +WawoVhtYSIhpsa+0AqetkmBcjYHpu2vXyTGP32c6DiZDSUOz46jpP/XdcbghfZyy +SyFgjTCyrnrHCwo1odwNAoIBAQDhIlEqPsmN07/6IlwSxvEChEAe6ePsQ7qDC1xQ +Xh42e6E0GaiTeb7px75E0KiumDvqQ55m7QFcOWFJiVn3tBpekplN2T6aBIY1MwYw +2aCBO+SetdNtfeuaFjkO/RfNNgaKN43jfNfwpHeSZFIQf0v9l3xz55+pHkDDYTRv +Z1sAWTJniuyZdLtXgXpWAm31UMF+lNxje8gtg96hQfAQhfQohJ7d8m5IRzcyYbJZ +bguTNLWFH8d5RHEO6AfyOAzv//ExoewrqE8HcdydqJ0b6a2PqdOh/VOcmqDCsZvv +QTt2v81OK/TxTEaRCXNwh6ueTi/6lEr6+JIQWGm/ZzEH6WQTAoIBAQDQ4m+QuvY/ +uMQWlL82Lw+SuHJcjsVQML/PMvDR35+1G5IYliuHEK4URpRZBal9wXVzmqROR2gP +YK1KwUVcLCVjG3jxnCaCHJy634e5/8Hlj+HAPpeHrHldtvkLFjXNT9/zfo0mopjr +P58A5av8XEjjgTWLycD6ba049DntN1xpYCoT/QxdbxKcKDIKOBFfVQ5MnCNEU48P +hFkXxN+/j+im9KHOpz/qnen0WotvI4O6KHBOM48J8OIMpdgwi08hLqKeR+u3Fksm +mbIT77ivWsmmFMF9+Qr8pzW8Qr1ThUDfV3l/yTpXM0jLsp5L47ReueNn9St5muhg +y6STis3Z24HbAoIBABwagZFJdzWaKkvaQC4j4XAT/EEfp1EV/1FeeK6boR/h/PSZ +A18tgnSCd7D1bykkXc5XlI16poHRs+BPIgj21ZfaqwJ5zQPC7tHzRaGReSTIyjg5 +sGPppSB+B69kTrVSOizE1JSCQB6NGVQP6+KdUWodn+6ECn09Bo6dMsN5XnXDrLQa +NoDNfplk01WrRfiqlQztHjnY+NsdrAvrREF7x0fLjl1cZwHkyfIjGfVp9E3y9Xnh +xq/PqGFY+6zz7EGFbKFqeiXVA9CFGSohI0OZvw3BAGcnTWmuL8U+EJ4kQX0IYmOQ +vynkAKcYS+3PMdA4KGsaWeXMY8dgkgrPoeI/8zsCggEBAIXgCmb1HKv8bFiYnvBI +Oy/q3QjrTBS4GcbfPC8WptI5SGpPnLgk/ZzrG1ru4RFDM1aTTykIZO4uJQKbWivX +82JQkjQvwxJy6hRu/Rs9ivTpQWOn+hAvf12LzpPHeVYMElmJy0duwJlNnkfB63eD +YgfFoR7lgLRAJNwy0wlxCN5UjsNe1FTxwjipHmEkTCYQami5TXUWJMNqLHuqZ0js +H80ZTrcWxEVHTlGY0nK5jUx7bHcCXnOdmhNHG4cJUf7PV9Qf/jfKpwGnUdWrN2/T +vRefh9+IBP2m4S62i4cDCW/bLXiz7JtEuCwtX3z+BHUNqGzRWjrHCJY5DlKtjH9L +Sy8CggEAO7GVsRaSB8awfFBK0qI33WQvW84GGeMqhEIUeLsUplEIcw05rsV+TxFD +m8YC21yNEUUHICpEA3w5ZPXb4e7ZX7WRZwWpkF97sklX5tJBqwa9i/x/zhB3Jz5z +dQeAZmLotF1OFczw77lAwxqGTMEsuQ3iZdXMNug1tx/Uv2DehdB2b0kLl9/G2MXf +KhfsH2+08kdg1pkXmQqO27pUzX6CNNHe4g2qM/t9qpPj1cJEyIgL5Wk9GNrWhJLI +gwBOf97eEQ9qxTIJEWxLUVbGMz6ULo23hmZi63+wdlQXXQbrqQiUw2iUuWDvgEX0 +GmrseRP8qnzBQ6OdHEn48aOT5XMXGg== +-----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client5-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client5-req.pem new file mode 100644 index 00000000000..a4a3c9d6531 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client5-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50NTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALezBFsjOagklhXy +iMTAC/2ehr0LaAWNhpjQQ8MfRUgRwF5t/I0ZtX8SJt8VaLo7Hs8cWcMmtXpxPGf6 +iilZhJUUMqZ19Cv06leFTNqqJnw7L+v3/rnjFoAJz62ytPhoGztWMNlQem96rBmI +BE2fUyYynR27qsv4S6LHS72bv/qnekZfIgqoCGu45y7tRxYjHGYgjeKgCd/b4q2L +6IN7ee40t2sLBdO9pDvh2oA5jqBcTYS6vkO5tQzbOowYlNzYotG4SgxGRxEdhb8+ +aBPa3voBnXvwfZmZNrJLWWN1IsOHk9CWoK7hoHbiJ76igLU6OKVNtwvsys0lR18r +oczY2iv0bYEgk5jrxYFzeLlrYXI3d7WTkzGO4mqUakXtyTNApIBxAKJke/WtypGE +n7hQXSHsN7EdOPq95Zv5AfvvsUi9cqQxc52yNJdrq9Huvtq/X8IUdHdswOKTSMpe +orVANBbhAlXfB7NHFf+iHamwpkc/qloMg3gYTFt5oqstil0kJtNB09mKn/11quW3 +5kNbKtKGOvg77GE1vYWlYqxW9ZbLxSpXADx5Htt4ymN6TEpKo6M/piXdV6SMas6a +6S43eWsjbvRbU1krdlPGLOvdyFU53vQMYdJJ5hpy3SMIWE3yQvJTN5TQqqmEN3i8 +tdqkz1HJTm4enWbY2okj96IXVy9BAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +nMHx8YWaXuz3XKbouq27h4/2PWl07evoXfKMcG2F5hon6zhrk/scEbKIlLOlSZSE +L1zVZ0jQsWRFsezHVGo4e9ZEmNflWO1XSgHOquQW1NTkyHPL3D2duJTlVLus/4Yo +gqLGgyfhU3JWsFGD9hnlku9z95HdnT8BFXLCikb1KsaC8JOFwYAgxMITb9aKzP1h +eX2hVtXz+O5TZNYSTIXHs+9AtRjvX1Mhj0L4gCYQueAjS/f5jU1/RhqOf10ZLe+B +SbCirZgwvlErULM9KI6sT88OZlEgfoLaoxN2X/9YGGQTD8JvByFPryc7tCTpV6t4 +pKpUQ3vTS3LA6KUQYEHFAXg7u6/r11dk1k2Ni4q/NhPpjhSsooE9qTUDh0jUNW62 +BuOhkxyDFIQVgJqgkD2Uq6GswkgL+GX6fUBwYNYZbdC7tv7ssWX7yGcn0jYZEAIU +8SPhkW+IFqQrG4zhJKh9ohasAn7wV3AfSQBE2P+cgOCasQyTYQU2++1FBVe319lu +IoKDLRAU0shc2vuwnTbnWOlshBBD3gKfSsF4YxV+5zOe550HW+19v8Mlj4mdRaQ3 +Uh3SWGPBGVy+bJgBaSOjoOoUSDPPmzIPo9bGCZ8SOut+/jsZ3PJ8Mn1b+geBugt4 +wmJiTGQXbRunGYV0pft038caNzVJFp/uSd2O3PegwLs= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client6-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/client6-cert.pem new file mode 100644 index 00000000000..22e57c3f2b4 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client6-cert.pem @@ -0,0 +1,33 @@ +-----BEGIN CERTIFICATE----- +MIIFojCCA4qgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJH4wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwODEy +MTAzOTQ0WhcNMzQwODEwMTAzOTQ0WjBXMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRAw +DgYDVQQDDAdjbGllbnQ2MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA +ld8aBatdDBGEVARor//8lYQWW75VzNsTaNXzPFjitA0G4Dx4F1pcV2mBxoR/1aqv +4pdwku7FqWxqT7DfEzVMoeBFzMi6Vlrn8YFM7mT1fJ7r/jI+JzDc5yoOF7S963Sz +yuwWHFKrkyXftaxhorv8upz3qTNFvui+3NkCD2N6FNb++Qz2ircB/KQFq3dIluP8 +wL6zEMgyKcgYhr1MLPfe6JSqU4PtGUnKjEpFN6Fi3Ubvd6NjKqxIwQd46xJyFi9V +XLbMOhCtKXXcaO0Qi3rzzS+7seCINX06bAnUHMTMlWNmYw7b09+w9e+8yjICsKZA +hNyyNxA+CikhlcZpP3oMn3T/slIVekuRYszlxD0KLMbZ+lOC/LmEPIrAcfhCc45M +ys7s0ijaXO58NqsGstGHTSz8WW9YP6VWvfcd6OA2sXDAiLDkkgk4Sh6hGGOkRfc2 +RmMmKPgMdLQOlBC5HZ+m+EOsTTQRKDPs6HsxKksxe+mOkAnPExmpYgquSWEHXE0g +eo/BAfLMbYsob72XAe0fMXjRIGKQlJXv9hFIKhcqwnGaOs/n1d4uBkdvdof9RjUt +p+JQxkKIU72J960hSWj3YmVTzdN8HtHK9f78eenr7+HmB4sUFr1qzwA5h+Y9+6Ou +FUNM7+WfXWfqGHaMNbEVZA7saaEYjyR7FjM7+lQikMMCAwEAAaNrMGkwJwYDVR0R +BCAwHoYcc3BpZmZlOi8vYmFyLmNvbS9mb28vYmF6L2ZhcjAdBgNVHQ4EFgQUmRBH +PAQ50KDlkzSbJg9KYGQsbJUwHwYDVR0jBBgwFoAURoygeHMdCVTX0B/KOH3lsIbu +jUQwDQYJKoZIhvcNAQELBQADggIBABzYUj4S02LGetmZJbMs+g1BmD3mwerE44Ji +X6cqOlnIRtjnSjRZqDufAMh9qQvwn1g9d2GyFkkk3FDM0KRubW4w26CV1g4CtduV +CiIibpoVrUCkoHTbJ3WEygzWby1AQ4crgQPFbJwvyzsnjBnLO9EORyL+T9QAx5/j +vOOB/WSOwpu+LEjw3PHlCR5TuDfN8eYoO7thfqlCQ9A6aTVFizYLxfaQi5zluLyh +1UFgBJ5ZEzlkem4chvhNBDYSDnqmCw3X553MVGTa5pE5Q/vv0FzPyPUG/iml7DiT +xsGFLH3tumWw47HTKQit+9spMg29xiiakRcr+TlXy5DTmnR7ERYoV5jHg8EpJqid +dcacvjEVRYblmfb1lRrZaULqXE9+w7JqP4V0r6vQeiftGQ1Pn2MPqcxMHqehr730 +pp7o4L4c9IpyuGs7SQAf1LCQMhb3H+iQEeVI1kExeWcPZZJs6Bhi3fEb4/m61W6f +6vJd2WJBv0uxgMHKrv3k/5biPbVwz6yA1oCdwEGiCHyCFmyYNC76qrbU+A6F3zqs +fmTRlS8PLZ7iJJw6g6URh+Dvez/+PMq0KlyHKP8CFjyWr0i5rpeNBTTgnfCrKiJQ +TQdEC2+KATwYlzB8DXeC8jcj+L4K8Cp6XRBIQnALnOmwJcRsTSVggVHxF3zuxVf6 +SXfWywZU +-----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client6-ext.cnf b/tests/integration/test_ssl_cert_authentication/certs/client6-ext.cnf new file mode 100644 index 00000000000..d421f4198b4 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client6-ext.cnf @@ -0,0 +1 @@ +subjectAltName=URI:spiffe://bar.com/foo/baz/far diff --git a/tests/integration/test_ssl_cert_authentication/certs/client6-key.pem b/tests/integration/test_ssl_cert_authentication/certs/client6-key.pem new file mode 100644 index 00000000000..9bf833a3a79 --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client6-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCV3xoFq10MEYRU +BGiv//yVhBZbvlXM2xNo1fM8WOK0DQbgPHgXWlxXaYHGhH/Vqq/il3CS7sWpbGpP +sN8TNUyh4EXMyLpWWufxgUzuZPV8nuv+Mj4nMNznKg4XtL3rdLPK7BYcUquTJd+1 +rGGiu/y6nPepM0W+6L7c2QIPY3oU1v75DPaKtwH8pAWrd0iW4/zAvrMQyDIpyBiG +vUws997olKpTg+0ZScqMSkU3oWLdRu93o2MqrEjBB3jrEnIWL1Vctsw6EK0pddxo +7RCLevPNL7ux4Ig1fTpsCdQcxMyVY2ZjDtvT37D177zKMgKwpkCE3LI3ED4KKSGV +xmk/egyfdP+yUhV6S5FizOXEPQosxtn6U4L8uYQ8isBx+EJzjkzKzuzSKNpc7nw2 +qway0YdNLPxZb1g/pVa99x3o4DaxcMCIsOSSCThKHqEYY6RF9zZGYyYo+Ax0tA6U +ELkdn6b4Q6xNNBEoM+zoezEqSzF76Y6QCc8TGaliCq5JYQdcTSB6j8EB8sxtiyhv +vZcB7R8xeNEgYpCUle/2EUgqFyrCcZo6z+fV3i4GR292h/1GNS2n4lDGQohTvYn3 +rSFJaPdiZVPN03we0cr1/vx56evv4eYHixQWvWrPADmH5j37o64VQ0zv5Z9dZ+oY +dow1sRVkDuxpoRiPJHsWMzv6VCKQwwIDAQABAoICABApNaok12zEustAinVoGjmZ +O7v640+q1AX7BDaesxuNSA+IFb8W7Fw0sVzRzuMge8ToZahCfSgO28vqG7P7jF7s +SzqQ7p7/QIGnWr9ePaaJkJMhodgiCcAXpdtijxRDylt4Z834EC5W6C+z/fdJZDBP +WtlxAPVcLbCs6e704CY9JNOAVVmR8HHo0f+yZi53OTsjlTWbEOW2gQaOhRdYRz43 +QcKtuDSA44mLPLmJ+po32vzArqHcYCAIiT54B7tisUbFI3Kjd7i/z9u0rJMN2Bpp +Kkk9GBMEhPc/fBL6GcGGLXVDFetb9L4gcvAuwmGAG/b3wXP0mWwJEWkQMys4IwIo +oJ+FRCGtkUq7cdwtwX+mJyNKA1g1xO9YfLMhmKpUaxcLvsMBMb2ZO33/sIH0GNSL +csuIVAiVzwODNo9TbbgKylkKUjbVC7XnQwdGOUrl7wGQdI/jTjbfwHR67S7S6Zkw +Ctobr9+lxz9pUcR7U9QMAKwCheNcx+vVbmeU0QesL2pVWKjCgWJLI4hmK9epGrTQ +ssKmuIV4k8fDQ+4yOL6uk8SbGRLKE/tjG5IArloh+0TWlF537xhFu1U48CT9i60I +GN4FYes+P4LkJToJP4w1Dh83VuHLJBR6xoK+tb352SaQHONv0nKTnxm9PeRsADwV +Fu1YNitG7P86cpkvVgMxAoIBAQDHPgalQK/ld/aNbLj9Nx+M4Ok/eukNnQYjGstW +ScW4OPHhNDDS90X9vAA3sKK6qwpt2B0BTBoSb9zo34G2Zx4IJGOgzVlnRKb202aM +I1tSdt8ULWwjCXyN4jH078FAn9RKtA0T4V/Ps1R177oB3P5ohOCAWAFatK2ASd9y +bt7n5THi8xcjNikJiMqyHXzVMIB6owe9AXpoY89KRJm8c2J/tgypYSJ73JbMTkQ/ +sOQz7G6BpaZMc0IcjqsyhY7o4kcM+G2VYvoLvvstAJkLgWShZkHEFARmOfgVhNw6 +Fps+0VpmCCqoEIsMXA1D+Cr/XVDJ/O8xQLOQP6jhVeV2ySGpAoIBAQDAkKc5Nuzl +SQwnQNY68h4n8NPjpgtxwdxVwBvYWiBNo9wol+RjltgXb37ks3/SE9eoBJOLuzAM +agA8pR3hk8B0Jshpe+ucBwT2k5to2lCSWVcr+VdMjKcm4s1ts+zCacbaEZDsbBW1 +J5qXxy/78V2rLWcd4aWEu4U35mM48V4W6RuC7cqHWYargXM12U3aTQX/ZA71EONw +VjNYgOJ4YvBAoKaaPVBEjLftWQPCwuFua+dC/i+1QYJUhoxVUd5ySvz/P3FCVrP2 +vd/PHi02mim2lGkrl71bSeu3DLbJFGf60/LbsdK5/nOp66vqCJa+KBQ8UEqK9Pb2 +IzktLFWjBTqLAoIBAFY3+Rg5zeQ27XJ1wfeRiw2jpkvaHE/Py4AVcDh+5Bi8S69w +rlAcwSTz3gQ7Y6/zpCe0hZiyHfYsgiQj1DXimZRcauCC/FU72CSnhpDOHA6rcg2B +OnJPAJ9Faujbd31HrM5G7AHWXWe76qi+fHeh8lW4ao4fhzaBSTNixFb4s20WOWhE +WZbbMRb3Iv/A0uxOeMqZEhgwM5BI4ML6vKxYIh+x3/jMF+gRpQ/0LoSBP79YaNmh +nT0oA0voUR0jvpv4j1aFiqZCloHOu1LWF2RrMejGH1+CagTywPBO9h4M+lFMtxnA +HvVrc0B99R+cnL602ukNk1R08z9QzPv1975Xe0ECggEBAJ+A8lLtEQrXF+8HwKvw +01PIIuslJcOjjaulxirOdIV7HiitsCthzjqTSyuLF2xerTpqGAEdGy6dOvcA2iB4 +r2hGm6jsUXvbQJwyJf+THjwrLo6pKUuqEeae5QRijdF3ppQJCt8apFx6oo3oGvH4 +utrIb+qLdvvcC/wCpNuM6p/VlMk9yI2WdXtobZMEHX2eYUJrkgwiYhIyBLPhhjWr +1k/iAj4uXWd6m/tIyVmw7OP3Ewcl30SnL5puHJ2rg7NuM+QFm/4ULVtLabB40YCx +761hfz/xn5KXR11HdbxXX82fdEHQKLmRcCMmqC7h0GNQdXqEE0rIoCu/f2PQnlq7 +QQMCggEAZBCnrS1NtlP4KN+tbSfUssxvnU/Lriw+VzkGclsw5QMRjeTTvcEV0Bh+ +ylvf7xwX0nansg+hRbB0k9wYmkasqFPSOhQYTQ1H+N8oHgoOEDjZz7xkP5j7h9qb +K51uS6qoRE0qroI+BDWSom+9MPcnk1eyIzfNm2hk5ymbOz6lxSoSI3HdoxtAFoz8 +D3h/7o9dBMZIjBSawh1M/YLKYemZW/FyV3EDQqi5wbWV+C8vZkAiIohr4N55MM1A +BWZG9+PdP5kCiacwZqUzX5vKyLTG9d8Fv2GQ3eCzMb2aRMKX61Z224+XuLy5/rsF +jbMZsV6A1MRCzhDoVIaUcb/2RUtChA== +-----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/client6-req.pem b/tests/integration/test_ssl_cert_authentication/certs/client6-req.pem new file mode 100644 index 00000000000..a83e6508dbf --- /dev/null +++ b/tests/integration/test_ssl_cert_authentication/certs/client6-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50NjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAJXfGgWrXQwRhFQE +aK///JWEFlu+VczbE2jV8zxY4rQNBuA8eBdaXFdpgcaEf9Wqr+KXcJLuxalsak+w +3xM1TKHgRczIulZa5/GBTO5k9Xye6/4yPicw3OcqDhe0vet0s8rsFhxSq5Ml37Ws +YaK7/Lqc96kzRb7ovtzZAg9jehTW/vkM9oq3AfykBat3SJbj/MC+sxDIMinIGIa9 +TCz33uiUqlOD7RlJyoxKRTehYt1G73ejYyqsSMEHeOsSchYvVVy2zDoQrSl13Gjt +EIt6880vu7HgiDV9OmwJ1BzEzJVjZmMO29PfsPXvvMoyArCmQITcsjcQPgopIZXG +aT96DJ90/7JSFXpLkWLM5cQ9CizG2fpTgvy5hDyKwHH4QnOOTMrO7NIo2lzufDar +BrLRh00s/FlvWD+lVr33HejgNrFwwIiw5JIJOEoeoRhjpEX3NkZjJij4DHS0DpQQ +uR2fpvhDrE00ESgz7Oh7MSpLMXvpjpAJzxMZqWIKrklhB1xNIHqPwQHyzG2LKG+9 +lwHtHzF40SBikJSV7/YRSCoXKsJxmjrP59XeLgZHb3aH/UY1LafiUMZCiFO9ifet +IUlo92JlU83TfB7RyvX+/Hnp6+/h5geLFBa9as8AOYfmPfujrhVDTO/ln11n6hh2 +jDWxFWQO7GmhGI8kexYzO/pUIpDDAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +aQr207af+Rbh3xeTB6o7xA8okzQnya3E0sBHqXGTWGvDOYg+yMlkR7/06YWcM1uu +O//iyRcu2lfd/+NmduxG2RtBT+Nn91WSF9UZH37uIKZIBsj5zaYhNFMnCLjtdpgO +lL0HFPU0IVR41Fq+vSGuSiOLLjnNG42/irGJYcvfEQ2yNRYQ2/y6GV/puGjvH/oN +np+ucn7ae/sLlpQYMT5OZ8Nwy/2b5hcEC4ekuubGeYtnWx4is9hxz8IeJZAxRHJI +xxIDkhP82elJpTMv4dsZYTHYHv++M/WbWb73HiAlD9g0BKYuzolbCu1TYZzbEz+a +4/lnbkR6FkKxFlP0JrTJSceQW3d3iEuER+n71Si7thwk8Bh/np7GjbHSs0ZnT+NB +ua59bE2YBWpZEoA5IgfRK+4i8AtzyJqDPnQNM8DEBN/BLqppGLitw/nMcEC2nWT+ +b+PIB907kkikBUbyZmjF6f9Fhpm35WQuCubVz86f+BbGZLwfGyVnl7TLxXiVANI6 +DXqptFt3ppv/pPRzg473pUZutjzFsUbcJNal2FWtCH4tV5lTWaBeypC3PIzahdkA +PwF1IxmpBqmuWIHXIqId60g0LLah32sT9w4BaC0rpUNeuG2yTeXz3RRjG7a/x8xl +6m751kWb28j9L3ujZI1fr6E1RKVIhe0OC9wU3D/5J/Q= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh index a09b7b2874e..e8d649fcff1 100755 --- a/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh +++ b/tests/integration/test_ssl_cert_authentication/certs/generate_certs.sh @@ -14,12 +14,16 @@ openssl req -newkey rsa:4096 -nodes -batch -keyout client1-key.pem -out client1- openssl req -newkey rsa:4096 -nodes -batch -keyout client2-key.pem -out client2-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client2" openssl req -newkey rsa:4096 -nodes -batch -keyout client3-key.pem -out client3-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client3" openssl req -newkey rsa:4096 -nodes -batch -keyout client4-key.pem -out client4-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client4" +openssl req -newkey rsa:4096 -nodes -batch -keyout client5-key.pem -out client5-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client5" +openssl req -newkey rsa:4096 -nodes -batch -keyout client6-key.pem -out client6-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client6" # 5. Use CA's private key to sign client's CSR and get back the signed certificate openssl x509 -req -days 3650 -in client1-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client1-cert.pem openssl x509 -req -days 3650 -in client2-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client2-cert.pem openssl x509 -req -days 3650 -in client3-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client3-cert.pem openssl x509 -req -days 3650 -in client4-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile client4-ext.cnf -out client4-cert.pem +openssl x509 -req -days 3650 -in client5-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile client5-ext.cnf -out client5-cert.pem +openssl x509 -req -days 3650 -in client6-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile client6-ext.cnf -out client6-cert.pem # 6. Generate one more self-signed certificate and private key for using as wrong certificate (because it's not signed by CA) openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout wrong-key.pem -out wrong-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem index 073c6485bd2..1ad3712d547 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-cert.pem @@ -1,31 +1,33 @@ -----BEGIN CERTIFICATE----- -MIIFZTCCA02gAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJGwwDQYJKoZIhvcNAQEL +MIIFpTCCA42gAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJHgwDQYJKoZIhvcNAQEL BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM -GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwNjI2 -MTAyNTAxWhcNMzQwNjI0MTAyNTAxWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjQwODEy +MTAzOTMyWhcNMzQwODEwMTAzOTMyWjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQ8w -DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDd -l4bW8vca9/iFDASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU -6JGnlYdxvWLOEa7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+M -pkFCfJG1zMWR13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpD -nxYa6xSGlHXHDzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEE -IRIQg+k/2S673kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4 -EHgELy9Hs0qnqttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZ -eFTnKYpqCshVAJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+C -BQikgADbURgZT4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4I -zOuJJMxlk2Dv3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQu -hk+AinrsDIL6NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m -6aYuqSX6vQXb7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABoy8wLTArBgNVHREE -JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTANBgkqhkiG9w0B -AQsFAAOCAgEANvZ7QDkHIKRq/g4GPkuiU7DN44/TW4bOFe7rDC5S4z5sh/i/Tvur -JYW7m97vLui5PJf6Vbd7xyl5MFIiz2KzoLi26rlvYcI/BT8mIG8jMg7pJjp/wJGa -QdCxdO99a4SIwg7x8pvWChFAOij5e6RhrIvsEB0LN5kKRSnQ0sW5khfsdFbn+3Iy -VwyvvE+nsCqE+KK358EMHicn8FVD3Ze+YzckX0am9DbshL5+eVQ9nOhUV2B8PcbG -SGzqJF07wOBwCdcn3eY+V98SQqrpGC9tCXmv8qErfkq7pkUGWq15d+miF/gaUz+Y -yDPwgi1pephBJ34IhLUUk0IPZJ23uVv/zfB+SpZ9/5pjsmnapR3Zf725jWrhjeT8 -44i5kNeVCvZPzQO9cTOsLXJbWb0vqRzKsvuSvffDQZql4bMMvhPjMibqCiRuSHO/ -yPlWiJjhkZz52DPJX6+LOeP2pFfUe2TR6IqcFPfUs/bV6aD2L/s5UZfZXWS5i5FR -I8uvcKOWL7NBbdY+NVE5aT7DqfhaRurjp61Aym18FgXLHpDHYV9IpkU34+A1MBUi -bzHZRWhxZMRxYezC7jE4zsZ5CQtSq1miDPcDaeK5vMd/Vdys5MIekqCfKUh+Cd5Q -gfC2QgNgodcWRF2plNgA+3E0dUULR3+1s83gGWGC8/UFW+9dtYV4nv8= +DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC1 +l+TO19J5ZBVs+ZVzT62wM3JRh+CzO9/ogc5A0ERwVsfBFQYz1r6pGHHTxM2w3auu +ehyALbiW95eVvkpu0QyOd+CaH4K0DI/reotYyvAX5+Qv5tnHGAwFKNeDnNG4hpCR +MO/BlohZEh3Ppcm7jvMVpPhSM8ojZwSvHsEMBVb4QinvpvROLIDrA/zPQSlW0S0A +ZfvV+cjxIoDjTsg/M0Jn+6VNNRrAemx8aKv4TyOh2b+zYNa8wnlhdspTgjCseMeC +ORzDOKMP784p0xWFwQzaoS0HYrxLoijiwIw6/I72ADQxkupW4OY22W6qjheDe67M +33/KZVZ9lssc7QKGBuvkxgsMq63M+VI8umY3gsIRLJKEwXkqviIXNCqw+5zMjsRB +L3UUBjHibD1cHpc170xf4IOMa0Nk5svNy0tQ8OdTC90t2ni99O5E+MOHNTn3mr4G +Chv/+PVlWjf+2PWksmhNdTBkE64JXSrf3fb4OScbkhwD06YWVPtak5xv61QNgL9q +srARcm8f5jrAlzqU6rYHnCkjB2A011DLh3idwxZfOtxT1jfu+NMP2N8xFgIjtp8y +iVRQopXMA/HT/3J/INO6SLo3t5nBRjphDsaOcp3tOlAUYa++2QXM87+QKBlRBgC2 +Mr2iUonqPJFmAa62TudYGoBKqeVfz5yFmclUGZlb/QIDAQABo28wbTArBgNVHREE +JDAigiBpbnRlZ3JhdGlvbi10ZXN0cy5jbGlja2hvdXNlLmNvbTAdBgNVHQ4EFgQU +q1dxD9JZn+JeqZOIKHi85hq3J20wHwYDVR0jBBgwFoAURoygeHMdCVTX0B/KOH3l +sIbujUQwDQYJKoZIhvcNAQELBQADggIBALywV91PI6aClAL7LmZjVq+IF0AeSH64 +gqx4WJsOr8sRSu7SXea85JW5qHrZByjdH4oqxVrugji0hET1M5tqExsiQSBBH94W +kpPtr1UbJHWdKPKRFe8iqnFfSaTUZwA3gNJkmdv01gPGrySb5O8AhvGqCzhEQ5Iu +PH9Fu+xclKwF7UoZgfTWsQgw1GaNpSg7+86J3F9vfOzphsth2vKxGYVxnsTxbKtl +pwiy6jOwhUeix6z9+OgVOfmIxj6OvNRsGHRvlOumo3NyVMGrnABnIjBLj+k8rsn8 +wzKdck8K6ttj2fXQOdTywwipN6BrkMK9AJS5a+0ylKsyWG/scjbt0cMY1uvxLXJs +/KOw7FUy/FfdnD0BnYhmiIy65guWSjtyw+n5QK6z9OXTfcgeYZyKhgyK/HLxfMtX +PNs/DghZrI8vqX+oGW+HTBpitRThxCZ0gBra+OZRidF0Rqd4nBKwFJGaYk6B7uF8 +9oek3IHaPEBV5bqknHH8/DjzXnDpFZ/hphkBgmzU8wquGiX2sfYiQUQlcS5b1t05 +VYtcnOtIhbBe/kixcDtyqRCSCPRdrgg4vOXduOpUbeejmGpjSPe/KeVhMfP6JHWt +rxfwkTdN5xtlM6JMrbN/NBWtIrMsHyM5uwXqY9LnO19mHVYYzEZqSCaot5yOMvzO +BMSyrrc+OTy6 -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem index 067adf4e1fc..8097d371e0a 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDdl4bW8vca9/iF -DASgmGxyCQvEK5roWwPzE/DXdS+mMJM/fffyqB7Cldm1KW2ILDYU6JGnlYdxvWLO -Ea7n4cIBrQ2za7vfDU4LzGJTBN/9C2TYGFP13FTfZi2gZOuipT+MpkFCfJG1zMWR -13yKNxiS6Ixf76c6ADUyt7GmRcuKSctKzM7hUARfeko5iBN8aHpDnxYa6xSGlHXH -DzHHEqEpsk+iQ/eK6o3EeElg7v74sZSzqleIkEfFLiRIPNvuAwEEIRIQg+k/2S67 -3kdANlyfszXXbUGCYU9lOOPYgAEFgb5ljSXZJBTW3uFlyIONK5I4EHgELy9Hs0qn -qttXCx8d5I/eI/+Zx9GPF3x8gkMi5wQePiBpqde20kRYDvP2eGeZeFTnKYpqCshV -AJ4+1Kg96f4S9+GZ1wfzsgS1lA7+9qr3qTvI+XnKhC2h3bqyWS+CBQikgADbURgZ -T4EzhXvq7fSCFHFTA6xZzXNEVO7DkRrYbvkq06bIl1Ov9vcbCN4IzOuJJMxlk2Dv -3C3mUox3HmcOdO+vvtWnZt3HbFY7ZPCV68ObSf77YD3O5on5RFQuhk+AinrsDIL6 -NiiVzALXBL+e8flkqZDzRk1mGphVXGcRP6nn4VtrfN+Bmbm8pu3m6aYuqSX6vQXb -7EHW1tAbvlfbxIlP/Hp5GoV8zqI/tQIDAQABAoICAQDaRKlTDRwN+ndXRlFAhyM6 -6GIopvL9MLmhM+EluY5n2q0P+1rCMIusC8LYSahUW4gh7DucoRM7G9s5M/3e9mcN -E5LNSq9RtF9OC9JGCCVBsXlxyfTZ1l/bdWA3/3CDUtZYCmN5xA4az0tErsdDtaWE -/39V+E/2N8Iu5PYd293zp2CRm0+kbBcCnQiDxt+6yYa1GPzDIw+iyJWCsBrOBjGt -SrBaGyy4LvXZsspEquWHvhPFLWLvZ37qYNroNNpFhbv4f0K19dlJRPpdn0L7oxB1 -VicQvdOrQ4LbJ8B2vw9Ch1wt12ySiJHmXMAUa//4jBSJGN++72NY8uf0Y72N7ayF -Bck5QE0we4i7hhuN0WL+IftYD/O5NgOnprjMWifLOhQ8OECZfhOKgbRU+i3aJl8D -+raQBW7/GM0uL7xIoMcEZSwMs/sQR4loNCJ0UsIeWTdWNrhXrEuzDQGXoWcB8K/r -VVayDO5Qqx8R77HB82/pRdqEWNaNQd7DhPgnWiixISpIGm6zMvT3S0hzEkxu7FNb -uciq9i82BrBkKhg1kiF70FqG13VeMFqTJUuqtoRs1QEQgumvWB47n6FiVwHdDbbH -sUeKZYwbrY22Cn4rrfXH+0KKM9TDR0CiCv+CkSGmG57l5tW7aSUWun46qP8vh7sc -ztzb4LzyUt6XEBIWIqBIQQKCAQEA9+f4TyGo88qzTKaQoko5OAYhvAGr1UGAS6qh -sJpd7rHd9g6DnXyOInpQNglToiJ94mNdIK8f/oOjUXZh2E4CWuxeK291BNiaqCxe -s3LS3XjkdHQIHvqJUw/r4YJ+zfoGznthNbDwDkBob9x3h9rknTbGdLcgaTGi/0PZ -cFnyWDPNPskbcZ3Dxr41oDHiVsOx8n4d4HtspXzh+EPiQiJz5HVfwGNEyzhmFWIa -EzQaxnHL+WF1Pqe1wrzOwZia0Jss8lxbHcTnJupaV5WDvbxY0E4ynofShicv1U76 -B41xDKP/8hFWck9LiMDXk9vrbQIHvGAcsYr5N/jzIrDiEXxvGQKCAQEA5NOegb6m -Ak0mXg+XKhZnSdR1KaWT4/hbVWqYjwxAeAJfMaxjD4MXA8qeEAJo3l3ETkdFCHp/ -lr/BZzNYXnmHU6uvDn2Xq8gPO04ruSV0WWthikXb5gszXdkUH+6fryHn6L0kYJQH -NARQzOgdEcmTP7sy/5GDKzydWbT5qRNOnESUWgnJi9ePjGB9zWxn4jx9AfOYtozh -UmEgofSDGbFlamQic8HGnSJFgOxIZ0AfurPIRSR51gvXx2D5TcsPjLlDrY07IcF3 -DjqfJl0gC1XN5BXdpPvjvNrns+ZK/SRoGlgb8Q4tZLQevox9W110amvMeZj4yMTK -9mgGOSYCzZ6U/QKCAQEA1mBZ4Qwpj1DNRk6PqlfnLSRYTb1gO9UdvdE7a33CFuTn -HZ2lgS2xt+zvqhrcoMuU8o2cfeQTFcP+Gjb2G9gxvzDBqmwC1IL/Esjzx9hWssCV -RoMEds2OrS6Ke4OeZj59XldhU83DeX+HEJylHO1UXwN8EHg/5dfPrVCeGsMdh9qb -9VxxiAm2wAnCU9pvcTpfimQ3L+VrqZvZyRfi8+/ZKkm52KO/XMFTvdAM3mhjcxH7 -Ipd9jQX4bwNZBB8UWaqm7pqhDJg2j/d+0lhwCUZzwwasTV0E14/RlHNsUdWlWhoD -/e+yQr2BgyvIAIvgBW8JA4RVq86S/y0gC/LMO/TQGQKCAQBB2rlaY7DJJsTs+xWp -EiuFrvRNGQ734+j9KyFewcrn/t7An/keZL7B45UbzGW74UZ2tMIkT4TasLMLbVZ4 -UgdlSBqoU/LLiFcB3Vxt+16BwYqfzb0cdorA7pGBIx6nu11PuOd4OAHesYNDhWWg -Ud/jzo89x/X1AovSXmgfhaPxCzeatghgC5iPcNGjxhgbnwbnAeEoYGEUYUmP8pus -UEZ8mPblU5ZCcLOKB/ZKaMT46Xawl2/M7zmZcsos3kzKViMpFmU3MMN/v9U/qDtp -p7cKdlSEf82p82INfzCDq++d7U+VT1w3CDN06V/GZJ31ZrLBKAopVaGHyqZH2i2i -WYpNAoIBACmfr9BoJh1/mbLdd/WpOyORKTbnwstlMgoUcARRJn86iPZuyI4QoSSb -TePZqrWRVmO/K5M65hFjhUpqTWKJGJy5LYIZ4yuIbonJAPNUhjA0bkar9cULBFzy -rb0xmW6sRlBnqhv4aDlOkhHkkR9lB9rTIUW+ankuvVBiGWo4eE8DvZYo30frltku -2K/kqd3NppTl7dN4EnGTo8ROZvr3EMwSu6nE+wUr4G7YuCLdPxwb8gAB8dbmaUsn -AXocUh96kYqTwRxo8FO9SqgQYMf81/ovPUfv+7mwO40oygzy/YkGmB1shFIbQuzU -lJvRfdXyyC9DbllQkxWfdvaanLS3r1w= +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1l+TO19J5ZBVs ++ZVzT62wM3JRh+CzO9/ogc5A0ERwVsfBFQYz1r6pGHHTxM2w3auuehyALbiW95eV +vkpu0QyOd+CaH4K0DI/reotYyvAX5+Qv5tnHGAwFKNeDnNG4hpCRMO/BlohZEh3P +pcm7jvMVpPhSM8ojZwSvHsEMBVb4QinvpvROLIDrA/zPQSlW0S0AZfvV+cjxIoDj +Tsg/M0Jn+6VNNRrAemx8aKv4TyOh2b+zYNa8wnlhdspTgjCseMeCORzDOKMP784p +0xWFwQzaoS0HYrxLoijiwIw6/I72ADQxkupW4OY22W6qjheDe67M33/KZVZ9lssc +7QKGBuvkxgsMq63M+VI8umY3gsIRLJKEwXkqviIXNCqw+5zMjsRBL3UUBjHibD1c +Hpc170xf4IOMa0Nk5svNy0tQ8OdTC90t2ni99O5E+MOHNTn3mr4GChv/+PVlWjf+ +2PWksmhNdTBkE64JXSrf3fb4OScbkhwD06YWVPtak5xv61QNgL9qsrARcm8f5jrA +lzqU6rYHnCkjB2A011DLh3idwxZfOtxT1jfu+NMP2N8xFgIjtp8yiVRQopXMA/HT +/3J/INO6SLo3t5nBRjphDsaOcp3tOlAUYa++2QXM87+QKBlRBgC2Mr2iUonqPJFm +Aa62TudYGoBKqeVfz5yFmclUGZlb/QIDAQABAoICAAE49rAsDa+JOAFZN/pg4lqw +4nzhxUWeZjazwj4/kBOETfFZG65gYPvBRBeg20WkmaCyLbTWsO8bqWa+CK1gT3mc +uslCh94kIanBH4XA0NJ2U8hNcN3WpeMySWxstnzVqaVj3eYPkARMkd9fW7cQH6NI +qle3OhsFhj54sj/17duGDoHBx6X87VPSYOwfc5cnV8Mc5hZv8mPHP0BFON3K02Wz +CzTYtBZMt8T4l7vkjLSUNinX1WNIZJpkKrkzTXV00IxCyJSBafO4NOHm8alarbFl +4AXbwd9GUzNe4QqY0xZHztcRkp73xvG0j4r2c+0zN64RqpwJapA8f9RGdYjWvMhD +8gB39KWCKjuVsFMnDrepmGjb2D5LgOlY8UTaG/9bA7U8QwJeCv7c8Z1GO5LxUIT4 +OcMoIkC+ooEOIrnty2AdWGVa8yyDXNaqejE1v76ZLKhM7lZxcmAJ4YU04gGhBp6k +wGxQHFPdfe4A4d+pfNsz8WnBKGihZJaC5EEef8loKuu84UezPVbup/YZqgfYWmQd +FoIxiY3aTre8jqEzSDiff8oKmjkSB+ab+ypCXxzzTDO4QV7BItdaV+T1ok5fdGqm ++k+M8pe9Fj35HTuBItwsvjPlvow5pt+qIe3YNjwgBOdrX+AeO5PDscZbOscAenr1 +ulCE/49ZIvT5kfNX9h0dAoIBAQDqI9hDGigVYChG4js7auJFMMouo6k19BbcUCBH +2jasHUI9MZrUiMZRY574YydPMA9jIzoNE8KuJUT5uvjIROYb3HbT2TxgOZhVCYr2 +Zr59AUtg4uIiXVqCTioLkoxc1PxnC+UR0kL162yS67w0xy4cEL/1RHSN3Q4NG1rZ +IC1Fzb5miSVReRbWLhdfxakcVdgYH3hwyRSgQo4H1Hjmi6oLAYGYDDgGwVPKmolC +hr0NBLkoKzXi015BTM5rcbBbm6vbrrcchWg8Q79fpc5tfASNdtoe1UHUFoypX3km +7dpyOfK225p/Ko4NqcpqsyKA1WcEswjwd4LL4muXLCmEu/6LAoIBAQDGjCLsErpG +XN5Ehl5iIuMVlGgMYoKG3qQcq3pJAsupQxic7p49JNPVqQHHhUbWow9p+4bfOGwp +WIg9NiX0917Jh8yiyNIR9w6LdD3j2EX826McRhTBCZb58Q4/wxYZS63XDnguX202 +WE5i9s3RGmPt7YlX6V1dyqrp6zpsU56tC0mnsuWjWwtXa3vdFpV4uMtCitWphvr7 +b+bEFtAf9fxzFfVwd7D3XhZXqB9ADS3UXNc68nufEd/arWaWHESTmuam/DL5FStq +nrMT4RYvlxGQ8TIjj3K99JKAOvBQtE5OBycP7FKgl+HKa6W4Caa2/2rqqTajM0/l +v/zuVZSXYaiXAoIBAQCX2CM6cU4pfrDgN3t+bzTK9ndIg+8LBH+G5ZfpwUTPnjIF +CZSF3SXwRpEkOPmXS23PN1tAe943ngf7hRwulTA+RV3yyeFz7iYyj/vFDKDg5OQD +s2BANRarhxGRa7sHwUDnezlVs2ylm8ZQCf2GpN3saZhz8a89CW1BZsomIc36hRHD +4ZGIIuWChWW3QLMJo0p/anlre5yayk7eGRdHPLBMbu+isdnr15kFve7ibAtS1AtY +V52cusNXyf3chGCBKJXt5ILwRjxxSOZlm1wieNli/P88G8WTTARxG2+wpXSTu60C +lay+Z9S0W7bgN8hakQs8aveTK45xtydbZNKQOHM9AoIBAAlJGoUB5zZNN5Pq2NUs +HoSu2SLjJImcNf6lQmXRvKag3bDRNBNV+pY+fjfPkt2M6LajLc6Yu+4/FrgOJT5M +p2Ezea2PSMvQSxzYheB3B+boertix6uzgB0WWf0/aXQrZujs40//5IKrJJdaRYvm ++Q9ykX7MCjLNvKqN9sCENKKRKZOvOTAnnOMswrE3UixAMDlfdtij2G8T763yBy/H +eFQFjeIBpwQaoV7eHQAI3cDVyrcWChQJaPe4LlkM32Qr1Wev2c5uYAZvf56JY1k8 +bnTh4t9o4QvjOUrH9t8/X34ktX34JnEeSVAHMsvln6dlUKHC4ixFxRHQpcqbtARU +a+cCggEBANs4GG0pXMXyxhqHANzvhukWQ6Q3shwGXf5iCInW4Ti5JcxjzkIzeLE4 +ou+EE/3c7YJ5xXCPPyhfIpxA8ff4xpiarlazm0qfnGu7F6oAdJVG0qdmN1S5tYXu +b4NIf6WCIWWI+qxhW5At3/91TC7ykxT/7qkI3uwIErzJK/1wttP+nxgw6xEOyW87 +AsPwVa/c3Ym41Mg5yqzoOXda/V90hcm0Xbi44hZBgVMP+pLhAOyfnGG56JF3ILit +VkEGVALXop6m7+/aGMr9z+xcCb1mp35r8aPP/GTKXvzGfN+yNx1di+kbXu1vIHd0 +0oKmfeNXirYgQwbe3yjrS9cCOW0+ERE= -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem index bd8e2e1fb7f..53408abf254 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/server-req.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/server-req.pem @@ -1,27 +1,27 @@ -----BEGIN CERTIFICATE REQUEST----- MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2Vy -dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA3ZeG1vL3Gvf4hQwE -oJhscgkLxCua6FsD8xPw13UvpjCTP3338qgewpXZtSltiCw2FOiRp5WHcb1izhGu -5+HCAa0Ns2u73w1OC8xiUwTf/Qtk2BhT9dxU32YtoGTroqU/jKZBQnyRtczFkdd8 -ijcYkuiMX++nOgA1MrexpkXLiknLSszO4VAEX3pKOYgTfGh6Q58WGusUhpR1xw8x -xxKhKbJPokP3iuqNxHhJYO7++LGUs6pXiJBHxS4kSDzb7gMBBCESEIPpP9kuu95H -QDZcn7M1121BgmFPZTjj2IABBYG+ZY0l2SQU1t7hZciDjSuSOBB4BC8vR7NKp6rb -VwsfHeSP3iP/mcfRjxd8fIJDIucEHj4gaanXttJEWA7z9nhnmXhU5ymKagrIVQCe -PtSoPen+EvfhmdcH87IEtZQO/vaq96k7yPl5yoQtod26slkvggUIpIAA21EYGU+B -M4V76u30ghRxUwOsWc1zRFTuw5Ea2G75KtOmyJdTr/b3GwjeCMzriSTMZZNg79wt -5lKMdx5nDnTvr77Vp2bdx2xWO2TwlevDm0n++2A9zuaJ+URULoZPgIp67AyC+jYo -lcwC1wS/nvH5ZKmQ80ZNZhqYVVxnET+p5+Fba3zfgZm5vKbt5ummLqkl+r0F2+xB -1tbQG75X28SJT/x6eRqFfM6iP7UCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQBc -TPShRdB7ZIL4xQNAxWLGoEbshbY/UpJQZdjojxn27roVwEhwP6B1/KgiVKV2X6bE -a36LUnaWYllIMAh4oOHJkIm2gZ3xitdEK1anCf5NJga7TnkwGfD4jTZA91fWCynt -a/64s0KQggKsUVY12TTJVQvOH/l9RMrXOq+jgIh4OURwCBtHTCS6oOp3eF04pEu7 -w54dMAsxp/N9AQaLh14IZUZAQ2v5kdipL99EEQH/G6llU08XrJ4LfGgwDkUjJSsA -TzrX2uk9MLHJVwjpZ99ktjNBs8Gyr4fGOmstT5TXEOO6bVhqZDC6kEL7vrmFSnDZ -g/9lrd4wLUT/STt+E4Qukedi0n/419IpkIAE5C1HOXRnQaOUdcnrLixUB21mBHt/ -n7gkwdY7Cu77dYvBIShzeCnxiJED0+XrBPD5yPokxEjE3MmiIK6meHHCuIwqLjX8 -I78ysv4COeH0svwSjvJInveS9QRCAWBpdvskxPJR8dpoysAff+jiyp3ZvkwcIiUO -Vbsusorpp8pFJXZxvPDBXCy5TOlFnIG/9itjPj98pFRIl5bxzNwDf4wrkwHEpR3i -jpM6y+/RWZn69BpTeAG0vZHhGk3SuXdU56cRzatys4X3biCImgDqeJMUcAoxlrIZ -vgbJVTorwqQmPz5kt28b8ddnUVobbZEPlRITcqjwFg== +dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAtZfkztfSeWQVbPmV +c0+tsDNyUYfgszvf6IHOQNBEcFbHwRUGM9a+qRhx08TNsN2rrnocgC24lveXlb5K +btEMjnfgmh+CtAyP63qLWMrwF+fkL+bZxxgMBSjXg5zRuIaQkTDvwZaIWRIdz6XJ +u47zFaT4UjPKI2cErx7BDAVW+EIp76b0TiyA6wP8z0EpVtEtAGX71fnI8SKA407I +PzNCZ/ulTTUawHpsfGir+E8jodm/s2DWvMJ5YXbKU4IwrHjHgjkcwzijD+/OKdMV +hcEM2qEtB2K8S6Io4sCMOvyO9gA0MZLqVuDmNtluqo4Xg3uuzN9/ymVWfZbLHO0C +hgbr5MYLDKutzPlSPLpmN4LCESyShMF5Kr4iFzQqsPuczI7EQS91FAYx4mw9XB6X +Ne9MX+CDjGtDZObLzctLUPDnUwvdLdp4vfTuRPjDhzU595q+Bgob//j1ZVo3/tj1 +pLJoTXUwZBOuCV0q3932+DknG5IcA9OmFlT7WpOcb+tUDYC/arKwEXJvH+Y6wJc6 +lOq2B5wpIwdgNNdQy4d4ncMWXzrcU9Y37vjTD9jfMRYCI7afMolUUKKVzAPx0/9y +fyDTuki6N7eZwUY6YQ7GjnKd7TpQFGGvvtkFzPO/kCgZUQYAtjK9olKJ6jyRZgGu +tk7nWBqASqnlX8+chZnJVBmZW/0CAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQBv +CpHPJBbcSRdDqO/iwJeySMSwhcM85CvyJmJNmqHLjXCl+cKQ1uJkjXFsz6HlPK1L +kXY5/ubRwQ3XuJIHi4PSshjEY4AXYTRJwTq8k2PGbDI4kBC36uEDQRI5eZBEH83E +KEvNggWXp2h1M0QBHJuG3qkrRnKjzLCUJ8Y2NjEq9+xNPFRR86WGy91uTMMct8xs +6bPPCLB6ySu9jsaenK5mTWEtYIAkxHJTK32STlFSOsost9vyYdvh7wd+mFDleD1K +vkZ4C0E8CWJISDQHHKIZEFaLFV0fEbOqvV9R18QTzAuV3uVzpP11pJxtciFHiuHg +/MCLJHWVcFNX6dSF4m/RmW8MKvdaI+fcuBgao1h8x6PMGPkPHYDQvupWwD0w5hrl +axqkXLpe73IXP5XMGeICVD8Nt77KYKW6LbtRNKIU28MfmhLn/zXqF0tf9EYtBn7s +rDwzzPN5ywhV+YXfQYSpCzheVMyfxjM3X060V5hyRWc9hMS7aC56aMNmEDI72IWl +ZAegSLnU5YF2iXaYRfftTPYiLL1n+TffK56qPI5QSn7fxuvG3N7cvVC3zeli8PGJ +gWWw4zPwRFvHk/NSepN2ZfiLZpMExZW4l5jvUrfWWjdQdh7B0a6JD6YfDS+Ot9nZ +eZkLUSJnrGqHq63BGjHDBNJo1A3GcegeaYrVkaGDzg== -----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem index b56a10f8a92..6096794a1d8 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/wrong-cert.pem @@ -1,32 +1,32 @@ -----BEGIN CERTIFICATE----- -MIIFjTCCA3WgAwIBAgIUMcX2R8I2H8vNtASHi0EoufIgWEUwDQYJKoZIhvcNAQEL +MIIFjTCCA3WgAwIBAgIUF+tVv6V0/vm8r0Gtvaro922XodswDQYJKoZIhvcNAQEL BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTI0 -MDYyNjEwMjUwNVoXDTM0MDYyNDEwMjUwNVowVjELMAkGA1UEBhMCUlUxEzARBgNV +MDgxMjEwMzk0NVoXDTM0MDgxMDEwMzk0NVowVjELMAkGA1UEBhMCUlUxEzARBgNV BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 ZDEPMA0GA1UEAwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC -AgEApABgxjCrAiDY529xEjboRE/PVwgVStD0dPE2vSOOqxfVYYOqP7VF7pG2PeA7 -Ek9Qqv2cqaDWbDWn5vT3AiN+aslfHtXJgGjPBrpOQ3Me6RSu2aUHPIguabUjz+kJ -VSkoj0pkbAqqIxKse1hZtUNHVwOmg/PthkpGPNxofNX1kCfBWoNJyuKFOoGeNgmY -6joY10zTaiHpq8hhA3b7WY35QdNGD7SwkYBvTjGGzBr/hu26fhX/DnceZ9kf9n6q -899gB2kZH5T1eTlShh12TI1sHa+BGz1YwR0HqM88zXDyAf7bWl7Hy5f8e9keZdx7 -Fx/ws93Bb3GusA5zwUm1iUe1OIwTLFlL+Kdkr0qzofDcQ0ZnNwrME4oAhCJFwSnJ -OWnrCmUou2XXj86Xl481uBana30zzJ9TniHM/hA54ttHsva/yB8tyoXcI4FASwk3 -GdihsOBbRS6KvmeKpEXQpsvBQ9GejSL/UUWuKg+O0ysHE9+QX/+OznFp66h/x7PP -Q7g6ipwAjgwuG5jm/Czz+dw4j3Qp5N5f7Dn3QhDzmXkKgzRzirKh9XVQqUFRwlLn -8VuzAhD5SjRN0JDE2jlt0Hin90zx/nkOV2b5hTYu9NVgmrfCye6uB/qsK7PQBh69 -9i4+8tBGXrcS2Nenm+Hm12fFhNum96A0ahj134H2ks4JcKcCAwEAAaNTMFEwHQYD -VR0OBBYEFIZYdI/00qzj+5JqEzEJfpj93AphMB8GA1UdIwQYMBaAFIZYdI/00qzj -+5JqEzEJfpj93AphMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB -AE+9y6Ea3pjIegOeffXzI725vXO0OSuv76vRoswUVQDKL7j7QcvqdcKCK/2hmeMW -MBNwKZneG5iumH2Yp5RQ14arihS9+SYEpgftvfNOZrzZ37RttJhSjBQ7GUorH7pD -uQ4s1unqKXv981JCtlKOQh5ZKthHAMsP/sfYuWg2ksVn1hvFTJZirewVUOgR8zPB -Djl6PdjZLDu1FlglQQ5YUNgsJbAqkBPcA0hEwGU0j5QEncvdspn4LiH2/mHhnzM9 -3QEvsXUxgJo989Am6+E77XNX0wcALj2zUmPPDWYgHgLKO/ZcSAOQ9JaauVhUre2D -7jPwnN47yLak5obVcyCpaDPKYk6sEUZkiWRaONvugoIbjYivmB/BJc0njfVA0kzT -FDwpUTtSddZgHzdTXe0p5C7IGmYkp/vgKlSpSYY6+aCiVApJSdJjL6FZKoOXqDnr -OgoQGSOJif4mDeipKOdrb2JtYwJkRl0c1S+tgOi8PU+ROvZxQGWI9/i20H58M7j0 -r/WhbudhcAqWglk5WOpCodbJhXffCrbUm5NjoFr7AKswxLJVz39WIe/duHPEGV7v -jLd/zj7eJRv5ycDyt91rbGxQ9NKzEx+by/5WIZTi+z+2PG75tdpQUwgEIh1c/XOt -6uXtS0sNnnjHVmXPBC+Myz+1NolYWjZMcBQ2xGIORvm8 +AgEAvKjjhLJvnrHpaxX/JgSqymisQ7IvMoiEapAf3fArNt1dg5b6E13w33ZuwtJ8 +Dem8BYceFWmu5YeOWQP038iJRDr5uofVTJAqyVzt4aM5YJVo7CaXVSlRRLhDN1zu +WsOnsC28uB9+0zn90729iJyU0DbQ2Tym/C6emzBS7OcD8cy84MvMdvyn6F5O8pjQ +A6V7l0dS2fD32GbAg8tpRABORUGnYDDHUIklwHQxQtZWQmJneYKGl0M3BdGaCA9g +AtRlRLwpNBQYwk5APh8TKNfAsEg6565IQ8sV12xYzphF9XMTFkRiLtyqMES+zpqu +rUu+xZ9gKFBL1rr+Jm36cA5zfDg0xdyr0u+j5zq2adM7lTctycaCw0pn4l1rGZHq +A33LV3Qgd4woCui2asgPvEiJU27qEcK4RM8x+Mf9vpm6jGhYjxLCAxoMHDaAoKKL +t3xd4PwqG8VBK3FbObU9qTXYnJW0tGvoMyo4u6onIGyQtqLul/Va8Op1QM9rOr2+ +3zUiNT5Zocg8EVb1gT+mJP/axSjCDAhk0+lh2HenEAR036Hicefp9O6SZkBdarTJ +ffqdIRNgbq5uIBqQ/Q6/A9lt5lR+lz1q+ZIJiwBD4ysE9dqwqUQboHoQg/yR80bu ++mlLow4E82rWW+qjD1FODLXukthibPh3vOLJyNl/UOKrj8UCAwEAAaNTMFEwHQYD +VR0OBBYEFGzWyPpUSrjEFc4yrS8QHu2wpVFVMB8GA1UdIwQYMBaAFGzWyPpUSrjE +Fc4yrS8QHu2wpVFVMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AGfn+XugP3raB31fEwqwN/wshe6jmcq9q2XYW+dHdrBq++ChE6Cw6BpjCFKLWWzr +udIo4opiDTkAauG3BOfQfUwJgyL8s81tAfOPxrB8O9NcvDNK92RugmT1FWGkvBV9 +lPRExxkjPbhaNzlcOIcqVX/O1eVuv+Oli+ZZBN2mwNySXR0VZiHjOV3eh6Kf7c49 +BWjgClWlGe93ZNTyaUJ4RXasBNQ4551Kvqw+pEMaaVLU3uVgJk2QQT+lSOJQrwqc +DHC7WqYS/uCm22CGOt9vP3dK5Xo8AMPYLvB3O8JcWrNdwJ/SoK6lTmKNLhqYer0r +UkP6noRRAJcXbTsH5iQvj9Daw6qjHEs2RuV2iRFxvCzAzf5bURAwXkjnCsZ24SJa +oc4yG4jfjzFUmO+xQ6+ykgMOHrLPHXcJOfgRhA0XW0BiTEKaZfI/M64W8kfU9w+M +Zo7n8CAWp75VDcAYXDfO8en6YLphpCkfhGrBqO4DfPpsHuNIETRv4wMexiaAc5xK +dud1EmuX46vreEMPtjlsO8BZTtVT5JF2P7y4wS8vd4s9vVHixvfEqVNcixVDNrQh +YZrUiTL14kPDrJy9UfzylWeEc4cQfTwzId2QKRfX7+u9Xe6LPfdsGVN5gGXdKtkY +z2aV9XUEX0VmDmKdKcvhMu+ReOsfM66vFheT1ZEAKNnG -----END CERTIFICATE----- diff --git a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem index 3924eac91c2..7a7bfe60326 100644 --- a/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem +++ b/tests/integration/test_ssl_cert_authentication/certs/wrong-key.pem @@ -1,52 +1,52 @@ -----BEGIN PRIVATE KEY----- -MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQCkAGDGMKsCINjn -b3ESNuhET89XCBVK0PR08Ta9I46rF9Vhg6o/tUXukbY94DsST1Cq/ZypoNZsNafm -9PcCI35qyV8e1cmAaM8Guk5Dcx7pFK7ZpQc8iC5ptSPP6QlVKSiPSmRsCqojEqx7 -WFm1Q0dXA6aD8+2GSkY83Gh81fWQJ8Fag0nK4oU6gZ42CZjqOhjXTNNqIemryGED -dvtZjflB00YPtLCRgG9OMYbMGv+G7bp+Ff8Odx5n2R/2fqrz32AHaRkflPV5OVKG -HXZMjWwdr4EbPVjBHQeozzzNcPIB/ttaXsfLl/x72R5l3HsXH/Cz3cFvca6wDnPB -SbWJR7U4jBMsWUv4p2SvSrOh8NxDRmc3CswTigCEIkXBKck5aesKZSi7ZdePzpeX -jzW4FqdrfTPMn1OeIcz+EDni20ey9r/IHy3KhdwjgUBLCTcZ2KGw4FtFLoq+Z4qk -RdCmy8FD0Z6NIv9RRa4qD47TKwcT35Bf/47OcWnrqH/Hs89DuDqKnACODC4bmOb8 -LPP53DiPdCnk3l/sOfdCEPOZeQqDNHOKsqH1dVCpQVHCUufxW7MCEPlKNE3QkMTa -OW3QeKf3TPH+eQ5XZvmFNi701WCat8LJ7q4H+qwrs9AGHr32Lj7y0EZetxLY16eb -4ebXZ8WE26b3oDRqGPXfgfaSzglwpwIDAQABAoICACiTq1399qGtLN1+NjSyfx8/ -u+Ylqtb7AjDY6Zk8bfUpDXN2Fy5yFF5lkPiYPSVXmHbmDtftYoAdenBrVZ4i2All -z3IapSNvSyG4ANsxZYl3w5c3/KVecFVZKwYq+1MlvtJNLrGIpfXNjf1qq69seP8v -eQiW1sLuJ5ixU+znJz3GiFFzwFNBXoNORK3MDBiPzUufx4Mv5tfI2S/5RVEwDmbZ -9jC2nSUy6Nco69geKfDhas39dUDH+i7pir37MyLptqG+wCePPHkE1MU4Duf76a8i -mEf8ErSdESMUO0/9TPNvcihW4Qofjam624mKVq4vCegGyvBe6UDIIp3FNfREWLLC -ilit37ZVOHbq79qV+DobLADYXZGXrptzr6VqVZvvQUEwOjftD2B8GJzxbNxRl77F -BbeOhYA/IDr8YR7Qos0HjqDDekRavHKAa5kcf8yFVJaQycirHJZpjr3oNFktqg6H -9eb537SdPI5nHtgTSQSosxst+iBsjMCJ7rU7aowy9gKG75s9eME06hiQsukNcOI3 -hmBqQBeX+yLWh7Z6A2Y7MepHuYHWKereBGISR58bvTmNyI4mLWYwJZzjON2tot3a -MJM86gw83TuX1Qmp3+NjduQtdtMjDSXLN2yBbK4CufQYaTxK1xdHUoK/uvG9kIq3 -tP+/fiTHZHyuTSSgwOypAoIBAQDT2Vj2uahOypboLv33XtFr2kuDe1sogpLaAVu4 -Dv4vO835gto4fx79rK3u2fBxiVx0yv7kwitvpcwaoMdkUEoSBtOugYB9UezW+SX5 -91bpY0mBH0ToSmosPFVc6PEki6LRV+VGZ1gFXU7uZ4Wia9opENfT7d8cjBQ4NZ/M -sCyqHR2KYB82DHx6Lrtrs0eWn33N8BVgsy4xSbOi2YrgQCnJvfPWVYtcXjRbplj4 -jCVGnPlac0Z/bv3Kb7Q7EOS+d+RFi1ZpsFYPbW5KRhGshzOxGw5d/nCjkEXCV0ht -uK6KndjFOvCGfikZW7WVpw7bkCe0W2Ko/JSX99ccJBDyau1NAoIBAQDGLj/wVxss -dllwswBOAV3oSL9xbUPs+Xbr/FK4XKcL7wcY6cfdZBlsQLQCoAzyTZ8t+g756Hlt -a8qmW2/Wvdo+m63Z2ecnbI9pJsVyYeT8pVumx4biHuXbRBYO/0ZZPtB5kTT6Hzhv -ZHxoUj4jb7L/5kwEdEPFIZX4rVEXY20LJL5wtv2zEQylQk9kunjUgrP1L/GtcNh+ -QRzLXiJWAoC4HWcXmdxb/Hc0BU5geSwZL4bbl3YL3lwWvkd3aY17T90EjWy4uA6G -tsHCxbxauul1q8OkmCcLEfhYnDh95YoVddR97XhC33S0v4dYjX/iS46g8fJ0HhRo -9YGWsD+tRevDAoIBAFCp/5/iTV3C8fbyfa1FI0SH2Bz2SV2Bal0sCzpoKwzdHq6U -znaYoLpCl+/MeCsi/FtUN/3umQ9n9/FjqshdcfavNsbJdJ1DJoUsVPN65FL1hTVv -LJOuUgMJ7g70e21I5fQEHb7S9scEIlvQeye/HVBpo2SEvGFoTQKiGHid1EPp1ies -NfYkhvkW9jIqD2Yg0IwrkFhDoaEOySGG58Q/ainw8/l2lRvUmucSzenFoyPh/Wgd -YIiBQI1mPyAGbLLBf9+jEIIprHsvVcFeMLiaumoDPVM44LbG5mj7Rw7QNVV+iN2A -dbkgLJIFQ3z6IUQk/ZlE+qoRkprSuctzSCil4jkCggEAdiYWilNz6NL5yX193gNk -l9nfAGFS0JF8+31nV3AtSqkLAyhEtlE58ta0Oqhub3olPwTILucQlVJg80Kp700q -Mo8fWzRUYaWP7fFmXyXLnW97r3dei6o+ALWbrP81UnlnUkJmYgOA4q/2lz8Iupma -DoOeqD0kNf8q6KFzKc1lsfIK8ym1IC826cMZkAS3ioINhUw6+dq/xq1M3FVXhQ1i -7eDhmClrPQ/LhSDwtAUpbC5waLPodXTwU8LG2oL8DRr0ugUSXyGjz15fL54xB6pN -CpEHRzZKeIgTFci0ySGya87eiuCrBLsxWZyhtQJOznubIYp8sAtKwbQzuMGEhOmd -fwKCAQEAlZoi1SzHstg6PfpwrIHJV3imLa550k9hyAu610CKjMsg6IsFsgu9/J0b -9hkhlafeW+p9zhKSwjl3aCuWUMNE53R+zYmMIJJrBzejC+1H0SKW0Zix9+ghixOX -da1jRaUxUqApJYvvxUC8FbnATM/Eq0ofhGkG3o575SlO+54twJO+bXGAUf/C6xMY -AQUQh90pTbZ96Q3Wdm2Qmrhd/GUaC6k1vAHVHHU8WQHiLmo1fF4gL/TqRv5KEPUM -un6ld7h8BEWtMClhSIiL2h5nvSYGcB6Lai6rPO0UUbGkWBQFpGaeglUmoYi0ciC5 -lMRrRHGUiWHW9C4/siOKYrHBeH5oNQ== +MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQC8qOOEsm+eselr +Ff8mBKrKaKxDsi8yiIRqkB/d8Cs23V2DlvoTXfDfdm7C0nwN6bwFhx4Vaa7lh45Z +A/TfyIlEOvm6h9VMkCrJXO3hozlglWjsJpdVKVFEuEM3XO5aw6ewLby4H37TOf3T +vb2InJTQNtDZPKb8Lp6bMFLs5wPxzLzgy8x2/KfoXk7ymNADpXuXR1LZ8PfYZsCD +y2lEAE5FQadgMMdQiSXAdDFC1lZCYmd5goaXQzcF0ZoID2AC1GVEvCk0FBjCTkA+ +HxMo18CwSDrnrkhDyxXXbFjOmEX1cxMWRGIu3KowRL7Omq6tS77Fn2AoUEvWuv4m +bfpwDnN8ODTF3KvS76PnOrZp0zuVNy3JxoLDSmfiXWsZkeoDfctXdCB3jCgK6LZq +yA+8SIlTbuoRwrhEzzH4x/2+mbqMaFiPEsIDGgwcNoCgoou3fF3g/CobxUErcVs5 +tT2pNdiclbS0a+gzKji7qicgbJC2ou6X9Vrw6nVAz2s6vb7fNSI1PlmhyDwRVvWB +P6Yk/9rFKMIMCGTT6WHYd6cQBHTfoeJx5+n07pJmQF1qtMl9+p0hE2Burm4gGpD9 +Dr8D2W3mVH6XPWr5kgmLAEPjKwT12rCpRBugehCD/JHzRu76aUujDgTzatZb6qMP +UU4Mte6S2GJs+He84snI2X9Q4quPxQIDAQABAoICAApGgpncJCM3GnkIKiz2bRz4 +JYXMYzCz6c5qCipK5fenh+veYcGDSNbK+w5ma3ZQiDqe2N8esfVzdCfaBNDZecx7 +D9X+hvoUEhiElLpI6xudF6lhErYDOZduF88goyTTakM4woIeyQgVLQOG3pddu+c5 +TRe/63Jp0Z6vO50Gmhrl5VWzE/BZI4YO+OrSsuW38irTqioPq1gghJTJE/MttxWj +lUuybHCw/5rjWTmENg+Ij405ND5x3UHWYDbXK4oL6nYbb30UKSMQIwSfKap8UdTo +IjkzL2Ft4sModg/OkGTlfyEj2VsnDqfxXpkfKKtsqDfYTeL0OKU56xTJwa2vw89k +gtCP0t/UQzv2MRPvoCOKEIj8AiUj5X9uLPY6ijA4jD396OVnuZh9+KgpLwB7piT+ +WNrbm/Nrfe617kK41NGAVpVIMo1y8fSbplZqMOWSSqLO0eyHkEJK5yK1WtFfzVAE +UKH5jRjS52XcLrLa7VxAcsywv45UNiSiYRSxBY7MOg1mL94BlcghW/YHg3h3vKZC +NVEBoaDHNd8Q3V5sQgpENUjXmWFaE/1Su9i/oooy/GAwRBySHnUkKPvBP6Unus/S +Pvc2vQPMHpfMbPc+L9YxxbbgVHBembYJn7qIejoqfedtaZoqB240s4AowxDiK88f +QN1L8TiaDiGJBIuvfhtRAoIBAQD/2TAXiLZ4fkbcIbpxrU8nmQNrqxZPlqVLKLDj +4eq4wBLMywOsW1RGoaipooWmEWICgrh11YtoE3GjgPYh2x+oXPOpUn5P4Gbx88o7 +8rKMDu//yRKnj/e0GNVy+ugXqVZj3+o2Mdyv2nlAqLEeZsuzFi7nXoaNJjOsumyq +e3eP/iM9jV2CakhRPnZH9B5/6dgFjwR4Vw9DU2cCaQZtIEOGANCEfP3UWaP1GW0Y +eMtDYiGAxkYqpCDLwbQT2jbgCZUUBtIbkZhkVgXDrE6K2Ey9M7HroLo7Tl5NGBMA +V7lZxpVa/4T2R7RFlXZFChDNul9lngQEFHlrbITJG8iOLfpdAoIBAQC8xYIlIyLW +q/mzUErH7cqL76ORcm8ejymD+ii+ERQpqwTQJljBf5mnadfTQPN1kD7YLAXSgufq +oEnVNFYR/k/M2aztTWfsUNGW0MAhS4tTewFgcia8macfhOccP3mItZ3KOQgpXxnW ++h8DXLlq6FFvybK00msribpxNQybS7y4DK5VMRbg0aBZAn+6O4Lz5uLR+PU02pFG +FgaLVh1E0h2mp0ib42BaRyVJI0CY402XKrSm/BDnHhGfoLW5kK53gvk2g6RzIDwy +qF/DJ0HHlAQL9Z9OxoMy4haHXcBGP9rGJX59eyb6adinsSBl7wXStdjvTsmlNYD7 +KqltgoAdkaSJAoIBAHWZs01d/eGsyY1tw3F1JCkjYDshTQQQTrIZZhWZRnbdcsbH +mkyjPj2pGQnANoZ6/v4AcCFZotaX+WgaYwh03DxbXXS7AmxczXTxhke/6uoOA6sj +FXwH5OfXcmWDhyM5JwiJZ/K5QKNkXM+nuqIqxf7vd2fKPzaqFJ6UolZKok6Bllk4 +nX7Qs6UEfQHd6BcLucv0TS2zdsSPlY26EMYgSmlR/oannVT6Ty7eHRNekq/Kb3Pt +r1ryTlDaHJfzeb3JKckmyXT6m32jPMsQbJnNiph9Jo8UNgYEo7v0EOfbasslSImn +YcqCcw55AQAC/G5T+H2RAG+PqbADFZYLO0h/QdECggEAAJxS83PNpQwhXqcf1s26 +HgHEcbABFAQ6iibBAvxjKE9ZUKMPHnEfOh9ph1FqeDLjQSfDTQ8HWQbztjDZJm3A +LFV37byWXXlVdDtwo6ru5HI9auZzoaCNndh8NuctqXeM7x6IHDomhz9/4i7mmqXt +vYLVhSg5GIb1h3A4fjgcgCvqVHQ4Mrn63s7XQu72WXuhuDQp9uXOGn/vvXul1Jcp +aWSZI4f0w9X/FOF8UAJMOfT1aKTgGR9Lx0xpyhPhvJk73SVH3ud3ymIpDSSPXeno +qXE99q9FtWBt2jo/aPrD5mgwpI9FbQHypXg5NpszVZ8o+H00wcgOhsF4ktYdO/tA +oQKCAQB7TZVBjOsOY/S8eWbTZo0w5K0THL15k++gfZAOCcQamhGe+Cqar85kyJ50 +aAE+p04i7ZuNAtxAetPU8GvdYK/DNdDa+yeUTvBjl7Lpi6FjsPwdCSDQJNQk4qCW +rPHnBfCK7wHKpOKsQuwKXH4Ndo3En6y4EmICHMdfu9OWZlNhKc2o00SdkdXF9+Wt +7iKNb3JDCG1ZOEISMkBdzbRjNfzzxoleQFr+2C3nUogMkGdgwMtC4JmgO+K5dsSp +/vNHeNUOYxLj8D7H0y75iMPR+LqhlKvFj6P3BlslD0Lb/yg80U55v821mYBRs8pg +AVHPFI5a8A+VunzBvaL+SEP/rwrI -----END PRIVATE KEY----- diff --git a/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml b/tests/integration/test_ssl_cert_authentication/configs/ssl_config.xml old mode 100644 new mode 100755 diff --git a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml old mode 100644 new mode 100755 index 4bd30163ea6..b697c010195 --- a/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml +++ b/tests/integration/test_ssl_cert_authentication/configs/users_with_ssl_auth.xml @@ -17,6 +17,11 @@ URI:spiffe://foo.com/baz + + + URI:spiffe://bar.com/foo/*/far + + diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 3af88759e82..1adfa4855f1 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -369,3 +369,33 @@ def test_x509_san_support(): instance.query("SHOW CREATE USER jemma") == "CREATE USER jemma IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://foo.com/bar\\', \\'URI:spiffe://foo.com/baz\\'\n" ) + +def test_x509_san_wildcard_support(): + assert ( + execute_query_native(instance, "SELECT currentUser()", user="stewie", cert_name="client5") + == "stewie\n" + ) + + assert ( + instance.query("SELECT name, auth_type, auth_params FROM system.users WHERE name='stewie'") + == 'stewie\tssl_certificate\t{"subject_alt_names":["URI:spiffe:\\\\/\\\\/bar.com\\\\/foo\\\\/*\\\\/far"]}\n' + ) + + assert ( + instance.query("SHOW CREATE USER stewie") + == "CREATE USER stewie IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://bar.com/foo/*/far\\'\n" + ) + + instance.query( + "CREATE USER brian IDENTIFIED WITH ssl_certificate SAN 'URI:spiffe://bar.com/foo/*/far'" + ) + + assert ( + execute_query_https("SELECT currentUser()", user="brian", cert_name="client6") + == "brian\n" + ) + + assert ( + instance.query("SHOW CREATE USER brian") + == "CREATE USER brian IDENTIFIED WITH ssl_certificate SAN \\'URI:spiffe://bar.com/foo/*/far\\'\n" + ) From 4c6aca2eed4da2406c5796bb661cee0a175e5eec Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 12 Aug 2024 15:58:10 -0300 Subject: [PATCH 299/844] add docs about reset auth method --- docs/en/sql-reference/statements/alter/user.md | 1 + docs/en/sql-reference/statements/create/user.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 34a884c4d13..c5c436b151b 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -15,6 +15,7 @@ ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [NOT IDENTIFIED | IDENTIFIED | ADD IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] + [RESET AUTHENTICATION METHODS TO NEW] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 32b43df9248..218589391a2 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [VALID UNTIL datetime] + [RESET AUTHENTICATION METHODS TO NEW] [IN access_storage_type] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] From 9abc001296dbd7b8b623cace6d9c651ba9447138 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 13 Aug 2024 10:17:05 -0300 Subject: [PATCH 300/844] fix trailing comma issue --- src/Parsers/Access/ParserCreateUserQuery.cpp | 32 +++++++++++++------ ..._multiple_authentication_methods.reference | 2 ++ .../03174_multiple_authentication_methods.sh | 3 ++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 4da0297dbc7..fdee49f9e80 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -225,21 +225,35 @@ namespace if (!ParserKeyword{Keyword::IDENTIFIED}.ignore(pos, expected)) return false; - bool is_type_specifier_mandatory = ParserKeyword{Keyword::WITH}.ignore(pos, expected); - - std::shared_ptr ast_authentication_data; - while (parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory)) + // Parse first authentication method which doesn't come with a leading comma { - authentication_methods.push_back(ast_authentication_data); + bool is_type_specifier_mandatory = ParserKeyword{Keyword::WITH}.ignore(pos, expected); - if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + std::shared_ptr ast_authentication_data; + + if (!parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory)) + { + return false; + } + + authentication_methods.push_back(ast_authentication_data); + } + + // Need to save current position, process comma and only update real position in case there is an authentication method after + // the comma. Otherwise, position should not be changed as it needs to be processed by other parsers and possibly throw error + // on trailing comma. + IParserBase::Pos aux_pos = pos; + while (ParserToken{TokenType::Comma}.ignore(aux_pos, expected)) + { + std::shared_ptr ast_authentication_data; + + if (!parseAuthenticationData(aux_pos, expected, ast_authentication_data, false)) { break; } - ParserToken{TokenType::Whitespace}.ignore(pos, expected); - - is_type_specifier_mandatory = false; + pos = aux_pos; + authentication_methods.push_back(ast_authentication_data); } return !authentication_methods.empty(); diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 27d5df20091..27a84ab9e95 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -47,3 +47,5 @@ Use WITH without providing authentication type, should fail Syntax error Create user with ADD identification, should fail, add is not allowed for create query BAD_ARGUMENTS +Trailing comma should result in syntax error +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 5503134d880..1ea129246cc 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -138,4 +138,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH BY '1';" 2>&1 echo "Create user with ADD identification, should fail, add is not allowed for create query" ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +echo "Trailing comma should result in syntax error" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 8d6e2e26a57764d2dfc133d8b063578848fd3e2e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 2024 14:29:52 +0000 Subject: [PATCH 301/844] New test for asan issue with pr --- .../03222_pr_asan_index_granularity.reference | 100 ++++++++++++++++++ .../03222_pr_asan_index_granularity.sql | 14 +++ 2 files changed, 114 insertions(+) create mode 100644 tests/queries/0_stateless/03222_pr_asan_index_granularity.reference create mode 100644 tests/queries/0_stateless/03222_pr_asan_index_granularity.sql diff --git a/tests/queries/0_stateless/03222_pr_asan_index_granularity.reference b/tests/queries/0_stateless/03222_pr_asan_index_granularity.reference new file mode 100644 index 00000000000..662880055bd --- /dev/null +++ b/tests/queries/0_stateless/03222_pr_asan_index_granularity.reference @@ -0,0 +1,100 @@ +0 18 9899 +0 18 9898 +0 18 9897 +0 18 9896 +0 18 9895 +0 18 9894 +0 18 9893 +0 18 9892 +0 18 9891 +0 18 9890 +0 18 9889 +0 18 9888 +0 18 9887 +0 18 9886 +0 18 9885 +0 18 9884 +0 18 9883 +0 18 9882 +0 18 9881 +0 18 9880 +0 18 9879 +0 18 9878 +0 18 9877 +0 18 9876 +0 18 9875 +0 18 9874 +0 18 9873 +0 18 9872 +0 18 9871 +0 18 9870 +0 18 9869 +0 18 9868 +0 18 9867 +0 18 9866 +0 18 9865 +0 18 9864 +0 18 9863 +0 18 9862 +0 18 9861 +0 18 9860 +0 18 9859 +0 18 9858 +0 18 9857 +0 18 9856 +0 18 9855 +0 18 9854 +0 18 9853 +0 18 9852 +0 18 9851 +0 18 9850 +0 18 9849 +0 18 9848 +0 18 9847 +0 18 9846 +0 18 9845 +0 18 9844 +0 18 9843 +0 18 9842 +0 18 9841 +0 18 9840 +0 18 9839 +0 18 9838 +0 18 9837 +0 18 9836 +0 18 9835 +0 18 9834 +0 18 9833 +0 18 9832 +0 18 9831 +0 18 9830 +0 18 9829 +0 18 9828 +0 18 9827 +0 18 9826 +0 18 9825 +0 18 9824 +0 18 9823 +0 18 9822 +0 18 9821 +0 18 9820 +0 18 9819 +0 18 9818 +0 18 9817 +0 18 9816 +0 18 9815 +0 18 9814 +0 18 9813 +0 18 9812 +0 18 9811 +0 18 9810 +0 18 9809 +0 18 9808 +0 18 9807 +0 18 9806 +0 18 9805 +0 18 9804 +0 18 9803 +0 18 9802 +0 18 9801 +0 18 9800 diff --git a/tests/queries/0_stateless/03222_pr_asan_index_granularity.sql b/tests/queries/0_stateless/03222_pr_asan_index_granularity.sql new file mode 100644 index 00000000000..b7f37dd2856 --- /dev/null +++ b/tests/queries/0_stateless/03222_pr_asan_index_granularity.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (k UInt64, v String) +ENGINE = MergeTree +ORDER BY k +SETTINGS index_granularity=1; + +INSERT INTO test SELECT number, toString(number) FROM numbers(10_000); + +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT 0, materialize(18), k FROM test PREWHERE toNullable(toNullable(11)) WHERE toNullable(11) ORDER BY k DESC NULLS LAST LIMIT 100, 100 SETTINGS optimize_read_in_order = 1, merge_tree_min_rows_for_concurrent_read = 9223372036854775806, max_threads = 1; + +-- DROP TABLE test; From 99dcf7e60ebf91858e4b16d9235bb1623a78b9b0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 2024 15:57:09 +0000 Subject: [PATCH 302/844] Add 03222_pr_final_not_supported.sql --- .../03222_pr_final_not_supported.reference | 0 .../03222_pr_final_not_supported.sql | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03222_pr_final_not_supported.reference create mode 100644 tests/queries/0_stateless/03222_pr_final_not_supported.sql diff --git a/tests/queries/0_stateless/03222_pr_final_not_supported.reference b/tests/queries/0_stateless/03222_pr_final_not_supported.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03222_pr_final_not_supported.sql b/tests/queries/0_stateless/03222_pr_final_not_supported.sql new file mode 100644 index 00000000000..a018bfc53b4 --- /dev/null +++ b/tests/queries/0_stateless/03222_pr_final_not_supported.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS test_00808; + +CREATE TABLE test_00808 +( + `date` Date, + `id` Int8, + `name` String, + `value` Int64, + `sign` Int8 +) +ENGINE = CollapsingMergeTree(sign) +ORDER BY (id, date) +SETTINGS index_granularity = 62918, min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 0., replace_long_file_name_to_hash = false, max_file_name_length = 79, min_bytes_for_full_part_storage = 536870912, compact_parts_max_bytes_to_buffer = 525685803, compact_parts_max_granules_to_buffer = 148, compact_parts_merge_max_bytes_to_prefetch_part = 23343456, merge_max_block_size = 22854, old_parts_lifetime = 93., prefer_fetch_merged_part_size_threshold = 3610162434, vertical_merge_algorithm_min_rows_to_activate = 1000000, vertical_merge_algorithm_min_columns_to_activate = 1, min_merge_bytes_to_use_direct_io = 1, index_granularity_bytes = 9550183, concurrent_part_removal_threshold = 34, allow_vertical_merges_from_compact_to_wide_parts = false, cache_populated_by_fetch = false, marks_compress_block_size = 97756, primary_key_compress_block_size = 80902; + +INSERT INTO test_00808 VALUES('2000-01-01', 1, 'test string 1', 1, 1); +INSERT INTO test_00808 VALUES('2000-01-01', 2, 'test string 2', 2, 1); + +SET enable_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT * FROM (SELECT * FROM test_00808 FINAL) WHERE id = 1; -- { serverError SUPPORT_IS_DISABLED } + +DROP TABLE test_00808; From 4448880bbe11d80047de2c3988165b4b54710f1e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 2024 16:15:28 +0000 Subject: [PATCH 303/844] Remove settings 03222_pr_final_not_supported --- tests/queries/0_stateless/03222_pr_final_not_supported.sql | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03222_pr_final_not_supported.sql b/tests/queries/0_stateless/03222_pr_final_not_supported.sql index a018bfc53b4..6c2e05c12a9 100644 --- a/tests/queries/0_stateless/03222_pr_final_not_supported.sql +++ b/tests/queries/0_stateless/03222_pr_final_not_supported.sql @@ -9,13 +9,12 @@ CREATE TABLE test_00808 `sign` Int8 ) ENGINE = CollapsingMergeTree(sign) -ORDER BY (id, date) -SETTINGS index_granularity = 62918, min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 0., replace_long_file_name_to_hash = false, max_file_name_length = 79, min_bytes_for_full_part_storage = 536870912, compact_parts_max_bytes_to_buffer = 525685803, compact_parts_max_granules_to_buffer = 148, compact_parts_merge_max_bytes_to_prefetch_part = 23343456, merge_max_block_size = 22854, old_parts_lifetime = 93., prefer_fetch_merged_part_size_threshold = 3610162434, vertical_merge_algorithm_min_rows_to_activate = 1000000, vertical_merge_algorithm_min_columns_to_activate = 1, min_merge_bytes_to_use_direct_io = 1, index_granularity_bytes = 9550183, concurrent_part_removal_threshold = 34, allow_vertical_merges_from_compact_to_wide_parts = false, cache_populated_by_fetch = false, marks_compress_block_size = 97756, primary_key_compress_block_size = 80902; +ORDER BY (id, date); INSERT INTO test_00808 VALUES('2000-01-01', 1, 'test string 1', 1, 1); INSERT INTO test_00808 VALUES('2000-01-01', 2, 'test string 2', 2, 1); -SET enable_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; SELECT * FROM (SELECT * FROM test_00808 FINAL) WHERE id = 1; -- { serverError SUPPORT_IS_DISABLED } From d36176ad85a0a51a63bd91849878a045b83008e1 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Wed, 17 Jul 2024 03:19:07 +0200 Subject: [PATCH 304/844] Support for the `input_format_json_empty_as_default` setting --- docs/en/interfaces/formats.md | 1 + .../operations/settings/settings-formats.md | 11 ++++ src/Core/Settings.h | 1 + src/DataTypes/DataTypeAggregateFunction.h | 2 + src/DataTypes/DataTypeDate.h | 1 + src/DataTypes/DataTypeDate32.h | 1 + src/DataTypes/DataTypeDateTime.h | 1 + src/DataTypes/DataTypeDateTime64.h | 2 + src/DataTypes/DataTypeIPv4andIPv6.h | 2 + src/DataTypes/DataTypeUUID.h | 1 + src/DataTypes/IDataType.h | 3 + src/DataTypes/Serializations/ISerialization.h | 10 +++- .../SerializationAggregateFunction.cpp | 2 +- .../SerializationAggregateFunction.h | 5 +- .../SerializationAsStringNonTrivialJSON.h | 56 +++++++++++++++++++ .../Serializations/SerializationDate.cpp | 4 +- .../Serializations/SerializationDate.h | 7 ++- .../Serializations/SerializationDate32.cpp | 4 +- .../Serializations/SerializationDate32.h | 7 ++- .../Serializations/SerializationDateTime.cpp | 4 +- .../Serializations/SerializationDateTime.h | 7 ++- .../SerializationDateTime64.cpp | 6 +- .../Serializations/SerializationDateTime64.h | 7 ++- .../SerializationIPv4andIPv6.cpp | 4 +- .../Serializations/SerializationIPv4andIPv6.h | 7 ++- .../Serializations/SerializationNullable.cpp | 52 ++++++++++++++--- .../Serializations/SerializationNullable.h | 1 + .../Serializations/SerializationUUID.cpp | 4 +- .../Serializations/SerializationUUID.h | 7 ++- src/Formats/FormatFactory.cpp | 2 + src/Formats/FormatSettings.h | 2 + src/Formats/JSONUtils.cpp | 30 ++++++++-- src/IO/ReadHelpers.cpp | 17 ++++++ src/IO/ReadHelpers.h | 4 ++ .../03203_json_empty_as_default.reference | 20 +++++++ .../03203_json_empty_as_default.sql | 53 ++++++++++++++++++ 36 files changed, 300 insertions(+), 48 deletions(-) create mode 100644 src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h create mode 100644 tests/queries/0_stateless/03203_json_empty_as_default.reference create mode 100644 tests/queries/0_stateless/03203_json_empty_as_default.sql diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 8892c6d8d3f..8fc66bef5b1 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1342,6 +1342,7 @@ SELECT * FROM json_each_row_nested - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`. - [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`. +- [input_format_json_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_json_empty_as_default) - treat empty fields in JSON input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8b40cd81ac..758d5c1ab49 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -741,6 +741,17 @@ Possible values: Default value: 0. +### input_format_json_empty_as_default {#input_format_json_empty_as_default} + +When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. + +Default value: 0. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0808e8eb49f..90045b9a8ff 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1133,6 +1133,7 @@ class IColumn; M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \ + M(Bool, input_format_json_empty_as_default, false, "Treat empty fields in JSON input as default values.", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index e3a4f9726d9..2ab2e53100b 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -64,6 +64,8 @@ public: SerializationPtr doGetDefaultSerialization() const override; bool supportsSparseSerialization() const override { return false; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } + bool isVersioned() const; /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 0e08b9ba2ca..cb7a603705d 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -17,6 +17,7 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 65633e7a228..53bd010b7cf 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -18,6 +18,7 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 5519240dee1..11b579920ba 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -44,6 +44,7 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index 64cedd798d1..dd5ff7e6550 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -39,6 +39,8 @@ public: bool isSummable() const override { return false; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } + protected: SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index 5aea55751a7..520af4f21e0 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -46,6 +46,7 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; @@ -84,6 +85,7 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 90cdd90d68d..1aeab1b78ba 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -42,6 +42,7 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(UUID); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } + bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 397ae3d8be9..8033e82d1bc 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -328,6 +328,9 @@ public: /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); + /// non-numeric non-string data type serialized as JSON string + virtual bool isNonTriviallySerializedAsStringJSON() const { return false; } + protected: friend class DataTypeFactory; friend class AggregateFunctionSimpleState; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 5d0bf60c59f..76c88fe7522 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -398,12 +398,20 @@ public: virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; + /// The following two methods are implemented only for non-numeric non-string simple data types. + virtual void deserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method deserializeTextNoEmptyCheckJSON is not supported"); + } + virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); + } virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const { serializeTextJSON(column, row_num, ostr, settings); } - /** Text serialization for putting into the XML format. */ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 41b198890e4..39c3f389619 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -182,7 +182,7 @@ void SerializationAggregateFunction::serializeTextJSON(const IColumn & column, s } -void SerializationAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationAggregateFunction::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; readJSONString(s, istr, settings.json); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index c45fc79f714..6afa1cd4e97 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -3,12 +3,13 @@ #include #include +#include namespace DB { -class SerializationAggregateFunction final : public ISerialization +class SerializationAggregateFunction final : public SerializationAsStringNonTrivialJSON { private: AggregateFunctionPtr function; @@ -37,7 +38,7 @@ public: void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h new file mode 100644 index 00000000000..7d8375368a7 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +/** Serialization for non-numeric non-string data types serialized as JSON strings + * For these data types, we support an option, input_format_json_empty_as_default, which, when set to 1, + * allows for JSON deserialization to treat an encountered empty string as a default value for the specified type. + * Derived classes must implement the following methods: + * deserializeTextNoEmptyCheckJSON() and tryDeserializeTextNoEmptyCheckJSON() + * instead of deserializeTextJSON() and tryDeserializeTextJSON() respectively. + */ +template +requires std::derived_from +class SerializationAsStringNonTrivialJSON : public T +{ +public: + using T::T; + + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override + { + if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) + column.insertDefault(); + else + deserializeTextNoEmptyCheckJSON(column, istr, format_settings); + } + + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override + { + if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) + { + column.insertDefault(); + return true; + } + else + return tryDeserializeTextNoEmptyCheckJSON(column, istr, format_settings); + } + + virtual void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0; + + virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); + } +}; + +} diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 38e1bb87b6d..3f122189c22 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -85,7 +85,7 @@ void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; assertChar('"', istr); @@ -94,7 +94,7 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, assert_cast(column).getData().push_back(x); } -bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index dcf79eb49da..10c83171527 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -1,12 +1,13 @@ #pragma once #include +#include #include namespace DB { -class SerializationDate final : public SerializationNumber +class SerializationDate final : public SerializationAsStringNonTrivialJSON> { public: explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -21,8 +22,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index 70a22d59e42..8ad07b534ce 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -83,7 +83,7 @@ void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_n writeChar('"', ostr); } -void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; assertChar('"', istr); @@ -92,7 +92,7 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist assert_cast(column).getData().push_back(x); } -bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate32::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index be2e2b76c1d..ac6239fbc2b 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -1,11 +1,12 @@ #pragma once #include +#include #include namespace DB { -class SerializationDate32 final : public SerializationNumber +class SerializationDate32 final : public SerializationAsStringNonTrivialJSON> { public: explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -20,8 +21,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index c5c819ce7fa..80b2d51140d 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -180,7 +180,7 @@ void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row writeChar('"', ostr); } -void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) @@ -196,7 +196,7 @@ void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & i assert_cast(column).getData().push_back(static_cast(x)); } -bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 584b0c4116b..0041f221ccf 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include class DateLUTImpl; @@ -8,7 +9,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime final : public SerializationNumber, public TimezoneMixin +class SerializationDateTime final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin { public: explicit SerializationDateTime(const TimezoneMixin & time_zone_); @@ -23,8 +24,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index 442e29edd52..cdac7f785d1 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -15,7 +15,7 @@ namespace DB SerializationDateTime64::SerializationDateTime64( UInt32 scale_, const TimezoneMixin & time_zone_) - : SerializationDecimalBase(DecimalUtils::max_precision, scale_) + : SerializationAsStringNonTrivialJSON>(DecimalUtils::max_precision, scale_) , TimezoneMixin(time_zone_) { } @@ -170,7 +170,7 @@ void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t r writeChar('"', ostr); } -void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) @@ -185,7 +185,7 @@ void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & assert_cast(column).getData().push_back(x); } -bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime64::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index b49bd1e9098..a3bc4d1ad4e 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include class DateLUTImpl; @@ -8,7 +9,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime64 final : public SerializationDecimalBase, public TimezoneMixin +class SerializationDateTime64 final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin { public: SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); @@ -25,8 +26,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index c1beceb4533..ecd50a0b9b8 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -69,7 +69,7 @@ void SerializationIP::serializeTextJSON(const DB::IColumn & column, size_t } template -void SerializationIP::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationIP::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { IPv x; assertChar('"', istr); @@ -84,7 +84,7 @@ void SerializationIP::deserializeTextJSON(DB::IColumn & column, DB::ReadBuf } template -bool SerializationIP::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const +bool SerializationIP::tryDeserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const { IPv x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h index a53f257646b..44f36252741 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h @@ -4,13 +4,14 @@ #include #include #include +#include #include namespace DB { template -class SerializationIP : public SimpleTextSerialization +class SerializationIP : public SerializationAsStringNonTrivialJSON { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -22,8 +23,8 @@ public: bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override; diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index e72dd3a42f5..72c6a661dde 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -844,25 +844,52 @@ bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr) return checkString("null", istr); } -template -ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) +namespace +{ + +enum class Strategy : uint8_t +{ + Deserialize, + DeserializeNoEmptyCheck, + TryDeserialize +}; + +template struct ReturnTypeImpl; +template <> struct ReturnTypeImpl { using Type = void; }; +template <> struct ReturnTypeImpl { using Type = bool; }; +template <> struct ReturnTypeImpl { using Type = void; }; + +template +using ReturnType = typename ReturnTypeImpl::Type; + +template struct AlwaysFalse : std::false_type {}; + +template +ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) { auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); }; auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) { - if constexpr (std::is_same_v) + if constexpr (strategy == Strategy::TryDeserialize) return nested->tryDeserializeTextJSON(nested_column, buf, settings); - nested->deserializeTextJSON(nested_column, buf, settings); + else if constexpr (strategy == Strategy::Deserialize) + nested->deserializeTextJSON(nested_column, buf, settings); + else if constexpr (strategy == Strategy::DeserializeNoEmptyCheck) + nested->deserializeTextNoEmptyCheckJSON(nested_column, buf, settings); + else + static_assert(AlwaysFalse::value); }; - return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); + return deserializeImpl>(column, istr, check_for_null, deserialize_nested, is_null); +} + } void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ColumnNullable & col = assert_cast(column); bool is_null; - deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); + deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); safeAppendToNullMap(col, is_null); } @@ -870,20 +897,27 @@ bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer { ColumnNullable & col = assert_cast(column); bool is_null; - return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); + return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); } bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + return !is_null; +} + +bool SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) +{ + bool is_null; + deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); return !is_null; } bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); } void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index f7d2d2eadf0..c5215e2a39f 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -88,6 +88,7 @@ public: static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); + static bool deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); /// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type. diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index f18466ad8ad..21a0ccf676c 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -94,7 +94,7 @@ void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; assertChar('"', istr); @@ -103,7 +103,7 @@ void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, assert_cast(column).getData().push_back(x); } -bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationUUID::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationUUID.h b/src/DataTypes/Serializations/SerializationUUID.h index 458504f8f42..185d1d44c16 100644 --- a/src/DataTypes/Serializations/SerializationUUID.h +++ b/src/DataTypes/Serializations/SerializationUUID.h @@ -1,11 +1,12 @@ #pragma once #include +#include namespace DB { -class SerializationUUID : public SimpleTextSerialization +class SerializationUUID : public SerializationAsStringNonTrivialJSON { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -15,8 +16,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a78836ff63c..59131f34697 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -151,6 +151,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching; + format_settings.json.empty_as_default = settings.input_format_json_empty_as_default; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f0359218775..8c247bb960c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -234,6 +234,8 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool case_insensitive_column_matching = false; + bool empty_as_default = false; } json{}; struct diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 017befe5b0e..73189e81f97 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -286,11 +286,33 @@ namespace JSONUtils return true; } - if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + if (format_settings.json.empty_as_default && type->isNonTriviallySerializedAsStringJSON()) + { + /// We have a non-numeric non-string data type at the top level. + /// At first glance, it looks like we sort of duplicate the work done in + /// SerializationAsStringNonTrivialJSON. Actually we need to proceed as + /// done here because we want to return false if we inserted a default + /// value on purpose, which the ISerialization interface does not allow for. + if (tryMatchEmptyString(in)) + { + column.insertDefault(); + return false; + } - serialization->deserializeTextJSON(column, in, format_settings); - return true; + if (as_nullable) + return SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(column, in, format_settings, serialization); + + serialization->deserializeTextNoEmptyCheckJSON(column, in, format_settings); + return true; + } + else + { + if (as_nullable) + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + + serialization->deserializeTextJSON(column, in, format_settings); + return true; + } } catch (Exception & e) { diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9559462e62b..e8ef667a4e3 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1137,6 +1137,23 @@ template void readCSVStringInto(String & s, ReadBuffer & b template void readCSVStringInto(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto, false, false>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +bool tryMatchEmptyString(ReadBuffer & buf) +{ + if (buf.eof() || *buf.position() != '"') + return false; + + ++buf.position(); + + if (buf.eof() || *buf.position() != '"') + { + --buf.position(); + return false; + } + + ++buf.position(); + + return true; +} template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ffba4fafb5c..59c1923e02a 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -665,6 +665,10 @@ void readStringUntilEOFInto(Vector & s, ReadBuffer & buf); template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); +/// Consumes the current token if it is an empty string, i.e. two consecutive double quotes, +/// Returns true if consumed. +bool tryMatchEmptyString(ReadBuffer & buf); + /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings); diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.reference b/tests/queries/0_stateless/03203_json_empty_as_default.reference new file mode 100644 index 00000000000..53d7014e452 --- /dev/null +++ b/tests/queries/0_stateless/03203_json_empty_as_default.reference @@ -0,0 +1,20 @@ +1970-01-01 +1970-01-01 +1970-01-01 00:00:00 +1970-01-01 00:00:00.000 +0.0.0.0 +:: +00000000-0000-0000-0000-000000000000 +1 +:: +2001:db8:3333:4444:5555:6666:7777:8888 +:: +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] +['::','::'] +('1970-01-01','0.0.0.0','abc') +{'abc':'::'} +00000000-0000-0000-0000-000000000000 +[['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e'] +(['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) +{('1970-01-01','0.0.0.0'):'00000000-0000-0000-0000-000000000000'} diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.sql b/tests/queries/0_stateless/03203_json_empty_as_default.sql new file mode 100644 index 00000000000..35652f4c751 --- /dev/null +++ b/tests/queries/0_stateless/03203_json_empty_as_default.sql @@ -0,0 +1,53 @@ +set input_format_json_empty_as_default = 1; +set allow_experimental_variant_type = 1; + +# Simple types +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); + +# Simple type AggregateFunction +DROP TABLE IF EXISTS table1; +CREATE TABLE table1(col AggregateFunction(uniq, UInt64)) ENGINE=Memory(); +DROP TABLE IF EXISTS table2; +CREATE TABLE table2(UserID UInt64) ENGINE=Memory(); + +INSERT INTO table1 SELECT uniqState(UserID) FROM table2; +INSERT INTO table1 SELECT x FROM format(JSONEachRow, 'x AggregateFunction(uniq, UInt64)' AS T, '{"x":""}'); +SELECT COUNT(DISTINCT col) FROM table1; + +DROP TABLE table1; +DROP TABLE table2; + +# The setting input_format_defaults_for_omitted_fields determines the default value if enabled. +CREATE TABLE table1(address IPv6 DEFAULT toIPv6('2001:db8:3333:4444:5555:6666:7777:8888')) ENGINE=Memory(); + +set input_format_defaults_for_omitted_fields = 0; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +set input_format_defaults_for_omitted_fields = 1; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +SELECT * FROM table1 ORDER BY address ASC; + +DROP TABLE table1; + +# Nullable +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); + +# Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); + +# Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); From ad24989b31d1517757591d712959ea64ee3a900d Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Sun, 21 Jul 2024 16:39:55 +0200 Subject: [PATCH 305/844] slightly better comment --- src/DataTypes/IDataType.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 8033e82d1bc..174531cad27 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -328,7 +328,7 @@ public: /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); - /// non-numeric non-string data type serialized as JSON string + /// Checks if this is a non-numeric non-string type which is serialized as a JSON string. virtual bool isNonTriviallySerializedAsStringJSON() const { return false; } protected: From d5bea37c96c1ccdeacccb980295cb02765c94197 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 23 Jul 2024 00:43:15 +0200 Subject: [PATCH 306/844] post-review changes --- src/DataTypes/Serializations/ISerialization.h | 1 + .../SerializationAsStringNonTrivialJSON.h | 5 ++++ .../Serializations/SerializationNullable.cpp | 2 +- .../03203_json_empty_as_default.reference | 27 +++++++++++++++++++ .../03203_json_empty_as_default.sql | 27 ++++++++++++------- 5 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 76c88fe7522..3f4f4b8f875 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } class IDataType; diff --git a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h index 7d8375368a7..c6d6b864bed 100644 --- a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h +++ b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h @@ -12,6 +12,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + /** Serialization for non-numeric non-string data types serialized as JSON strings * For these data types, we support an option, input_format_json_empty_as_default, which, when set to 1, * allows for JSON deserialization to treat an encountered empty string as a default value for the specified type. diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 72c6a661dde..3cd767e58eb 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -868,7 +868,7 @@ template ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) { auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); }; - auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) + auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) -> ReturnType { if constexpr (strategy == Strategy::TryDeserialize) return nested->tryDeserializeTextJSON(nested_column, buf, settings); diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.reference b/tests/queries/0_stateless/03203_json_empty_as_default.reference index 53d7014e452..1c689228cdf 100644 --- a/tests/queries/0_stateless/03203_json_empty_as_default.reference +++ b/tests/queries/0_stateless/03203_json_empty_as_default.reference @@ -1,20 +1,47 @@ +-- Simple types +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); 1970-01-01 +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); 1970-01-01 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); 1970-01-01 00:00:00 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); 1970-01-01 00:00:00.000 +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); 0.0.0.0 +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); :: +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); 00000000-0000-0000-0000-000000000000 +-- { echoOn } +SELECT COUNT(DISTINCT col) FROM table1; 1 +-- { echoOn } +SELECT * FROM table1 ORDER BY address ASC; :: 2001:db8:3333:4444:5555:6666:7777:8888 +-- Nullable +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); :: +-- Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); ['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); ['::','::'] +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); ('1970-01-01','0.0.0.0','abc') +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); {'abc':'::'} +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); 00000000-0000-0000-0000-000000000000 +-- Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); [['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); ['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e'] +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); (['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); {('1970-01-01','0.0.0.0'):'00000000-0000-0000-0000-000000000000'} diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.sql b/tests/queries/0_stateless/03203_json_empty_as_default.sql index 35652f4c751..1243d450c2e 100644 --- a/tests/queries/0_stateless/03203_json_empty_as_default.sql +++ b/tests/queries/0_stateless/03203_json_empty_as_default.sql @@ -1,7 +1,7 @@ -set input_format_json_empty_as_default = 1; -set allow_experimental_variant_type = 1; +SET input_format_json_empty_as_default = 1, allow_experimental_variant_type = 1; -# Simple types +-- Simple types +-- { echoOn } SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); @@ -9,8 +9,9 @@ SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}') SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); +-- { echoOff } -# Simple type AggregateFunction +-- Simple type AggregateFunction DROP TABLE IF EXISTS table1; CREATE TABLE table1(col AggregateFunction(uniq, UInt64)) ENGINE=Memory(); DROP TABLE IF EXISTS table2; @@ -18,35 +19,41 @@ CREATE TABLE table2(UserID UInt64) ENGINE=Memory(); INSERT INTO table1 SELECT uniqState(UserID) FROM table2; INSERT INTO table1 SELECT x FROM format(JSONEachRow, 'x AggregateFunction(uniq, UInt64)' AS T, '{"x":""}'); + +-- { echoOn } SELECT COUNT(DISTINCT col) FROM table1; +-- { echoOff } DROP TABLE table1; DROP TABLE table2; -# The setting input_format_defaults_for_omitted_fields determines the default value if enabled. +-- The setting input_format_defaults_for_omitted_fields determines the default value if enabled. CREATE TABLE table1(address IPv6 DEFAULT toIPv6('2001:db8:3333:4444:5555:6666:7777:8888')) ENGINE=Memory(); -set input_format_defaults_for_omitted_fields = 0; +SET input_format_defaults_for_omitted_fields = 0; INSERT INTO table1 FORMAT JSONEachRow {"address":""}; -set input_format_defaults_for_omitted_fields = 1; +SET input_format_defaults_for_omitted_fields = 1; INSERT INTO table1 FORMAT JSONEachRow {"address":""}; +-- { echoOn } SELECT * FROM table1 ORDER BY address ASC; +-- { echoOff } DROP TABLE table1; -# Nullable +-- Nullable +-- { echoOn } SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); -# Compound types +-- Compound types SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); -# Deep composition +-- Deep composition SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); From 7b09ec9ccbf80f6d156f0029c2e6aefa32d7159a Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 23 Jul 2024 01:40:11 +0200 Subject: [PATCH 307/844] added input_format_json_empty_as_default to setting changes history --- src/Core/SettingsChangesHistory.cpp | 260 ++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 20a8721c10e..6a3b32f0d1b 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -509,6 +509,266 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; From 906a181b97253355193e51e99ebc3320eb6d8907 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 23 Jul 2024 12:28:23 +0200 Subject: [PATCH 308/844] fix for clang-tidy --- .../Serializations/SerializationAsStringNonTrivialJSON.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h index c6d6b864bed..f52e1982830 100644 --- a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h +++ b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h @@ -50,9 +50,9 @@ public: return tryDeserializeTextNoEmptyCheckJSON(column, istr, format_settings); } - virtual void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0; + void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0; - virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override + bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); } From 14dad25adce2492957c10f147812308a92587b26 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 23 Jul 2024 21:56:40 +0200 Subject: [PATCH 309/844] trigger build From 5d6d378f2470d3147f561f34bebe77200ff2181c Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Wed, 24 Jul 2024 15:40:27 +0200 Subject: [PATCH 310/844] renumbered tests --- ...as_default.reference => 03210_json_empty_as_default.reference} | 0 ..._json_empty_as_default.sql => 03210_json_empty_as_default.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03203_json_empty_as_default.reference => 03210_json_empty_as_default.reference} (100%) rename tests/queries/0_stateless/{03203_json_empty_as_default.sql => 03210_json_empty_as_default.sql} (100%) diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.reference b/tests/queries/0_stateless/03210_json_empty_as_default.reference similarity index 100% rename from tests/queries/0_stateless/03203_json_empty_as_default.reference rename to tests/queries/0_stateless/03210_json_empty_as_default.reference diff --git a/tests/queries/0_stateless/03203_json_empty_as_default.sql b/tests/queries/0_stateless/03210_json_empty_as_default.sql similarity index 100% rename from tests/queries/0_stateless/03203_json_empty_as_default.sql rename to tests/queries/0_stateless/03210_json_empty_as_default.sql From 0ff90e4a5c222caa93863dc5e4dcfa2816a64d20 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Thu, 1 Aug 2024 01:31:31 +0200 Subject: [PATCH 311/844] post-review changes --- src/DataTypes/DataTypeAggregateFunction.h | 2 - src/DataTypes/DataTypeDate.h | 1 - src/DataTypes/DataTypeDate32.h | 1 - src/DataTypes/DataTypeDateTime.h | 1 - src/DataTypes/DataTypeDateTime64.h | 2 - src/DataTypes/DataTypeIPv4andIPv6.h | 2 - src/DataTypes/DataTypeUUID.h | 1 - src/DataTypes/IDataType.h | 3 - src/DataTypes/Serializations/ISerialization.h | 11 +- .../SerializationAggregateFunction.cpp | 2 +- .../SerializationAggregateFunction.h | 5 +- .../Serializations/SerializationArray.cpp | 128 +++++++++++++++--- .../SerializationAsStringNonTrivialJSON.h | 61 --------- .../Serializations/SerializationDate.cpp | 4 +- .../Serializations/SerializationDate.h | 7 +- .../Serializations/SerializationDate32.cpp | 4 +- .../Serializations/SerializationDate32.h | 7 +- .../Serializations/SerializationDateTime.cpp | 4 +- .../Serializations/SerializationDateTime.h | 7 +- .../SerializationDateTime64.cpp | 6 +- .../Serializations/SerializationDateTime64.h | 7 +- .../SerializationIPv4andIPv6.cpp | 4 +- .../Serializations/SerializationIPv4andIPv6.h | 7 +- .../Serializations/SerializationMap.cpp | 114 ++++++++++++++-- .../Serializations/SerializationMap.h | 3 + .../Serializations/SerializationNullable.cpp | 54 ++------ .../Serializations/SerializationNullable.h | 1 - .../Serializations/SerializationTuple.cpp | 89 +++++++++++- .../Serializations/SerializationUUID.cpp | 4 +- .../Serializations/SerializationUUID.h | 7 +- src/Formats/JSONUtils.cpp | 84 +++++++++--- src/IO/ReadHelpers.cpp | 17 --- src/IO/ReadHelpers.h | 4 - ... => 03215_json_empty_as_default.reference} | 8 +- ...lt.sql => 03215_json_empty_as_default.sql} | 0 35 files changed, 415 insertions(+), 247 deletions(-) delete mode 100644 src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h rename tests/queries/0_stateless/{03210_json_empty_as_default.reference => 03215_json_empty_as_default.reference} (95%) rename tests/queries/0_stateless/{03210_json_empty_as_default.sql => 03215_json_empty_as_default.sql} (100%) diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 2ab2e53100b..e3a4f9726d9 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -64,8 +64,6 @@ public: SerializationPtr doGetDefaultSerialization() const override; bool supportsSparseSerialization() const override { return false; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } - bool isVersioned() const; /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index cb7a603705d..0e08b9ba2ca 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -17,7 +17,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 53bd010b7cf..65633e7a228 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -18,7 +18,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 11b579920ba..5519240dee1 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -44,7 +44,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index dd5ff7e6550..64cedd798d1 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -39,8 +39,6 @@ public: bool isSummable() const override { return false; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } - protected: SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index 520af4f21e0..5aea55751a7 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -46,7 +46,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; @@ -85,7 +84,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 1aeab1b78ba..90cdd90d68d 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -42,7 +42,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(UUID); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 174531cad27..397ae3d8be9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -328,9 +328,6 @@ public: /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); - /// Checks if this is a non-numeric non-string type which is serialized as a JSON string. - virtual bool isNonTriviallySerializedAsStringJSON() const { return false; } - protected: friend class DataTypeFactory; friend class AggregateFunctionSimpleState; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 3f4f4b8f875..5d0bf60c59f 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; } class IDataType; @@ -399,20 +398,12 @@ public: virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - /// The following two methods are implemented only for non-numeric non-string simple data types. - virtual void deserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method deserializeTextNoEmptyCheckJSON is not supported"); - } - virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); - } virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const { serializeTextJSON(column, row_num, ostr, settings); } + /** Text serialization for putting into the XML format. */ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 39c3f389619..41b198890e4 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -182,7 +182,7 @@ void SerializationAggregateFunction::serializeTextJSON(const IColumn & column, s } -void SerializationAggregateFunction::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; readJSONString(s, istr, settings.json); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 6afa1cd4e97..c45fc79f714 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -3,13 +3,12 @@ #include #include -#include namespace DB { -class SerializationAggregateFunction final : public SerializationAsStringNonTrivialJSON +class SerializationAggregateFunction final : public ISerialization { private: AggregateFunctionPtr function; @@ -38,7 +37,7 @@ public: void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 0a9c4529e23..eb24307d9e9 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -614,29 +615,124 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t writeChar(']', ostr); } +namespace +{ +template +ReturnType deserializeTextJSONArrayImpl(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) +{ + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + nested_column.insertDefault(); + return ReturnType(true); + } + return deserialize(nested_column, buf); + }; + + auto deserialize_nested_impl = [&settings, &nested](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if constexpr (throw_exception) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + nested->deserializeTextJSON(nested_column, buf, settings); + return; + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + auto deserialize_nested = [&settings, &do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + return deserialize_nested_impl(nested_column, buf); + + if (buf.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_nested_impl(nested_column_, peekable_buf); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_nested_impl(nested_column_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(buf, true); + return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check); + }; + + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); +} +} void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + deserializeTextJSONArrayImpl(column, istr, nested, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto read_nested = [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }; - - return deserializeTextImpl(column, istr, std::move(read_nested), false); + return deserializeTextJSONArrayImpl(column, istr, nested, settings); } diff --git a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h deleted file mode 100644 index f52e1982830..00000000000 --- a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/** Serialization for non-numeric non-string data types serialized as JSON strings - * For these data types, we support an option, input_format_json_empty_as_default, which, when set to 1, - * allows for JSON deserialization to treat an encountered empty string as a default value for the specified type. - * Derived classes must implement the following methods: - * deserializeTextNoEmptyCheckJSON() and tryDeserializeTextNoEmptyCheckJSON() - * instead of deserializeTextJSON() and tryDeserializeTextJSON() respectively. - */ -template -requires std::derived_from -class SerializationAsStringNonTrivialJSON : public T -{ -public: - using T::T; - - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override - { - if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) - column.insertDefault(); - else - deserializeTextNoEmptyCheckJSON(column, istr, format_settings); - } - - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override - { - if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) - { - column.insertDefault(); - return true; - } - else - return tryDeserializeTextNoEmptyCheckJSON(column, istr, format_settings); - } - - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0; - - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); - } -}; - -} diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 3f122189c22..38e1bb87b6d 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -85,7 +85,7 @@ void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; assertChar('"', istr); @@ -94,7 +94,7 @@ void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBu assert_cast(column).getData().push_back(x); } -bool SerializationDate::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index 10c83171527..dcf79eb49da 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -1,13 +1,12 @@ #pragma once #include -#include #include namespace DB { -class SerializationDate final : public SerializationAsStringNonTrivialJSON> +class SerializationDate final : public SerializationNumber { public: explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -22,8 +21,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index 8ad07b534ce..70a22d59e42 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -83,7 +83,7 @@ void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_n writeChar('"', ostr); } -void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; assertChar('"', istr); @@ -92,7 +92,7 @@ void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, Read assert_cast(column).getData().push_back(x); } -bool SerializationDate32::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index ac6239fbc2b..be2e2b76c1d 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -1,12 +1,11 @@ #pragma once #include -#include #include namespace DB { -class SerializationDate32 final : public SerializationAsStringNonTrivialJSON> +class SerializationDate32 final : public SerializationNumber { public: explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -21,8 +20,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index 80b2d51140d..c5c819ce7fa 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -180,7 +180,7 @@ void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row writeChar('"', ostr); } -void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) @@ -196,7 +196,7 @@ void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, Re assert_cast(column).getData().push_back(static_cast(x)); } -bool SerializationDateTime::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 0041f221ccf..584b0c4116b 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include class DateLUTImpl; @@ -9,7 +8,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin +class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { public: explicit SerializationDateTime(const TimezoneMixin & time_zone_); @@ -24,8 +23,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index cdac7f785d1..442e29edd52 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -15,7 +15,7 @@ namespace DB SerializationDateTime64::SerializationDateTime64( UInt32 scale_, const TimezoneMixin & time_zone_) - : SerializationAsStringNonTrivialJSON>(DecimalUtils::max_precision, scale_) + : SerializationDecimalBase(DecimalUtils::max_precision, scale_) , TimezoneMixin(time_zone_) { } @@ -170,7 +170,7 @@ void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t r writeChar('"', ostr); } -void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) @@ -185,7 +185,7 @@ void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, assert_cast(column).getData().push_back(x); } -bool SerializationDateTime64::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index a3bc4d1ad4e..b49bd1e9098 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include class DateLUTImpl; @@ -9,7 +8,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime64 final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin +class SerializationDateTime64 final : public SerializationDecimalBase, public TimezoneMixin { public: SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); @@ -26,8 +25,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index ecd50a0b9b8..c1beceb4533 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -69,7 +69,7 @@ void SerializationIP::serializeTextJSON(const DB::IColumn & column, size_t } template -void SerializationIP::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationIP::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { IPv x; assertChar('"', istr); @@ -84,7 +84,7 @@ void SerializationIP::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, } template -bool SerializationIP::tryDeserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const +bool SerializationIP::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const { IPv x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h index 44f36252741..a53f257646b 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h @@ -4,14 +4,13 @@ #include #include #include -#include #include namespace DB { template -class SerializationIP : public SerializationAsStringNonTrivialJSON +class SerializationIP : public SimpleTextSerialization { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -23,8 +22,8 @@ public: bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override; diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index c722b3ac7a1..4a9f701c5c1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -315,29 +316,116 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro writeChar('}', ostr); } - -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationMap::deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_subcolumn = [](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + subcolumn.insertDefault(); + return ReturnType(true); + } + return deserialize(buf, subcolumn_serialization, subcolumn); + }; + + auto deserialize_subcolumn_impl = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + if constexpr (throw_exception) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); else subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + else + return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); + } + }; + + auto deserialize_subcolumn = [&settings, &do_deserialize_subcolumn, &deserialize_subcolumn_impl](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, [](ReadBuffer &){ return false; }, deserialize_subcolumn_impl); + + if (buf.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_subcolumn_impl_with_check = [&deserialize_subcolumn_impl](ReadBuffer & buf_, const SerializationPtr & subcolumn_serialization_, IColumn & subcolumn_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(buf, true); + return do_deserialize_subcolumn(peekable_buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl_with_check); + }; + + return deserializeTextImpl(column, istr, deserialize_subcolumn); +} + +void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONMapImpl(column, istr, settings); } bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto reader = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }; - - return deserializeTextImpl(column, istr, reader); + return deserializeTextJSONMapImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index cfcde445c1f..9bdc110c445 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -74,6 +74,9 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; + + template + ReturnType deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 3cd767e58eb..e72dd3a42f5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -844,52 +844,25 @@ bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr) return checkString("null", istr); } -namespace -{ - -enum class Strategy : uint8_t -{ - Deserialize, - DeserializeNoEmptyCheck, - TryDeserialize -}; - -template struct ReturnTypeImpl; -template <> struct ReturnTypeImpl { using Type = void; }; -template <> struct ReturnTypeImpl { using Type = bool; }; -template <> struct ReturnTypeImpl { using Type = void; }; - -template -using ReturnType = typename ReturnTypeImpl::Type; - -template struct AlwaysFalse : std::false_type {}; - -template -ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) +template +ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) { auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); }; - auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) { - if constexpr (strategy == Strategy::TryDeserialize) + if constexpr (std::is_same_v) return nested->tryDeserializeTextJSON(nested_column, buf, settings); - else if constexpr (strategy == Strategy::Deserialize) - nested->deserializeTextJSON(nested_column, buf, settings); - else if constexpr (strategy == Strategy::DeserializeNoEmptyCheck) - nested->deserializeTextNoEmptyCheckJSON(nested_column, buf, settings); - else - static_assert(AlwaysFalse::value); + nested->deserializeTextJSON(nested_column, buf, settings); }; - return deserializeImpl>(column, istr, check_for_null, deserialize_nested, is_null); -} - + return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ColumnNullable & col = assert_cast(column); bool is_null; - deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); + deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); safeAppendToNullMap(col, is_null); } @@ -897,27 +870,20 @@ bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer { ColumnNullable & col = assert_cast(column); bool is_null; - return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); + return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); } bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); - return !is_null; -} - -bool SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) -{ - bool is_null; - deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); return !is_null; } bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); } void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index c5215e2a39f..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -88,7 +88,6 @@ public: static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); - static bool deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); /// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type. diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 594a23ab507..7238c2bdbd7 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -317,24 +318,100 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf { static constexpr bool throw_exception = std::is_same_v; - auto deserialize_element = [&](IColumn & element_column, size_t element_pos) + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_element = [](IColumn & element_column, size_t element_pos, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + element_column.insertDefault(); + return ReturnType(true); + } + + return deserialize(element_column, element_pos, buf); + }; + + auto deserialize_element_impl = [&settings, this](IColumn & element_column, size_t element_pos, ReadBuffer & buf) -> ReturnType { if constexpr (throw_exception) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); else - elems[element_pos]->deserializeTextJSON(element_column, istr, settings); - return true; + elems[element_pos]->deserializeTextJSON(element_column, buf, settings); } else { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings); + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); + return elems[element_pos]->tryDeserializeTextJSON(element_column, buf, settings); } }; + auto deserialize_element = [&settings, &do_deserialize_element, &deserialize_element_impl, &istr](IColumn & element_column, size_t element_pos) -> ReturnType + { + if (!settings.json.empty_as_default || istr.eof() || *istr.position() != EMPTY_STRING[0]) + return do_deserialize_element(element_column, element_pos, istr, [](ReadBuffer &) { return false; }, deserialize_element_impl); + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_element(element_column, element_pos, istr, check_for_empty_string, deserialize_element_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_element_impl_with_check = [&deserialize_element_impl](IColumn & element_column_, size_t element_pos_, ReadBuffer & buf_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_element_impl(element_column_, element_pos_, peekable_buf); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_element_impl(element_column_, element_pos_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize_element(element_column, element_pos, peekable_buf, check_for_empty_string, deserialize_element_impl_with_check); + }; + if (settings.json.read_named_tuples_as_objects && have_explicit_names) { diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 21a0ccf676c..f18466ad8ad 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -94,7 +94,7 @@ void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; assertChar('"', istr); @@ -103,7 +103,7 @@ void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBu assert_cast(column).getData().push_back(x); } -bool SerializationUUID::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationUUID.h b/src/DataTypes/Serializations/SerializationUUID.h index 185d1d44c16..458504f8f42 100644 --- a/src/DataTypes/Serializations/SerializationUUID.h +++ b/src/DataTypes/Serializations/SerializationUUID.h @@ -1,12 +1,11 @@ #pragma once #include -#include namespace DB { -class SerializationUUID : public SerializationAsStringNonTrivialJSON +class SerializationUUID : public SimpleTextSerialization { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -16,8 +15,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 73189e81f97..34f55ef00d8 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -268,6 +270,9 @@ namespace JSONUtils const FormatSettings & format_settings, bool yield_strings) { + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + try { bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type); @@ -286,33 +291,78 @@ namespace JSONUtils return true; } - if (format_settings.json.empty_as_default && type->isNonTriviallySerializedAsStringJSON()) + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf_, auto && check_for_empty_string, auto && deserialize) -> bool { - /// We have a non-numeric non-string data type at the top level. - /// At first glance, it looks like we sort of duplicate the work done in - /// SerializationAsStringNonTrivialJSON. Actually we need to proceed as - /// done here because we want to return false if we inserted a default - /// value on purpose, which the ISerialization interface does not allow for. - if (tryMatchEmptyString(in)) + if (check_for_empty_string(buf_)) { - column.insertDefault(); + column_.insertDefault(); return false; } + else + return deserialize(column_, buf_); + }; - if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(column, in, format_settings, serialization); - - serialization->deserializeTextNoEmptyCheckJSON(column, in, format_settings); - return true; - } - else + auto deserialize_impl = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf_) -> bool { if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf_, format_settings, serialization); - serialization->deserializeTextJSON(column, in, format_settings); + serialization->deserializeTextJSON(column_, buf_, format_settings); return true; + }; + + if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) + return do_deserialize(column, in, [](ReadBuffer &) { return false; }, deserialize_impl); + + if (in.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize(column, in, check_for_empty_string, deserialize_impl); } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + + if (!deserialize_impl(column_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + }; + + PeekableReadBuffer peekable_buf(in, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_impl_with_check); } catch (Exception & e) { diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e8ef667a4e3..9559462e62b 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1137,23 +1137,6 @@ template void readCSVStringInto(String & s, ReadBuffer & b template void readCSVStringInto(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto, false, false>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); -bool tryMatchEmptyString(ReadBuffer & buf) -{ - if (buf.eof() || *buf.position() != '"') - return false; - - ++buf.position(); - - if (buf.eof() || *buf.position() != '"') - { - --buf.position(); - return false; - } - - ++buf.position(); - - return true; -} template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 59c1923e02a..ffba4fafb5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -665,10 +665,6 @@ void readStringUntilEOFInto(Vector & s, ReadBuffer & buf); template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); -/// Consumes the current token if it is an empty string, i.e. two consecutive double quotes, -/// Returns true if consumed. -bool tryMatchEmptyString(ReadBuffer & buf); - /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings); diff --git a/tests/queries/0_stateless/03210_json_empty_as_default.reference b/tests/queries/0_stateless/03215_json_empty_as_default.reference similarity index 95% rename from tests/queries/0_stateless/03210_json_empty_as_default.reference rename to tests/queries/0_stateless/03215_json_empty_as_default.reference index 1c689228cdf..1a98f45577a 100644 --- a/tests/queries/0_stateless/03210_json_empty_as_default.reference +++ b/tests/queries/0_stateless/03215_json_empty_as_default.reference @@ -24,18 +24,18 @@ SELECT * FROM table1 ORDER BY address ASC; -- Nullable -- { echoOn } SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); -:: +\N -- Compound types SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); ['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); -['::','::'] +[NULL,NULL] SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); ('1970-01-01','0.0.0.0','abc') SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); {'abc':'::'} SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); -00000000-0000-0000-0000-000000000000 +\N -- Deep composition SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); [['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] @@ -44,4 +44,4 @@ SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b1 SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); (['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); -{('1970-01-01','0.0.0.0'):'00000000-0000-0000-0000-000000000000'} +{('1970-01-01','0.0.0.0'):NULL} diff --git a/tests/queries/0_stateless/03210_json_empty_as_default.sql b/tests/queries/0_stateless/03215_json_empty_as_default.sql similarity index 100% rename from tests/queries/0_stateless/03210_json_empty_as_default.sql rename to tests/queries/0_stateless/03215_json_empty_as_default.sql From 3c586d80c85168d13620042d1656fad67f35d09e Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Sat, 3 Aug 2024 10:57:27 +0200 Subject: [PATCH 312/844] post-rebase fixes --- src/Core/SettingsChangesHistory.cpp | 261 +--------------------------- 1 file changed, 1 insertion(+), 260 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6a3b32f0d1b..2f99910d7a0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -88,6 +88,7 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; From 229fffcd56414727fc806c711b7ca8d16f876878 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 6 Aug 2024 19:31:30 +0200 Subject: [PATCH 313/844] post-review changes --- src/Core/SettingsChangesHistory.cpp | 1 + .../Serializations/SerializationArray.cpp | 104 +--------------- .../Serializations/SerializationMap.cpp | 99 +-------------- .../Serializations/SerializationTuple.cpp | 116 +++--------------- .../Serializations/SerializationTuple.h | 3 + src/Formats/FormatFactory.cpp | 1 - src/Formats/JSONUtils.cpp | 31 ++--- src/Formats/JSONUtils.h | 109 ++++++++++++++++ 8 files changed, 154 insertions(+), 310 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 2f99910d7a0..e385f72fb38 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -89,6 +89,7 @@ static std::initializer_list #include #include -#include #include #include #include #include #include +#include namespace DB { @@ -620,108 +620,16 @@ namespace template ReturnType deserializeTextJSONArrayImpl(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) { - static constexpr auto throw_exception = std::is_same_v; - - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); - - auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + auto deserializer = [&nested](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType { - if (check_for_empty_string(buf)) + auto adapter = [&deserialize_nested, &istr_, &nested](IColumn & nested_column) -> ReturnType { - nested_column.insertDefault(); - return ReturnType(true); - } - return deserialize(nested_column, buf); - }; - - auto deserialize_nested_impl = [&settings, &nested](IColumn & nested_column, ReadBuffer & buf) -> ReturnType - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); - else - nested->deserializeTextJSON(nested_column, buf, settings); - return; - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); - else - return nested->tryDeserializeTextJSON(nested_column, buf, settings); - } - }; - - auto deserialize_nested = [&settings, &do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf) -> ReturnType - { - if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) - return deserialize_nested_impl(nested_column, buf); - - if (buf.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - else - { - buf_.position() = pos; - return false; - } - }; - - return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. - - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - else - { - peekable_buf.rollbackToCheckpoint(); - return false; - } + return deserialize_nested(nested_column, istr_, nested); }; - - auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_) -> ReturnType - { - auto & peekable_buf = assert_cast(buf_); - if constexpr (throw_exception) - { - deserialize_nested_impl(nested_column_, peekable_buf); - assert(!peekable_buf.hasUnreadData()); - } - else - { - if (!deserialize_nested_impl(nested_column_, peekable_buf)) - return false; - if (likely(!peekable_buf.hasUnreadData())) - return true; - return false; - } - }; - - PeekableReadBuffer peekable_buf(buf, true); - return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check); + return deserializeTextImpl(column_, istr_, adapter, false); }; - return deserializeTextImpl(column, istr, - [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType - { - return deserialize_nested(nested_column, istr); - }, false); + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); } } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 4a9f701c5c1..0412a85ee44 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -6,9 +6,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -319,103 +319,16 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro template ReturnType SerializationMap::deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - static constexpr auto throw_exception = std::is_same_v; - - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); - - auto do_deserialize_subcolumn = [](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn, auto && check_for_empty_string, auto && deserialize) -> ReturnType + auto deserializer = [this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType { - if (check_for_empty_string(buf)) + auto adapter = [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & nested_column_serialization, IColumn & nested_column) -> ReturnType { - subcolumn.insertDefault(); - return ReturnType(true); - } - return deserialize(buf, subcolumn_serialization, subcolumn); - }; - - auto deserialize_subcolumn_impl = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - else - subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - else - return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - } - }; - - auto deserialize_subcolumn = [&settings, &do_deserialize_subcolumn, &deserialize_subcolumn_impl](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType - { - if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) - return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, [](ReadBuffer &){ return false; }, deserialize_subcolumn_impl); - - if (buf.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - else - { - buf_.position() = pos; - return false; - } - }; - - return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. - - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - else - { - peekable_buf.rollbackToCheckpoint(); - return false; - } + return deserialize_nested(nested_column, buf, nested_column_serialization); }; - - auto deserialize_subcolumn_impl_with_check = [&deserialize_subcolumn_impl](ReadBuffer & buf_, const SerializationPtr & subcolumn_serialization_, IColumn & subcolumn_) -> ReturnType - { - auto & peekable_buf = assert_cast(buf_); - if constexpr (throw_exception) - { - deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_); - assert(!peekable_buf.hasUnreadData()); - } - else - { - if (!deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_)) - return false; - if (likely(!peekable_buf.hasUnreadData())) - return true; - return false; - } - }; - - PeekableReadBuffer peekable_buf(buf, true); - return do_deserialize_subcolumn(peekable_buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl_with_check); + return this->deserializeTextImpl(column_, istr_, adapter); }; - return deserializeTextImpl(column, istr, deserialize_subcolumn); + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); } void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 7238c2bdbd7..459677d40cd 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -314,103 +314,9 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeTextJSONTupleImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { - static constexpr bool throw_exception = std::is_same_v; - - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); - - auto do_deserialize_element = [](IColumn & element_column, size_t element_pos, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType - { - if (check_for_empty_string(buf)) - { - element_column.insertDefault(); - return ReturnType(true); - } - - return deserialize(element_column, element_pos, buf); - }; - - auto deserialize_element_impl = [&settings, this](IColumn & element_column, size_t element_pos, ReadBuffer & buf) -> ReturnType - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); - else - elems[element_pos]->deserializeTextJSON(element_column, buf, settings); - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(element_column, buf, settings); - } - }; - - auto deserialize_element = [&settings, &do_deserialize_element, &deserialize_element_impl, &istr](IColumn & element_column, size_t element_pos) -> ReturnType - { - if (!settings.json.empty_as_default || istr.eof() || *istr.position() != EMPTY_STRING[0]) - return do_deserialize_element(element_column, element_pos, istr, [](ReadBuffer &) { return false; }, deserialize_element_impl); - - if (istr.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - else - { - buf_.position() = pos; - return false; - } - }; - - return do_deserialize_element(element_column, element_pos, istr, check_for_empty_string, deserialize_element_impl); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. - - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - else - { - peekable_buf.rollbackToCheckpoint(); - return false; - } - }; - - auto deserialize_element_impl_with_check = [&deserialize_element_impl](IColumn & element_column_, size_t element_pos_, ReadBuffer & buf_) -> ReturnType - { - auto & peekable_buf = assert_cast(buf_); - if constexpr (throw_exception) - { - deserialize_element_impl(element_column_, element_pos_, peekable_buf); - assert(!peekable_buf.hasUnreadData()); - } - else - { - if (!deserialize_element_impl(element_column_, element_pos_, peekable_buf)) - return false; - if (likely(!peekable_buf.hasUnreadData())) - return true; - return false; - } - }; - - PeekableReadBuffer peekable_buf(istr, true); - return do_deserialize_element(element_column, element_pos, peekable_buf, check_for_empty_string, deserialize_element_impl_with_check); - }; + static constexpr auto throw_exception = std::is_same_v; if (settings.json.read_named_tuples_as_objects && have_explicit_names) @@ -583,6 +489,22 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf } } +template +ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto deserializer = [&settings, this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + { + auto adapter = [&deserialize_nested, &istr_, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return deserialize_nested(nested_column, istr_, this->elems[element_pos]); + }; + + return this->deserializeTextJSONTupleImpl(column_, istr_, settings, adapter); + }; + + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); +} + void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { deserializeTextJSONImpl(column, istr, settings); diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 810673d8b21..4a55ea6eedf 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -81,6 +81,9 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; + template + ReturnType deserializeTextJSONTupleImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + template ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 59131f34697..c1723a65e6d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -151,7 +151,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; - format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching; format_settings.json.empty_as_default = settings.input_format_json_empty_as_default; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 34f55ef00d8..4f0187b4d87 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -3,10 +3,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -311,8 +308,8 @@ namespace JSONUtils return true; }; - if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) - return do_deserialize(column, in, [](ReadBuffer &) { return false; }, deserialize_impl); + if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) + return deserialize_impl(column, in); if (in.available() >= EMPTY_STRING_LENGTH) { @@ -322,11 +319,8 @@ namespace JSONUtils auto * pos = buf_.position(); if (checkString(EMPTY_STRING, buf_)) return true; - else - { - buf_.position() = pos; - return false; - } + buf_.position() = pos; + return false; }; return do_deserialize(column, in, check_for_empty_string, deserialize_impl); @@ -343,22 +337,17 @@ namespace JSONUtils SCOPE_EXIT(peekable_buf.dropCheckpoint()); if (checkString(EMPTY_STRING, peekable_buf)) return true; - else - { - peekable_buf.rollbackToCheckpoint(); - return false; - } + peekable_buf.rollbackToCheckpoint(); + return false; }; auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool { auto & peekable_buf = assert_cast(buf_); - - if (!deserialize_impl(column_, peekable_buf)) - return false; - if (likely(!peekable_buf.hasUnreadData())) - return true; - return false; + bool res = deserialize_impl(column_, peekable_buf); + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + return res; }; PeekableReadBuffer peekable_buf(in, true); diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index e2ac3467971..2800017bfed 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -2,11 +2,15 @@ #include #include +#include #include #include +#include #include +#include #include #include +#include #include #include @@ -16,6 +20,11 @@ namespace DB class Block; struct JSONInferenceInfo; +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace JSONUtils { std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows); @@ -136,6 +145,106 @@ namespace JSONUtils bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name, const FormatSettings::JSON & settings); void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings); + + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserializer) + { + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize, const SerializationPtr & nested_column_serialization) -> ReturnType + { + if (check_for_empty_string(buf)) + { + nested_column.insertDefault(); + return ReturnType(true); + } + return deserialize(nested_column, buf, nested_column_serialization); + }; + + auto deserialize_nested_impl = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType + { + if constexpr (throw_exception) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + auto deserialize_nested = [&settings, &do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType + { + if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + return deserialize_nested_impl(nested_column, buf, nested_column_serialization); + + if (buf.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + buf_.position() = pos; + return false; + }; + + return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl, nested_column_serialization); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + peekable_buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_, const SerializationPtr & nested_column_serialization_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + + auto enforceNoUnreadData = [&peekable_buf]() -> void + { + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + }; + + if constexpr (throw_exception) + { + deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); + enforceNoUnreadData(); + } + else + { + bool res = deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); + enforceNoUnreadData(); + return res; + } + }; + + PeekableReadBuffer peekable_buf(buf, true); + return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check, nested_column_serialization); + }; + + return deserializer(column, istr, deserialize_nested); + } } } From a39a4b108099346b129cb162edbbf85456ca650c Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Wed, 7 Aug 2024 11:31:36 +0200 Subject: [PATCH 314/844] better --- .../Serializations/SerializationArray.cpp | 29 +++++++++++++--- .../Serializations/SerializationMap.cpp | 29 +++++++++++++--- .../Serializations/SerializationMap.h | 2 +- .../Serializations/SerializationTuple.cpp | 33 +++++++++++++++---- .../Serializations/SerializationTuple.h | 4 +-- src/Formats/JSONUtils.h | 4 +-- 6 files changed, 79 insertions(+), 22 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 5891a0a6e75..60f908a249b 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -618,7 +618,7 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t namespace { template -ReturnType deserializeTextJSONArrayImpl(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) +ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) { auto deserializer = [&nested](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType { @@ -626,21 +626,40 @@ ReturnType deserializeTextJSONArrayImpl(IColumn & column, ReadBuffer & istr, con { return deserialize_nested(nested_column, istr_, nested); }; - return deserializeTextImpl(column_, istr_, adapter, false); + return deserializeTextImpl(column_, istr_, std::move(adapter), false); }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); } } void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextJSONArrayImpl(column, istr, nested, settings); + if (settings.json.empty_as_default) + deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); + else + deserializeTextImpl(column, istr, + [&settings, &istr, this](IColumn & nested_column) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); + else + nested->deserializeTextJSON(nested_column, istr, settings); + }, false); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - return deserializeTextJSONArrayImpl(column, istr, nested, settings); + if (settings.json.empty_as_default) + return deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); + + return deserializeTextImpl(column, istr, + [&settings, &istr, this](IColumn & nested_column) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); + return nested->tryDeserializeTextJSON(nested_column, istr, settings); + }, false); } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 0412a85ee44..96c21f19805 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -317,7 +317,7 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro } template -ReturnType SerializationMap::deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationMap::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { auto deserializer = [this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType { @@ -325,20 +325,39 @@ ReturnType SerializationMap::deserializeTextJSONMapImpl(IColumn & column, ReadBu { return deserialize_nested(nested_column, buf, nested_column_serialization); }; - return this->deserializeTextImpl(column_, istr_, adapter); + return this->deserializeTextImpl(column_, istr_, std::move(adapter)); }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); } void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextJSONMapImpl(column, istr, settings); + if (settings.json.empty_as_default) + deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + else + deserializeTextImpl(column, istr, + [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + else + subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); + }); } bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - return deserializeTextJSONMapImpl(column, istr, settings); + if (settings.json.empty_as_default) + return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + + return deserializeTextImpl(column, istr, + [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); + }); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 9bdc110c445..ddf8047f061 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -76,7 +76,7 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; template - ReturnType deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 459677d40cd..f8cb894c19b 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -314,7 +314,7 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONTupleImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const +ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { static constexpr auto throw_exception = std::is_same_v; @@ -490,29 +490,48 @@ ReturnType SerializationTuple::deserializeTextJSONTupleImpl(IColumn & column, Re } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { auto deserializer = [&settings, this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType { auto adapter = [&deserialize_nested, &istr_, this](IColumn & nested_column, size_t element_pos) -> ReturnType { - return deserialize_nested(nested_column, istr_, this->elems[element_pos]); + return deserialize_nested(nested_column, istr_, elems[element_pos]); }; - return this->deserializeTextJSONTupleImpl(column_, istr_, settings, adapter); + return deserializeTextJSONImpl(column_, istr_, settings, std::move(adapter)); }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, deserializer); + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); } void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { - deserializeTextJSONImpl(column, istr, settings); + if (settings.json.empty_as_default) + deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + else + deserializeTextJSONImpl(column, istr, settings, + [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> void + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); + else + elems[element_pos]->deserializeTextJSON(nested_column, istr, settings); + }); } bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { - return deserializeTextJSONImpl(column, istr, settings); + if (settings.json.empty_as_default) + return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + + return deserializeTextJSONImpl(column, istr, settings, + [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> bool + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); + return elems[element_pos]->tryDeserializeTextJSON(nested_column, istr, settings); + }); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 4a55ea6eedf..54084617d3b 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -82,10 +82,10 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; template - ReturnType deserializeTextJSONTupleImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; template - ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; template ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 2800017bfed..a8e7113388a 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -181,9 +181,9 @@ namespace JSONUtils } }; - auto deserialize_nested = [&settings, &do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType + auto deserialize_nested = [&do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType { - if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + if (buf.eof() || *buf.position() != EMPTY_STRING[0]) return deserialize_nested_impl(nested_column, buf, nested_column_serialization); if (buf.available() >= EMPTY_STRING_LENGTH) From 62e45a4c8fefcd846d531c7c4d52e4277435d3c3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 2024 20:55:20 +0000 Subject: [PATCH 315/844] Test 03222_parallel_replicas_min_marks_to_read_overflow --- ..._replicas_min_marks_to_read_overflow.reference | 10 ++++++++++ ...rallel_replicas_min_marks_to_read_overflow.sql | 15 +++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference create mode 100644 tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference new file mode 100644 index 00000000000..7fafd4d13ea --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference @@ -0,0 +1,10 @@ +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql new file mode 100644 index 00000000000..112373e5db2 --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = ReplicatedMergeTree('/clickhouse/03222/{database}/test__fuzz_22', 'r1') ORDER BY k SETTINGS index_granularity = 1; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000); + +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT k, v +FROM test__fuzz_22 +ORDER BY k +LIMIT 100, 10 +SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +DROP TABLE test__fuzz_22 SYNC; From 1a8f45464c1ef81dc8c2c16c8fbba2e696762962 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 2024 21:05:29 +0000 Subject: [PATCH 316/844] Fix min_marks_to_read overflow --- src/Storages/MergeTree/MergeTreeIndexGranularity.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 2a45ab1d927..2f9a4a47b11 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -103,8 +103,16 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num /// This is a heuristic to respect min_marks_to_read which is ignored by MergeTreeReadPool in case of remote disk. /// See comment in IMergeTreeSelectAlgorithm. - if (min_marks_to_read && from_mark + 2 * min_marks_to_read <= to_mark) - to_mark = from_mark + min_marks_to_read; + if (min_marks_to_read) + { + // check that ... + bool overflow = ((1ULL << 63) & min_marks_to_read); // further multiplication by 2 will not overflow + if (!overflow) + overflow = (std::numeric_limits::max() - from_mark) < 2 * min_marks_to_read; // further addition will not overflow + + if (!overflow && from_mark + 2 * min_marks_to_read <= to_mark) + to_mark = from_mark + min_marks_to_read; + } return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; } From 0dc7cd7eb40bd3fb80e2c05871ce193a720b296c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 01:12:11 +0000 Subject: [PATCH 317/844] Update musl to have unwind info --- contrib/llvm-project-cmake/CMakeLists.txt | 6 ++++++ contrib/sysroot | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 76e620314a2..f5dce1c4178 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -140,6 +140,12 @@ if (CMAKE_CROSSCOMPILING) message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}") endif() +# llvm-project/llvm/cmake/config-ix.cmake does a weird thing: it defines _LARGEFILE64_SOURCE, +# then checks if lseek64() function exists, then undefines _LARGEFILE64_SOURCE. +# Then the actual code that uses this function *doesn't* define _LARGEFILE64_SOURCE, so lseek64() +# may not exist and compilation fails. This happens with musl. +add_compile_definitions("_LARGEFILE64_SOURCE") + add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/sysroot b/contrib/sysroot index cc385041b22..866364fea62 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit cc385041b226d1fc28ead14dbab5d40a5f821dd8 +Subproject commit 866364fea629aa3e519ec967836561a6b3b21885 From 0716b460db35524f7cfa82501b5d1d2812904688 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 03:00:51 +0000 Subject: [PATCH 318/844] Fix duplicate symbol errors --- base/harmful/harmful.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 54b552a84ea..19bb962999f 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -66,13 +66,11 @@ TRAP(gethostbyname) TRAP(gethostbyname2) TRAP(gethostent) TRAP(getlogin) -TRAP(getmntent) TRAP(getnetbyaddr) TRAP(getnetbyname) TRAP(getnetent) TRAP(getnetgrent) TRAP(getnetgrent_r) -TRAP(getopt) TRAP(getopt_long) TRAP(getopt_long_only) TRAP(getpass) @@ -133,7 +131,6 @@ TRAP(nrand48) TRAP(__ppc_get_timebase_freq) TRAP(ptsname) TRAP(putchar_unlocked) -TRAP(putenv) TRAP(pututline) TRAP(pututxline) TRAP(putwchar_unlocked) @@ -148,7 +145,6 @@ TRAP(sethostent) TRAP(sethostid) TRAP(setkey) //TRAP(setlocale) // Used by replxx at startup -TRAP(setlogmask) TRAP(setnetent) TRAP(setnetgrent) TRAP(setprotoent) @@ -203,7 +199,6 @@ TRAP(lgammal) TRAP(nftw) TRAP(nl_langinfo) TRAP(putc_unlocked) -TRAP(rand) /** In the current POSIX.1 specification (POSIX.1-2008), readdir() is not required to be thread-safe. However, in modern * implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams * are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external @@ -288,4 +283,14 @@ TRAP(tss_get) TRAP(tss_set) TRAP(tss_delete) +#ifndef USE_MUSL +/// These produce duplicate symbol errors when statically linking with musl. +/// Maybe we can remove them from the musl fork. +TRAP(getopt) +TRAP(putenv) +TRAP(setlogmask) +TRAP(rand) +TRAP(getmntent) +#endif + #endif From c869b0651932bc61d5040395a3a2d0689485a5a1 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 03:13:57 +0000 Subject: [PATCH 319/844] Remove getpwuid() calls in Poco::PathImpl --- base/poco/Foundation/src/Path_UNIX.cpp | 18 +++++------------- contrib/sysroot | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/base/poco/Foundation/src/Path_UNIX.cpp b/base/poco/Foundation/src/Path_UNIX.cpp index 957a62db180..fb2ed71622f 100644 --- a/base/poco/Foundation/src/Path_UNIX.cpp +++ b/base/poco/Foundation/src/Path_UNIX.cpp @@ -48,25 +48,17 @@ std::string PathImpl::currentImpl() std::string PathImpl::homeImpl() { std::string path; -#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) size_t buf_size = 1024; // Same as glibc use for getpwuid std::vector buf(buf_size); struct passwd res; struct passwd* pwd = nullptr; getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); -#else - struct passwd* pwd = getpwuid(getuid()); -#endif if (pwd) path = pwd->pw_dir; else { -#if defined(_POSIX_C_SOURCE) || defined(_BSD_SOURCE) || defined(_POSIX_C_SOURCE) getpwuid_r(getuid(), &res, buf.data(), buf_size, &pwd); -#else - pwd = getpwuid(geteuid()); -#endif if (pwd) path = pwd->pw_dir; else @@ -82,7 +74,7 @@ std::string PathImpl::configHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Preferences/"); #else @@ -97,7 +89,7 @@ std::string PathImpl::dataHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Application Support/"); #else @@ -112,7 +104,7 @@ std::string PathImpl::cacheHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Caches/"); #else @@ -127,7 +119,7 @@ std::string PathImpl::tempHomeImpl() { std::string path = PathImpl::homeImpl(); std::string::size_type n = path.size(); - if (n > 0 && path[n - 1] == '/') + if (n > 0 && path[n - 1] == '/') #if POCO_OS == POCO_OS_MAC_OS_X path.append("Library/Caches/"); #else @@ -159,7 +151,7 @@ std::string PathImpl::tempImpl() std::string PathImpl::configImpl() { std::string path; - + #if POCO_OS == POCO_OS_MAC_OS_X path = "/Library/Preferences/"; #else diff --git a/contrib/sysroot b/contrib/sysroot index 866364fea62..c2d74e21ba1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 866364fea629aa3e519ec967836561a6b3b21885 +Subproject commit c2d74e21ba1b8a27966e344693e176f927e4eb50 From 29bc7cf5d58264e52fa90a83206c9508b208f93a Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Tue, 13 Aug 2024 14:50:37 +0200 Subject: [PATCH 320/844] post-review changes --- src/Core/SettingsChangesHistory.cpp | 1 - .../Serializations/SerializationArray.cpp | 61 ++++---- .../Serializations/SerializationArray.h | 4 + .../Serializations/SerializationMap.cpp | 73 ++++----- .../Serializations/SerializationMap.h | 2 +- .../Serializations/SerializationTuple.cpp | 68 +++++---- .../Serializations/SerializationTuple.h | 6 +- src/Formats/JSONUtils.cpp | 143 ++++++++++-------- src/Formats/JSONUtils.h | 112 +------------- ... => 03222_json_empty_as_default.reference} | 0 ...lt.sql => 03222_json_empty_as_default.sql} | 0 ...pty_as_default_small_read_buffer.reference | 8 + ...json_empty_as_default_small_read_buffer.sh | 31 ++++ 13 files changed, 241 insertions(+), 268 deletions(-) rename tests/queries/0_stateless/{03215_json_empty_as_default.reference => 03222_json_empty_as_default.reference} (100%) rename tests/queries/0_stateless/{03215_json_empty_as_default.sql => 03222_json_empty_as_default.sql} (100%) create mode 100644 tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference create mode 100755 tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e385f72fb38..ae08566ebd2 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -88,7 +88,6 @@ static std::initializer_list -ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) +ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [&nested](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType { - auto adapter = [&deserialize_nested, &istr_, &nested](IColumn & nested_column) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, istr_, nested); - }; - return deserializeTextImpl(column_, istr_, std::move(adapter), false); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + nested->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, deserialize_nested); + }, false); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); } + void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); - else - deserializeTextImpl(column, istr, - [&settings, &istr, this](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + deserializeTextJSONImpl(column, istr, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); - - return deserializeTextImpl(column, istr, - [&settings, &istr, this](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }, false); + return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index c3353f0c251..7e34abfac90 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -82,6 +82,10 @@ public: SerializationPtr create(const SerializationPtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; }; + +private: + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 96c21f19805..ae864cbf7b4 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -316,48 +316,53 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro writeChar('}', ostr); } + template -ReturnType SerializationMap::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType { - auto adapter = [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & nested_column_serialization, IColumn & nested_column) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, buf, nested_column_serialization); - }; - return this->deserializeTextImpl(column_, istr_, std::move(adapter)); - }; - - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} - -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, settings); - else - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - else - subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); -} - -bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); - - return deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + else + subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); + } + else { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(subcolumn, buf, + [&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType + { + return deserialize_nested(subcolumn_, buf_, subcolumn_serialization); + }); + }); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return deserialize_nested(subcolumn, buf, subcolumn_serialization); + }); +} + +void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); +} + +bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + return deserializeTextJSONImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index ddf8047f061..007d153ec7e 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -76,7 +76,7 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index f8cb894c19b..e1fcb1a8d48 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -314,7 +314,7 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const +ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { static constexpr auto throw_exception = std::is_same_v; @@ -490,48 +490,52 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf } template -ReturnType SerializationTuple::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [&settings, this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType { - auto adapter = [&deserialize_nested, &istr_, this](IColumn & nested_column, size_t element_pos) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, istr_, elems[element_pos]); - }; - - return deserializeTextJSONImpl(column_, istr_, settings, std::move(adapter)); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); + } }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} - -void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const -{ if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, + [&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType + { + return deserialize_nested(nested_column_, buf, elems[element_pos]); + }); + }); else - deserializeTextJSONImpl(column, istr, settings, - [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> void - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); - else - elems[element_pos]->deserializeTextJSON(nested_column, istr, settings); - }); + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return deserialize_nested(nested_column, istr, elems[element_pos]); + }); } -bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + deserializeTextJSONImpl(column, istr, settings); +} - return deserializeTextJSONImpl(column, istr, settings, - [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> bool - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(nested_column, istr, settings); - }); +bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 54084617d3b..c51adb6e536 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -82,10 +82,10 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; template - ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; - template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; template ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 4f0187b4d87..d85c9898074 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -2,11 +2,14 @@ #include #include #include +#include #include #include +#include #include #include #include +#include #include @@ -267,9 +270,6 @@ namespace JSONUtils const FormatSettings & format_settings, bool yield_strings) { - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); - try { bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type); @@ -288,70 +288,19 @@ namespace JSONUtils return true; } - auto do_deserialize = [](IColumn & column_, ReadBuffer & buf_, auto && check_for_empty_string, auto && deserialize) -> bool - { - if (check_for_empty_string(buf_)) - { - column_.insertDefault(); - return false; - } - else - return deserialize(column_, buf_); - }; - - auto deserialize_impl = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf_) -> bool + auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool { if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf_, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization); - serialization->deserializeTextJSON(column_, buf_, format_settings); + serialization->deserializeTextJSON(column_, buf, format_settings); return true; }; - if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) - return deserialize_impl(column, in); - - if (in.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - buf_.position() = pos; - return false; - }; - - return do_deserialize(column, in, check_for_empty_string, deserialize_impl); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. - - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - peekable_buf.rollbackToCheckpoint(); - return false; - }; - - auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - bool res = deserialize_impl(column_, peekable_buf); - if (unlikely(peekable_buf.hasUnreadData())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); - return res; - }; - - PeekableReadBuffer peekable_buf(in, true); - return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_impl_with_check); + if (format_settings.json.empty_as_default) + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, in, deserialize); + else + return deserialize(column, in); } catch (Exception & e) { @@ -915,6 +864,78 @@ namespace JSONUtils } } + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested) + { + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + if (istr.eof() || *istr.position() != EMPTY_STRING[0]) + return deserialize_nested(column, istr); + + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + column_.insertDefault(); + return ReturnType(default_column_return_value); + } + return deserialize(column_, buf); + }; + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto * pos = buf.position(); + if (checkString(EMPTY_STRING, buf)) + return true; + buf.position() = pos; + return false; + }; + + return do_deserialize(column, istr, check_for_empty_string, deserialize_nested); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto & peekable_buf = assert_cast(buf); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + peekable_buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType + { + auto & peekable_buf = assert_cast(buf); + if constexpr (throw_exception) + deserialize_nested(column_, peekable_buf); + else if (!deserialize_nested(column_, peekable_buf)) + return ReturnType(false); + + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + + return ReturnType(true); + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check); + } + + template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index a8e7113388a..50e182c7d27 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -2,16 +2,13 @@ #include #include -#include #include #include -#include #include -#include #include #include -#include #include +#include #include namespace DB @@ -20,11 +17,6 @@ namespace DB class Block; struct JSONInferenceInfo; -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace JSONUtils { std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows); @@ -147,104 +139,14 @@ namespace JSONUtils void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings); template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserializer) - { - static constexpr auto throw_exception = std::is_same_v; + using NestedDeserialize = std::function; - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); - auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if (check_for_empty_string(buf)) - { - nested_column.insertDefault(); - return ReturnType(true); - } - return deserialize(nested_column, buf, nested_column_serialization); - }; - - auto deserialize_nested_impl = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); - else - nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); - return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); - } - }; - - auto deserialize_nested = [&do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if (buf.eof() || *buf.position() != EMPTY_STRING[0]) - return deserialize_nested_impl(nested_column, buf, nested_column_serialization); - - if (buf.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - buf_.position() = pos; - return false; - }; - - return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl, nested_column_serialization); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. - - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - peekable_buf.rollbackToCheckpoint(); - return false; - }; - - auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_, const SerializationPtr & nested_column_serialization_) -> ReturnType - { - auto & peekable_buf = assert_cast(buf_); - - auto enforceNoUnreadData = [&peekable_buf]() -> void - { - if (unlikely(peekable_buf.hasUnreadData())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); - }; - - if constexpr (throw_exception) - { - deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); - enforceNoUnreadData(); - } - else - { - bool res = deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); - enforceNoUnreadData(); - return res; - } - }; - - PeekableReadBuffer peekable_buf(buf, true); - return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check, nested_column_serialization); - }; - - return deserializer(column, istr, deserialize_nested); - } + extern template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/tests/queries/0_stateless/03215_json_empty_as_default.reference b/tests/queries/0_stateless/03222_json_empty_as_default.reference similarity index 100% rename from tests/queries/0_stateless/03215_json_empty_as_default.reference rename to tests/queries/0_stateless/03222_json_empty_as_default.reference diff --git a/tests/queries/0_stateless/03215_json_empty_as_default.sql b/tests/queries/0_stateless/03222_json_empty_as_default.sql similarity index 100% rename from tests/queries/0_stateless/03215_json_empty_as_default.sql rename to tests/queries/0_stateless/03222_json_empty_as_default.sql diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference new file mode 100644 index 00000000000..8176d7895d8 --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference @@ -0,0 +1,8 @@ +Array(UUID) +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6))) +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh new file mode 100755 index 00000000000..6b69fb2e9dc --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json + +# Wrapper for clickhouse-client to always output in JSONEachRow format, that +# way format settings will not affect output. +function clickhouse_local() +{ + $CLICKHOUSE_LOCAL --output-format JSONEachRow "$@" +} + +echo 'Array(UUID)' +echo '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=4" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=2" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +echo 'Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))' +echo '{"x":[[""], ["",{"abc":""}]]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=16" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=8" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +rm $DATA_FILE From 026fa0a7fd73944a0ef930610ba78d1347bc0939 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 14 Aug 2024 12:39:27 -0300 Subject: [PATCH 321/844] fix leading id method without WITH and with type specified being allowed --- src/Parsers/Access/ParserCreateUserQuery.cpp | 15 ++++++++++++--- ...3174_multiple_authentication_methods.reference | 2 ++ .../03174_multiple_authentication_methods.sh | 3 +++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index fdee49f9e80..c36dc48e7a3 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -48,7 +48,12 @@ namespace }); } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & auth_data, bool is_type_specifier_mandatory) + bool parseAuthenticationData( + IParserBase::Pos & pos, + Expected & expected, + std::shared_ptr & auth_data, + bool is_type_specifier_mandatory, + bool is_type_specifier_allowed) { return IParserBase::wrapParseImpl(pos, [&] { @@ -105,6 +110,10 @@ namespace else if (is_type_specifier_mandatory) return false; } + else if (!is_type_specifier_allowed) + { + return false; + } /// If authentication type is not specified, then the default password type is used if (!type) @@ -231,7 +240,7 @@ namespace std::shared_ptr ast_authentication_data; - if (!parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory)) + if (!parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory, is_type_specifier_mandatory)) { return false; } @@ -247,7 +256,7 @@ namespace { std::shared_ptr ast_authentication_data; - if (!parseAuthenticationData(aux_pos, expected, ast_authentication_data, false)) + if (!parseAuthenticationData(aux_pos, expected, ast_authentication_data, false, true)) { break; } diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 27a84ab9e95..f30a9566c68 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -49,3 +49,5 @@ Create user with ADD identification, should fail, add is not allowed for create BAD_ARGUMENTS Trailing comma should result in syntax error SYNTAX_ERROR +First auth method can't specify type if WITH keyword is not present +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 1ea129246cc..2945d072583 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -141,4 +141,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_ echo "Trailing comma should result in syntax error" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "First auth method can't specify type if WITH keyword is not present" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 3247f3ad08f1908df5c58f81b59b1b805c152fae Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 14 Aug 2024 12:44:43 -0300 Subject: [PATCH 322/844] make sure reset authentication methods can only be used on alter queries --- src/Parsers/Access/ParserCreateUserQuery.cpp | 5 +++++ .../03174_multiple_authentication_methods.reference | 2 ++ .../0_stateless/03174_multiple_authentication_methods.sh | 3 +++ 3 files changed, 10 insertions(+) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index c36dc48e7a3..466fd544c63 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -561,6 +561,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!reset_authentication_methods_to_new.has_value()) { reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); + + if (reset_authentication_methods_to_new.value() && !alter) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "RESET AUTHENTICATION METHODS TO NEW can only be used on ALTER statement"); + } } if (!valid_until) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index f30a9566c68..8cdc1f9d613 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -51,3 +51,5 @@ Trailing comma should result in syntax error SYNTAX_ERROR First auth method can't specify type if WITH keyword is not present SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 2945d072583..5e4c5048d55 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -144,4 +144,7 @@ ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_p echo "First auth method can't specify type if WITH keyword is not present" ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" +echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" +${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 7a4bd49c4285ef3489d90aa163fb6b1858a43562 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 14 Aug 2024 17:16:36 +0000 Subject: [PATCH 323/844] More improvements in integration test --- .../test_alter_settings_on_cluster/test.py | 5 +++ .../test_always_fetch_merged/test.py | 3 ++ .../test.py | 6 +-- .../test_async_load_databases/test.py | 14 +++++-- .../test.py | 42 +++++++++---------- 5 files changed, 42 insertions(+), 28 deletions(-) diff --git a/tests/integration/test_alter_settings_on_cluster/test.py b/tests/integration/test_alter_settings_on_cluster/test.py index 32f7f2efa30..7a552c383c9 100644 --- a/tests/integration/test_alter_settings_on_cluster/test.py +++ b/tests/integration/test_alter_settings_on_cluster/test.py @@ -73,3 +73,8 @@ def test_default_database_on_cluster(started_cluster): database="test_default_database", sql="SHOW CREATE test_local_table FORMAT TSV", ).endswith("old_parts_lifetime = 100\n") + + ch1.query( + database="test_default_database", + sql="DROP TABLE test_local_table ON CLUSTER 'cluster' SYNC", + ) diff --git a/tests/integration/test_always_fetch_merged/test.py b/tests/integration/test_always_fetch_merged/test.py index ca8e775fb97..3bbfc8867f8 100644 --- a/tests/integration/test_always_fetch_merged/test.py +++ b/tests/integration/test_always_fetch_merged/test.py @@ -80,3 +80,6 @@ def test_replica_always_download(started_cluster): assert int(node1_parts) < 10 assert int(node2_parts) < 10 + + node1.query_with_retry("DROP TABLE test_table SYNC") + node2.query_with_retry("DROP TABLE test_table SYNC") diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py index 93319a56d0f..0ea076b1468 100644 --- a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py @@ -157,7 +157,7 @@ def test_with_replicated_merge_tree(): array_size_range=[10, 50], ) - node.query("DROP TABLE IF EXISTS {}".format(table_name)) + node.query("DROP TABLE {} SYNC".format(table_name)) def test_with_replicated_merge_tree_multithread(): @@ -185,7 +185,7 @@ def test_with_replicated_merge_tree_multithread(): array_size_range=[10, 15], ) - node.query("DROP TABLE IF EXISTS {}".format(table_name)) + node.query("DROP TABLE {} SYNC".format(table_name)) # Ensure that the combined duration of inserts with adaptive timeouts is less than @@ -369,4 +369,4 @@ def test_change_queries_frequency(): for line in res.splitlines(): assert int(line) == min_ms - node.query("DROP TABLE IF EXISTS {}".format(table_name)) + node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) diff --git a/tests/integration/test_async_load_databases/test.py b/tests/integration/test_async_load_databases/test.py index d06897b1045..f36cff76ea2 100644 --- a/tests/integration/test_async_load_databases/test.py +++ b/tests/integration/test_async_load_databases/test.py @@ -28,9 +28,6 @@ def started_cluster(): """ CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary; CREATE DATABASE IF NOT EXISTS test; - DROP TABLE IF EXISTS test.elements; - CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log; - INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7); """ ) @@ -49,6 +46,13 @@ def get_status(dictionary_name): def test_dict_get_data(started_cluster): query = instance.query + query( + "CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log;" + ) + query( + "INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7);" + ) + # dictionaries_lazy_load == false, so these dictionary are not loaded. assert get_status("dep_x") == "NOT_LOADED" assert get_status("dep_y") == "NOT_LOADED" @@ -97,6 +101,8 @@ def test_dict_get_data(started_cluster): assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "XX\n" assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n" assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ZZ\n" + query("DROP TABLE IF EXISTS test.elements;") + instance.restart_clickhouse() def dependent_tables_assert(): @@ -175,3 +181,5 @@ def test_multiple_tables(started_cluster): random.shuffle(order) for i in order: assert query(f"select count() from test.table_{i}") == "100\n" + for i in range(tables_count): + query(f"drop table test.table_{i}") diff --git a/tests/integration/test_asynchronous_metric_log_table/test.py b/tests/integration/test_asynchronous_metric_log_table/test.py index 622620e232a..efdd96004d2 100644 --- a/tests/integration/test_asynchronous_metric_log_table/test.py +++ b/tests/integration/test_asynchronous_metric_log_table/test.py @@ -26,26 +26,24 @@ def started_cluster(): # asynchronous_metric_update_period_s is being set to 2s so that the metrics are populated faster and # are available for querying during the test. def test_event_time_microseconds_field(started_cluster): - try: - cluster.start() - node1.query("SET log_queries = 1;") - node1.query("CREATE DATABASE replica;") - query_create = """CREATE TABLE replica.test - ( - id Int64, - event_time DateTime - ) - Engine=MergeTree() - PARTITION BY toYYYYMMDD(event_time) - ORDER BY id;""" - time.sleep(2) - node1.query(query_create) - node1.query("""INSERT INTO replica.test VALUES (1, now())""") - node1.query("SYSTEM FLUSH LOGS;") + node1.query("SET log_queries = 1;") + node1.query("CREATE DATABASE replica;") + query_create = """CREATE TABLE replica.test + ( + id Int64, + event_time DateTime + ) + Engine=MergeTree() + PARTITION BY toYYYYMMDD(event_time) + ORDER BY id;""" + time.sleep(2) + node1.query(query_create) + node1.query("""INSERT INTO replica.test VALUES (1, now())""") + node1.query("SYSTEM FLUSH LOGS;") - test_query = ( - "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log" - ) - assert "ok\n" in node1.query(test_query) - finally: - cluster.shutdown() + test_query = ( + "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log" + ) + assert "ok\n" in node1.query(test_query) + node1.query("DROP TABLE replica.test") + node1.query("DROP DATABASE replica") From d31b36ca50d8905b8eaac945c197c425d5ddb208 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 15 Aug 2024 07:16:01 +0000 Subject: [PATCH 324/844] Rework integration flaky check as it was timeouting too fast as run as a single group --- tests/ci/integration_tests_runner.py | 98 +++++++++++++++++----------- 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 2b348be8b51..1513a8bee4f 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -784,40 +784,65 @@ class ClickhouseIntegrationTestsRunner: logging.info("Starting check with retries") final_retry = 0 logs = [] - tires_num = 1 if should_fail else FLAKY_TRIES_COUNT - for i in range(tires_num): - final_retry += 1 - logging.info("Running tests for the %s time", i) - counters, tests_times, log_paths = self.try_run_test_group( - repo_path, - "bugfix" if should_fail else "flaky", - tests_to_run, - 1, - 1, - FLAKY_REPEAT_COUNT, - ) - logs += log_paths - if counters["FAILED"]: - logging.info("Found failed tests: %s", " ".join(counters["FAILED"])) - description_prefix = "Failed tests found: " - result_state = "failure" - if not should_fail: + counters = { + "ERROR": [], + "PASSED": [], + "FAILED": [], + "SKIPPED": [], + "BROKEN": [], + "NOT_FAILED": [], + } # type: Dict + tests_times = defaultdict(float) # type: Dict + tests_log_paths = defaultdict(list) + + for test_to_run in tests_to_run: + tries_num = 1 if should_fail else FLAKY_TRIES_COUNT + for i in range(tries_num): + final_retry += 1 + logging.info("Running tests for the %s time", i) + group_counters, group_test_times, log_paths = self.try_run_test_group( + repo_path, + "bugfix" if should_fail else "flaky", + [test_to_run], + 1, + 1, + FLAKY_REPEAT_COUNT, + ) + for counter, value in group_counters.items(): + logging.info( + "Tests from group %s stats, %s count %s", + test_to_run, + counter, + len(value), + ) + counters[counter] += value + + for test_name, test_time in group_test_times.items(): + tests_times[test_name] = test_time + tests_log_paths[test_name] = log_paths + if not should_fail and ( + group_counters["FAILED"] or group_counters["ERROR"] + ): + logging.info( + "Unexpected failure in group %s. Fail fast for current group", + test_to_run, + ) break - if counters["ERROR"]: - description_prefix = "Failed tests found: " - logging.info("Found error tests: %s", " ".join(counters["ERROR"])) - # NOTE "error" result state will restart the whole test task, - # so we use "failure" here - result_state = "failure" - if not should_fail: - break - logging.info("Try is OK, all tests passed, going to clear env") - clear_ip_tables_and_restart_daemons() - logging.info("And going to sleep for some time") - if time.time() - start > MAX_TIME_SECONDS: - logging.info("Timeout reached, going to finish flaky check") - break - time.sleep(5) + + if group_counters["FAILED"]: + logging.info("Found failed tests: %s", " ".join(counters["FAILED"])) + description_prefix = "Failed tests found: " + result_state = "failure" + if group_counters["ERROR"]: + description_prefix = "Failed tests found: " + logging.info("Found error tests: %s", " ".join(counters["ERROR"])) + # NOTE "error" result state will restart the whole test task, + # so we use "failure" here + result_state = "failure" + logging.info("Try is OK, all tests passed, going to clear env") + clear_ip_tables_and_restart_daemons() + logging.info("And going to sleep for some time") + time.sleep(5) test_result = [] for state in ("ERROR", "FAILED", "PASSED", "SKIPPED"): @@ -828,13 +853,10 @@ class ClickhouseIntegrationTestsRunner: else: text_state = state test_result += [ - ( - c + " (✕" + str(final_retry) + ")", - text_state, - f"{tests_times[c]:.2f}", - ) + (c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c]) for c in counters[state] ] + status_text = description_prefix + ", ".join( [ str(n).lower().replace("failed", "fail") + ": " + str(len(c)) From c48b6d25f763d9c38b4e7ed35dec9a98d913e5c7 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:58:36 +0000 Subject: [PATCH 325/844] more tests --- .../test_filesystem_layout/test.py | 4 ++++ .../integration/test_grant_and_revoke/test.py | 2 ++ .../test_parallel_replicas_failover/test.py | 19 ++++++++++--------- .../test.py | 4 ++-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 4e719aa0fe9..31d6c830a2f 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -79,3 +79,7 @@ def test_file_path_escaping(started_cluster): "test -f /var/lib/clickhouse/shadow/2/store/123/12345678-1000-4000-8000-000000000001/1_1_1_0/%7EId.bin", ] ) + node.query("DROP TABLE test.`T.a_b,l-e!` SYNC") + node.query("DROP TABLE `test 2`.`T.a_b,l-e!` SYNC") + node.query("DROP DATABASE test") + node.query("DROP DATABASE `test 2`") diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index e533cced1e4..81cba966cae 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -359,6 +359,8 @@ def test_implicit_create_view_grant(): instance.query("GRANT CREATE VIEW ON test.* TO B", user="A") instance.query("CREATE VIEW test.view_2 AS SELECT 1", user="B") assert instance.query("SELECT * FROM test.view_2") == "1\n" + instance.query("DROP USER A") + instance.query("DROP VIEW test.view_2") def test_implicit_create_temporary_table_grant(): diff --git a/tests/integration/test_parallel_replicas_failover/test.py b/tests/integration/test_parallel_replicas_failover/test.py index bf25136bff7..2da26ee03c9 100644 --- a/tests/integration/test_parallel_replicas_failover/test.py +++ b/tests/integration/test_parallel_replicas_failover/test.py @@ -1,5 +1,5 @@ import pytest - +import uuid from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -25,19 +25,15 @@ def start_cluster(): def create_tables(cluster, table_name, skip_last_replica): - node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node2.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node1.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" ) node2.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" ) if not skip_last_replica: node3.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" ) # populate data @@ -67,7 +63,7 @@ def test_skip_replicas_without_table(start_cluster): for i in range(4): expected_result += f"{i}\t1000\n" - log_comment = "5230b069-9574-407d-9b80-891b5a175f41" + log_comment = uuid.uuid4() assert ( node1.query( f"SELECT key, count() FROM {table_name} GROUP BY key ORDER BY key", @@ -88,6 +84,8 @@ def test_skip_replicas_without_table(start_cluster): ) == "1\t1\n" ) + node1.query(f"DROP TABLE {table_name} SYNC") + node2.query(f"DROP TABLE {table_name} SYNC") def test_skip_unresponsive_replicas(start_cluster): @@ -112,3 +110,6 @@ def test_skip_unresponsive_replicas(start_cluster): ) == expected_result ) + node1.query(f"DROP TABLE {table_name} SYNC") + node2.query(f"DROP TABLE {table_name} SYNC") + node3.query(f"DROP TABLE {table_name} SYNC") diff --git a/tests/integration/test_parallel_replicas_invisible_parts/test.py b/tests/integration/test_parallel_replicas_invisible_parts/test.py index cab3fb46fe9..7093e3b3292 100644 --- a/tests/integration/test_parallel_replicas_invisible_parts/test.py +++ b/tests/integration/test_parallel_replicas_invisible_parts/test.py @@ -35,11 +35,10 @@ def start_cluster(): def _create_tables(table_name, table_size, index_granularity): - nodes[0].query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER {cluster_name}") nodes[0].query( f""" - CREATE TABLE IF NOT EXISTS {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String) + CREATE TABLE {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard/{table_name}/', '{{replica}}') ORDER BY (key) SETTINGS index_granularity = {index_granularity}, max_bytes_to_merge_at_max_space_in_pool = 0, max_bytes_to_merge_at_max_space_in_pool = 1 @@ -128,3 +127,4 @@ def test_reading_with_invisible_parts( ) == f"{expected}\n" ) + nodes[0].query(f"DROP TABLE {table_name} ON CLUSTER {cluster_name} SYNC") From a3978b24d0ee8847e5b332a08014ced896edae2c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 15 Aug 2024 11:11:49 +0000 Subject: [PATCH 326/844] Fix for subqueries with FINAL --- src/Planner/findParallelReplicasQuery.cpp | 6 ++++++ ... => 03222_parallel_replicas_final_in_subquery.reference} | 0 ...ed.sql => 03222_parallel_replicas_final_in_subquery.sql} | 0 3 files changed, 6 insertions(+) rename tests/queries/0_stateless/{03222_pr_final_not_supported.reference => 03222_parallel_replicas_final_in_subquery.reference} (100%) rename tests/queries/0_stateless/{03222_pr_final_not_supported.sql => 03222_parallel_replicas_final_in_subquery.sql} (100%) diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index 39edb1e6516..cd02e01c392 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -52,7 +52,13 @@ std::stack getSupportingParallelReplicasQuery(const IQueryTre const auto & storage = table_node.getStorage(); /// Here we check StorageDummy as well, to support a query tree with replaced storages. if (std::dynamic_pointer_cast(storage) || typeid_cast(storage.get())) + { + /// parallel replicas is not supported with FINAL + if (table_node.getTableExpressionModifiers()->hasFinal()) + return {}; + return res; + } return {}; } diff --git a/tests/queries/0_stateless/03222_pr_final_not_supported.reference b/tests/queries/0_stateless/03222_parallel_replicas_final_in_subquery.reference similarity index 100% rename from tests/queries/0_stateless/03222_pr_final_not_supported.reference rename to tests/queries/0_stateless/03222_parallel_replicas_final_in_subquery.reference diff --git a/tests/queries/0_stateless/03222_pr_final_not_supported.sql b/tests/queries/0_stateless/03222_parallel_replicas_final_in_subquery.sql similarity index 100% rename from tests/queries/0_stateless/03222_pr_final_not_supported.sql rename to tests/queries/0_stateless/03222_parallel_replicas_final_in_subquery.sql From 17c1cef52b9ea08e85fab108d590c5e1cf3e2e02 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Aug 2024 10:30:33 -0300 Subject: [PATCH 327/844] add server setting --- src/Access/AccessEntityIO.cpp | 2 +- src/Core/ServerSettings.h | 1 + src/Core/Settings.h | 1 - .../Access/InterpreterCreateUserQuery.cpp | 28 +++++++++++++++---- .../Access/InterpreterCreateUserQuery.h | 7 ++++- 5 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 1b073329296..53b073cb750 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -82,7 +82,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition) if (res) throw Exception(ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION, "Two access entities attached in the same file"); res = user = std::make_unique(); - InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query, /* allow_no_password = */ true, /* allow_plaintext_password = */ true); + InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query, /* allow_no_password = */ true, /* allow_plaintext_password = */ true, /* max_number_of_authentication_methods = */ std::numeric_limits::max()); } else if (auto * create_role_query = query->as()) { diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..887beba4b13 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -116,6 +116,7 @@ namespace DB M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ + M(UInt64, max_authentication_methods_per_user, 256, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index acdc8316a4d..d27c9fce7bb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -966,7 +966,6 @@ class IColumn; \ M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ - \ /** Experimental feature for moving data between shards. */ \ M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index b6e68763a3a..5178ad184d6 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,8 @@ namespace bool replace_authentication_methods, bool allow_implicit_no_password, bool allow_no_password, - bool allow_plaintext_password) + bool allow_plaintext_password, + std::size_t max_number_of_authentication_methods) { if (override_name) user.setName(override_name->toString()); @@ -80,6 +82,14 @@ namespace user.authentication_methods.clear(); } + auto number_of_authentication_methods = user.authentication_methods.size() + authentication_methods.size(); + if (number_of_authentication_methods > max_number_of_authentication_methods) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user." + "Check the `max_authentication_methods_per_user` setting"); + } + for (const auto & authentication_method : authentication_methods) { user.authentication_methods.emplace_back(authentication_method); @@ -251,7 +261,8 @@ BlockIO InterpreterCreateUserQuery::execute() updateUserFromQueryImpl( *updated_user, query, authentication_methods, {}, default_roles_from_query, settings_from_query, grantees_from_query, valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, - implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + implicit_no_password_allowed, no_password_allowed, + plaintext_password_allowed, getContext()->getServerSettings().max_authentication_methods_per_user); return updated_user; }; @@ -272,7 +283,8 @@ BlockIO InterpreterCreateUserQuery::execute() updateUserFromQueryImpl( *new_user, query, authentication_methods, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, valid_until, query.reset_authentication_methods_to_new, query.replace_authentication_methods, - implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed); + implicit_no_password_allowed, no_password_allowed, + plaintext_password_allowed, getContext()->getServerSettings().max_authentication_methods_per_user); new_users.emplace_back(std::move(new_user)); } @@ -309,7 +321,12 @@ BlockIO InterpreterCreateUserQuery::execute() } -void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password) +void InterpreterCreateUserQuery::updateUserFromQuery( + User & user, + const ASTCreateUserQuery & query, + bool allow_no_password, + bool allow_plaintext_password, + std::size_t max_number_of_authentication_methods) { std::vector authentication_methods; if (!query.authentication_methods.empty()) @@ -337,7 +354,8 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat query.replace_authentication_methods, allow_no_password, allow_plaintext_password, - true); + true, + max_number_of_authentication_methods); } void registerInterpreterCreateUserQuery(InterpreterFactory & factory) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.h b/src/Interpreters/Access/InterpreterCreateUserQuery.h index 372066cfd5e..fea87d33703 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.h +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.h @@ -17,7 +17,12 @@ public: BlockIO execute() override; - static void updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password); + static void updateUserFromQuery( + User & user, + const ASTCreateUserQuery & query, + bool allow_no_password, + bool allow_plaintext_password, + std::size_t max_number_of_authentication_methods); private: ASTPtr query_ptr; From 72f1695014919c62c048afc6d1016655af3cdf09 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Aug 2024 12:03:30 -0300 Subject: [PATCH 328/844] add integ tests for new setting --- .../__init__.py | 0 .../configs/max_auth_limited.xml | 3 ++ .../test.py | 49 +++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 tests/integration/test_max_authentication_methods_per_user/__init__.py create mode 100644 tests/integration/test_max_authentication_methods_per_user/configs/max_auth_limited.xml create mode 100644 tests/integration/test_max_authentication_methods_per_user/test.py diff --git a/tests/integration/test_max_authentication_methods_per_user/__init__.py b/tests/integration/test_max_authentication_methods_per_user/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_max_authentication_methods_per_user/configs/max_auth_limited.xml b/tests/integration/test_max_authentication_methods_per_user/configs/max_auth_limited.xml new file mode 100644 index 00000000000..c6d4b0077be --- /dev/null +++ b/tests/integration/test_max_authentication_methods_per_user/configs/max_auth_limited.xml @@ -0,0 +1,3 @@ + + 2 + \ No newline at end of file diff --git a/tests/integration/test_max_authentication_methods_per_user/test.py b/tests/integration/test_max_authentication_methods_per_user/test.py new file mode 100644 index 00000000000..81d36dfd21d --- /dev/null +++ b/tests/integration/test_max_authentication_methods_per_user/test.py @@ -0,0 +1,49 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +limited_node = cluster.add_instance( + "limited_node", + main_configs=["configs/max_auth_limited.xml"], +) + +default_node = cluster.add_instance( + "default_node", +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_create(started_cluster): + expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" + assert expected_error in limited_node.query_and_get_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2', BY '3'") + + + assert expected_error not in limited_node.query_and_get_answer_with_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2'") + + limited_node.query("DROP USER u_max_authentication_methods") + +def test_alter(started_cluster): + limited_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") + + expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" + assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2', BY '3'") + + expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" + assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '3', BY '4', BY '5'") + + assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2'") + + assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '2', BY '3'") + + limited_node.query("DROP USER u_max_authentication_methods") From 714a4d871c6abbaab19b48076e6e395cfd5fcf8f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Aug 2024 12:18:09 -0300 Subject: [PATCH 329/844] add integ tests for defa7ult value --- .../test.py | 47 +++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_max_authentication_methods_per_user/test.py b/tests/integration/test_max_authentication_methods_per_user/test.py index 81d36dfd21d..2a1d54d9b5a 100644 --- a/tests/integration/test_max_authentication_methods_per_user/test.py +++ b/tests/integration/test_max_authentication_methods_per_user/test.py @@ -23,11 +23,11 @@ def started_cluster(): finally: cluster.shutdown() +expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" def test_create(started_cluster): - expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" - assert expected_error in limited_node.query_and_get_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2', BY '3'") + assert expected_error in limited_node.query_and_get_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2', BY '3'") assert expected_error not in limited_node.query_and_get_answer_with_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2'") @@ -36,10 +36,8 @@ def test_create(started_cluster): def test_alter(started_cluster): limited_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") - expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2', BY '3'") - expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '3', BY '4', BY '5'") assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2'") @@ -47,3 +45,44 @@ def test_alter(started_cluster): assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '2', BY '3'") limited_node.query("DROP USER u_max_authentication_methods") + + +def get_query_with_multiple_identified_with(operation, username, identified_with_count, add_operation = ""): + identified_clauses = ", ".join([f"BY '1'" for _ in range(identified_with_count)]) + query = f"{operation} USER {username} {add_operation} IDENTIFIED {identified_clauses}" + return query + + +def test_create_default_setting(started_cluster): + expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" + + query_exceeds = get_query_with_multiple_identified_with("CREATE", "u_max_authentication_methods", 257) + + assert expected_error in default_node.query_and_get_error(query_exceeds) + + query_not_exceeds = get_query_with_multiple_identified_with("CREATE", "u_max_authentication_methods", 256) + + assert expected_error not in default_node.query_and_get_answer_with_error(query_not_exceeds) + + default_node.query("DROP USER u_max_authentication_methods") + +def test_alter_default_setting(started_cluster): + default_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") + + query_add_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 256, "ADD") + + assert expected_error in default_node.query_and_get_error(query_add_exceeds) + + query_replace_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 257) + + assert expected_error in default_node.query_and_get_error(query_replace_exceeds) + + query_add_not_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 1, "ADD") + + assert expected_error not in default_node.query_and_get_answer_with_error(query_add_not_exceeds) + + query_replace_not_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 2) + + assert expected_error not in default_node.query_and_get_answer_with_error(query_replace_not_exceeds) + + default_node.query("DROP USER u_max_authentication_methods") From 12e6645058a78f3dd74bdbd6982ad9ee49586e7b Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Aug 2024 12:19:36 -0300 Subject: [PATCH 330/844] fix black --- .../test.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_max_authentication_methods_per_user/test.py b/tests/integration/test_max_authentication_methods_per_user/test.py index 2a1d54d9b5a..4c20f9cf376 100644 --- a/tests/integration/test_max_authentication_methods_per_user/test.py +++ b/tests/integration/test_max_authentication_methods_per_user/test.py @@ -27,22 +27,34 @@ expected_error = "User can not be created/updated because it exceeds the allowed def test_create(started_cluster): - assert expected_error in limited_node.query_and_get_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2', BY '3'") + assert expected_error in limited_node.query_and_get_error( + "CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2', BY '3'" + ) - assert expected_error not in limited_node.query_and_get_answer_with_error("CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2'") + assert expected_error not in limited_node.query_and_get_answer_with_error( + "CREATE USER u_max_authentication_methods IDENTIFIED BY '1', BY '2'" + ) limited_node.query("DROP USER u_max_authentication_methods") def test_alter(started_cluster): limited_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") - assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2', BY '3'") + assert expected_error in limited_node.query_and_get_error( + "ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2', BY '3'" + ) - assert expected_error in limited_node.query_and_get_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '3', BY '4', BY '5'") + assert expected_error in limited_node.query_and_get_error( + "ALTER USER u_max_authentication_methods IDENTIFIED BY '3', BY '4', BY '5'" + ) - assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2'") + assert expected_error not in limited_node.query_and_get_answer_with_error( + "ALTER USER u_max_authentication_methods ADD IDENTIFIED BY '2'" + ) - assert expected_error not in limited_node.query_and_get_answer_with_error("ALTER USER u_max_authentication_methods IDENTIFIED BY '2', BY '3'") + assert expected_error not in limited_node.query_and_get_answer_with_error( + "ALTER USER u_max_authentication_methods IDENTIFIED BY '2', BY '3'" + ) limited_node.query("DROP USER u_max_authentication_methods") From 70e7c4e63d58c67d77ebb7e979cc41e35a8025b1 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 15 Aug 2024 12:21:06 -0300 Subject: [PATCH 331/844] fix black with script --- .../test.py | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_max_authentication_methods_per_user/test.py b/tests/integration/test_max_authentication_methods_per_user/test.py index 4c20f9cf376..fb02ef73bf6 100644 --- a/tests/integration/test_max_authentication_methods_per_user/test.py +++ b/tests/integration/test_max_authentication_methods_per_user/test.py @@ -23,8 +23,10 @@ def started_cluster(): finally: cluster.shutdown() + expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" + def test_create(started_cluster): assert expected_error in limited_node.query_and_get_error( @@ -37,6 +39,7 @@ def test_create(started_cluster): limited_node.query("DROP USER u_max_authentication_methods") + def test_alter(started_cluster): limited_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") @@ -59,42 +62,65 @@ def test_alter(started_cluster): limited_node.query("DROP USER u_max_authentication_methods") -def get_query_with_multiple_identified_with(operation, username, identified_with_count, add_operation = ""): +def get_query_with_multiple_identified_with( + operation, username, identified_with_count, add_operation="" +): identified_clauses = ", ".join([f"BY '1'" for _ in range(identified_with_count)]) - query = f"{operation} USER {username} {add_operation} IDENTIFIED {identified_clauses}" + query = ( + f"{operation} USER {username} {add_operation} IDENTIFIED {identified_clauses}" + ) return query def test_create_default_setting(started_cluster): expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" - query_exceeds = get_query_with_multiple_identified_with("CREATE", "u_max_authentication_methods", 257) + query_exceeds = get_query_with_multiple_identified_with( + "CREATE", "u_max_authentication_methods", 257 + ) assert expected_error in default_node.query_and_get_error(query_exceeds) - query_not_exceeds = get_query_with_multiple_identified_with("CREATE", "u_max_authentication_methods", 256) + query_not_exceeds = get_query_with_multiple_identified_with( + "CREATE", "u_max_authentication_methods", 256 + ) - assert expected_error not in default_node.query_and_get_answer_with_error(query_not_exceeds) + assert expected_error not in default_node.query_and_get_answer_with_error( + query_not_exceeds + ) default_node.query("DROP USER u_max_authentication_methods") + def test_alter_default_setting(started_cluster): default_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") - query_add_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 256, "ADD") + query_add_exceeds = get_query_with_multiple_identified_with( + "ALTER", "u_max_authentication_methods", 256, "ADD" + ) assert expected_error in default_node.query_and_get_error(query_add_exceeds) - query_replace_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 257) + query_replace_exceeds = get_query_with_multiple_identified_with( + "ALTER", "u_max_authentication_methods", 257 + ) assert expected_error in default_node.query_and_get_error(query_replace_exceeds) - query_add_not_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 1, "ADD") + query_add_not_exceeds = get_query_with_multiple_identified_with( + "ALTER", "u_max_authentication_methods", 1, "ADD" + ) - assert expected_error not in default_node.query_and_get_answer_with_error(query_add_not_exceeds) + assert expected_error not in default_node.query_and_get_answer_with_error( + query_add_not_exceeds + ) - query_replace_not_exceeds = get_query_with_multiple_identified_with("ALTER", "u_max_authentication_methods", 2) + query_replace_not_exceeds = get_query_with_multiple_identified_with( + "ALTER", "u_max_authentication_methods", 2 + ) - assert expected_error not in default_node.query_and_get_answer_with_error(query_replace_not_exceeds) + assert expected_error not in default_node.query_and_get_answer_with_error( + query_replace_not_exceeds + ) default_node.query("DROP USER u_max_authentication_methods") From cb0335446eb6b1ca2e452c9246893fe2053a7e1e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 15 Aug 2024 16:55:47 +0100 Subject: [PATCH 332/844] impl --- src/Common/ProfileEvents.cpp | 2 + src/Core/ProtocolDefines.h | 3 +- src/Core/Settings.h | 2 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 + .../QueryPlan/ReadFromMergeTree.cpp | 9 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 3 +- .../MergeTreeReadPoolParallelReplicas.cpp | 87 ++++++++++++++++++- .../MergeTreeReadPoolParallelReplicas.h | 3 +- ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 17 ++-- .../MergeTree/MergeTreeSelectProcessor.h | 1 + .../ParallelReplicasReadingCoordinator.cpp | 16 ++-- .../ParallelReplicasReadingCoordinator.h | 3 +- src/Storages/MergeTree/RequestResponse.cpp | 10 ++- src/Storages/MergeTree/RequestResponse.h | 9 +- 14 files changed, 130 insertions(+), 37 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ccdce7ff584..e5bad44ae93 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -329,6 +329,7 @@ The server successfully detected this situation and will download merged part fr M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \ M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \ M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \ + M(ParallelReplicasReadMarks, "How many marks were read by the given replica") \ \ M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \ M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \ @@ -482,6 +483,7 @@ The server successfully detected this situation and will download merged part fr M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \ M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \ M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \ + M(CachedReadBufferPredownloadedBytes, "Bytes read from filesystem cache source. Cache segments are read from left to right as a whole, it might be that we need to predownload some part of the segment irrelevant for the current task just to get to the needed data") \ M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \ M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \ M(CachedReadBufferCreateBufferMicroseconds, "Prepare buffer time") \ diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 02d54221ed3..dd37daadaff 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -33,7 +33,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54 static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1; -static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; +static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; +static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4; static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453; static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0808e8eb49f..6a980a850c8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -938,7 +938,7 @@ class IColumn; M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ + M(UInt64, parallel_replicas_mark_segment_size, 0, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing. Value should be in range [128; 16384]", 0) \ M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as ' :: ' if archive has correct extension", 0) \ \ M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index b471f3fc58f..6363c40a9ad 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -28,6 +28,7 @@ extern const Event CachedReadBufferReadFromCacheMicroseconds; extern const Event CachedReadBufferCacheWriteMicroseconds; extern const Event CachedReadBufferReadFromSourceBytes; extern const Event CachedReadBufferReadFromCacheBytes; +extern const Event CachedReadBufferPredownloadedBytes; extern const Event CachedReadBufferCacheWriteBytes; extern const Event CachedReadBufferCreateBufferMicroseconds; @@ -644,6 +645,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload); ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size); + ProfileEvents::increment(ProfileEvents::CachedReadBufferPredownloadedBytes, current_impl_buffer_size); bool continue_predownload = file_segment.reserve( current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 348019d7d10..768d1cbc639 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -52,6 +52,7 @@ #include #include +#include "Interpreters/Cluster.h" #include "config.h" using namespace DB; @@ -343,11 +344,11 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( { const auto & client_info = context->getClientInfo(); - auto extension = ParallelReadingExtension - { + auto extension = ParallelReadingExtension{ .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), .number_of_current_replica = client_info.number_of_current_replica, + .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().begin()->getAllNodeCount(), }; /// We have a special logic for local replica. It has to read less data, because in some cases it should @@ -514,11 +515,11 @@ Pipe ReadFromMergeTree::readInOrder( if (is_parallel_reading_from_replicas) { const auto & client_info = context->getClientInfo(); - ParallelReadingExtension extension - { + ParallelReadingExtension extension{ .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), .number_of_current_replica = client_info.number_of_current_replica, + .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().begin()->getAllNodeCount(), }; const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cf11052cd59..9dd90d26487 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -436,8 +436,7 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func); } - coordinator - = std::make_shared(max_replicas_to_use, current_settings.parallel_replicas_mark_segment_size); + coordinator = std::make_shared(max_replicas_to_use); for (size_t i=0; i < max_replicas_to_use; ++i) { diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 33eaf5a49bd..2736aab2603 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -1,6 +1,79 @@ -#include #include +#include +#include + +#include +#include +#include + + +namespace +{ + +size_t chooseSegmentSize( + LoggerPtr log, size_t mark_segment_size, size_t min_marks_per_task, size_t threads, size_t sum_marks, size_t number_of_replicas) +{ + /// Mark segment size determines the granularity of work distribution between replicas. + /// Namely, coordinator will take mark segments of size `mark_segment_size` granules, calculate hash of this segment and assign it to corresponding replica. + /// Small segments are good when we read a small random subset of a table, big - when we do full-scan over a large table. + /// With small segments there is a problem: consider a query like `select max(time) from wikistat`. Average size of `time` per granule is ~5KB. So when we + /// read 128 granules we still read only ~0.5MB of data. With default fs cache segment size of 4MB it means a lot of data will be downloaded and written + /// in cache for no reason. General case will look like this: + /// + /// +---------- useful data + /// v + /// +------+--+------+ + /// |------|++| | + /// |------|++| | + /// +------+--+------+ + /// ^ + /// predownloaded data -----------+ + /// + /// Having large segments solves all the problems in this case. Also bigger segments mean less requests (especially for big tables and full-scans). + /// These three values below chosen mostly intuitively. 128 granules is 1M rows - just a good starting point, 16384 seems to still make sense when reading + /// billions of rows and 1024 - is a reasonable point in between. We limit our choice to only these three options because when we change segment size + /// we essentially change distribution of data between replicas and of course we don't want to use simultaneously tens of different distributions, because + /// it would be a huge waste of cache space. + constexpr std::array borders{128, 1024, 16384}; + + LOG_DEBUG( + log, + "mark_segment_size={}, min_marks_per_task*threads={}, sum_marks/number_of_replicas^2={}", + mark_segment_size, + min_marks_per_task * threads, + sum_marks / number_of_replicas / number_of_replicas); + + /// Here we take max of three numbers: + /// * user provided setting (0 by default) + /// * (min_marks_per_task * threads) = the number of marks we request from the coordinator each time - there is no point to have segments smaller than one unit of work for a replica + /// * (sum_marks / number_of_replicas^2) - we use consistent hashing for work distribution (including work stealing). If we have a really slow replica + /// everything up to (1/number_of_replicas) portion of its work will be stolen by other replicas. And it owns (1/number_of_replicas) share of total number of marks. + /// Aslo important to note here that sum_marks is calculated after PK analysis, it means in particular that different segment sizes might be used for the + /// same table for different queries (it is intentional). + mark_segment_size = std::max({mark_segment_size, min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas}); + + /// Squeeze the value to the borders. + mark_segment_size = std::clamp(mark_segment_size, borders.front(), borders.back()); + /// After we calculated a hopefully good value for segment_size let's just find the maximal border that is not bigger than the chosen value. + for (auto border : borders | std::views::reverse) + { + if (mark_segment_size >= border) + { + LOG_DEBUG(log, "Chosen segment size: {}", border); + return border; + } + } + + UNREACHABLE(); +} + +} + +namespace ProfileEvents +{ +extern const Event ParallelReplicasReadMarks; +} namespace DB { @@ -34,12 +107,19 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( , extension(std::move(extension_)) , coordination_mode(CoordinationMode::Default) , min_marks_per_task(pool_settings.min_marks_for_concurrent_read) + , mark_segment_size(chooseSegmentSize( + log, + context_->getSettingsRef().parallel_replicas_mark_segment_size, + min_marks_per_task, + pool_settings.threads, + pool_settings.sum_marks, + extension.total_nodes_count)) { for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); - extension.all_callback( - InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); + extension.all_callback(InitialAllRangesAnnouncement( + coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica, mark_segment_size)); } MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_idx*/, MergeTreeReadTask * previous_task) @@ -104,6 +184,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id if (current_task.ranges.empty()) buffered_ranges.pop_front(); + ProfileEvents::increment(ProfileEvents::ParallelReplicasReadMarks, current_sum_marks); return createTask(per_part_infos[part_idx], std::move(ranges_to_read), previous_task); } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 6ba63cc2c9a..1c79bd736bd 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -30,12 +30,13 @@ public: private: mutable std::mutex mutex; + LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas"); const ParallelReadingExtension extension; const CoordinationMode coordination_mode; size_t min_marks_per_task{0}; + size_t mark_segment_size{0}; RangesInDataPartsDescription buffered_ranges; bool no_more_tasks_available{false}; - LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 6b5cf978423..e0cb88c209a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -1,5 +1,10 @@ #include +namespace ProfileEvents +{ +extern const Event ParallelReplicasReadMarks; +} + namespace DB { @@ -43,11 +48,8 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd for (const auto & part : parts_ranges) buffered_tasks.push_back({part.data_part->info, MarkRanges{}}); - extension.all_callback(InitialAllRangesAnnouncement( - mode, - parts_ranges.getDescriptions(), - extension.number_of_current_replica - )); + extension.all_callback( + InitialAllRangesAnnouncement(mode, parts_ranges.getDescriptions(), extension.number_of_current_replica, /*mark_segment_size_=*/0)); } MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t task_idx, MergeTreeReadTask * previous_task) @@ -68,13 +70,14 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t ta { auto result = std::move(desc.ranges); desc.ranges = MarkRanges{}; + ProfileEvents::increment(ProfileEvents::ParallelReplicasReadMarks, desc.ranges.getNumberOfMarks()); return result; } } return std::nullopt; }; - if (auto result = get_from_buffer(); result) + if (auto result = get_from_buffer()) return createTask(per_part_infos[task_idx], std::move(*result), previous_task); if (no_more_tasks) @@ -97,7 +100,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t ta std::move(new_ranges.begin(), new_ranges.end(), std::back_inserter(old_ranges)); } - if (auto result = get_from_buffer(); result) + if (auto result = get_from_buffer()) return createTask(per_part_infos[task_idx], std::move(*result), previous_task); return nullptr; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 7a9cebbcb2e..e20427dbff0 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -27,6 +27,7 @@ struct ParallelReadingExtension MergeTreeAllRangesCallback all_callback; MergeTreeReadTaskCallback callback; size_t number_of_current_replica{0}; + size_t total_nodes_count{0}; }; /// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f46b4de10b7..4a1b6d0bada 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -211,14 +211,11 @@ using PartRefs = std::deque; class DefaultCoordinator : public ParallelReplicasReadingCoordinator::ImplInterface { public: - explicit DefaultCoordinator(size_t replicas_count_, size_t mark_segment_size_) + explicit DefaultCoordinator(size_t replicas_count_) : ParallelReplicasReadingCoordinator::ImplInterface(replicas_count_) - , mark_segment_size(mark_segment_size_) , replica_status(replicas_count_) , distribution_by_hash_queue(replicas_count_) { - if (mark_segment_size == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`"); } ~DefaultCoordinator() override; @@ -231,7 +228,7 @@ public: private: /// This many granules will represent a single segment of marks that will be assigned to a replica - const size_t mark_segment_size{0}; + size_t mark_segment_size{0}; bool state_initialized{false}; size_t finished_replicas{0}; @@ -393,6 +390,10 @@ void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement ann state_initialized = true; source_replica_for_parts_snapshot = announcement.replica_num; + mark_segment_size = announcement.mark_segment_size; + if (mark_segment_size == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`"); + LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; ")); } @@ -1043,7 +1044,7 @@ void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode) switch (mode) { case CoordinationMode::Default: - pimpl = std::make_unique(replicas_count, mark_segment_size); + pimpl = std::make_unique(replicas_count); break; case CoordinationMode::WithOrder: pimpl = std::make_unique>(replicas_count); @@ -1060,8 +1061,7 @@ void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode) pimpl->markReplicaAsUnavailable(replica); } -ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_) - : replicas_count(replicas_count_), mark_segment_size(mark_segment_size_) +ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_) { } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 8b463fda395..ad51d20f553 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -15,7 +15,7 @@ class ParallelReplicasReadingCoordinator public: class ImplInterface; - explicit ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_ = 0); + explicit ParallelReplicasReadingCoordinator(size_t replicas_count_); ~ParallelReplicasReadingCoordinator(); void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement); @@ -35,7 +35,6 @@ private: std::mutex mutex; const size_t replicas_count{0}; - size_t mark_segment_size{0}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation std::set replicas_used; diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 2ce0e20dcd2..1b0ad3cca48 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -126,6 +126,7 @@ void InitialAllRangesAnnouncement::serialize(WriteBuffer & out) const writeIntBinary(mode, out); description.serialize(out); writeIntBinary(replica_num, out); + writeIntBinary(mark_segment_size, out); } @@ -156,10 +157,15 @@ InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffe description.deserialize(in); readIntBinary(replica_num, in); - return InitialAllRangesAnnouncement { + size_t mark_segment_size = 128; + if (version >= 4) + readIntBinary(mark_segment_size, in); + + return InitialAllRangesAnnouncement{ mode, description, - replica_num + replica_num, + mark_segment_size, }; } diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 5f5516a6804..17518adf833 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -93,17 +93,14 @@ struct InitialAllRangesAnnouncement /// No default constructor, you must initialize all fields at once. InitialAllRangesAnnouncement( - CoordinationMode mode_, - RangesInDataPartsDescription description_, - size_t replica_num_) - : mode(mode_) - , description(description_) - , replica_num(replica_num_) + CoordinationMode mode_, RangesInDataPartsDescription description_, size_t replica_num_, size_t mark_segment_size_) + : mode(mode_), description(description_), replica_num(replica_num_), mark_segment_size(mark_segment_size_) {} CoordinationMode mode; RangesInDataPartsDescription description; size_t replica_num; + size_t mark_segment_size; void serialize(WriteBuffer & out) const; String describe(); From 891f9c5358dce8c572848fdb24513c0e56629bd8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 15 Aug 2024 18:44:31 +0100 Subject: [PATCH 333/844] fix typo --- src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 2736aab2603..87fc1c12ddd 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -49,7 +49,7 @@ size_t chooseSegmentSize( /// * (min_marks_per_task * threads) = the number of marks we request from the coordinator each time - there is no point to have segments smaller than one unit of work for a replica /// * (sum_marks / number_of_replicas^2) - we use consistent hashing for work distribution (including work stealing). If we have a really slow replica /// everything up to (1/number_of_replicas) portion of its work will be stolen by other replicas. And it owns (1/number_of_replicas) share of total number of marks. - /// Aslo important to note here that sum_marks is calculated after PK analysis, it means in particular that different segment sizes might be used for the + /// Also important to note here that sum_marks is calculated after PK analysis, it means in particular that different segment sizes might be used for the /// same table for different queries (it is intentional). mark_segment_size = std::max({mark_segment_size, min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas}); From 80d985a690d66621dd994b9e8066788b16cfe044 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 15 Aug 2024 19:11:23 +0100 Subject: [PATCH 334/844] add setting change --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 20a8721c10e..0c2f9190553 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -88,6 +88,7 @@ static std::initializer_list Date: Fri, 16 Aug 2024 01:24:52 +0400 Subject: [PATCH 335/844] parallel attach from multiple partitions --- src/Storages/StorageReplicatedMergeTree.cpp | 112 ++++++++++++++------ src/Storages/StorageReplicatedMergeTree.h | 13 +++ 2 files changed, 95 insertions(+), 30 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a3c1ab7cdff..67f51b186c5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8030,25 +8030,84 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// First argument is true, because we possibly will add new data to current table. auto lock1 = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); - auto storage_settings_ptr = getSettings(); - auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); - auto metadata_snapshot = getInMemoryMetadataPtr(); + const auto storage_settings_ptr = getSettings(); + const auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); + const auto metadata_snapshot = getInMemoryMetadataPtr(); + const MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - Stopwatch watch; + std::unordered_set partitions; + if (partition->as()->all) + { + if (replace) + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH/ATTACH PARTITION ALL currently"); + + partitions = src_data.getAllPartitionIds(); + } + else + { + partitions = std::unordered_set(); + partitions.emplace(getPartitionIDFromQuery(partition, query_context)); + } + LOG_INFO(log, "Will try to attach {} partitions", partitions.size()); + + const Stopwatch watch; ProfileEventsScope profile_events_scope; + const auto zookeeper = getZooKeeper(); - MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, query_context); + const bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication + || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + try + { + std::unique_ptr entries[partitions.size()]; + size_t idx = 0; + for (const auto & partition_id : partitions) + { + entries[idx] = replacePartitionFromImpl(watch, + profile_events_scope, + metadata_snapshot, + src_data, + partition_id, + zookeeper, + replace, + zero_copy_enabled, + storage_settings_ptr->always_use_copy_instead_of_hardlinks, + query_context); + ++idx; + } + + for (const auto & entry : entries) + waitForLogEntryToBeProcessedIfNecessary(*entry, query_context); + + lock2.reset(); + lock1.reset(); + } + catch(...) + { + lock2.reset(); + lock1.reset(); + + throw; + } +} + +std::unique_ptr StorageReplicatedMergeTree::replacePartitionFromImpl( + const Stopwatch & watch, + ProfileEventsScope & profile_events_scope, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreeData & src_data, + const String & partition_id, + const ZooKeeperPtr & zookeeper, + bool replace, + const bool & zero_copy_enabled, + const bool & always_use_copy_instead_of_hardlinks, + const ContextPtr & query_context) +{ /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); - LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); - static const String TMP_PREFIX = "tmp_replace_from_"; - auto zookeeper = getZooKeeper(); - /// Retry if alter_partition_version changes for (size_t retry = 0; retry < 1000; ++retry) { @@ -8133,11 +8192,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication - || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; IDataPartStorage::ClonePartParams clone_params { - .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), + .copy_instead_of_hardlink = always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; if (replace) @@ -8145,7 +8202,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// Replace can only work on the same disk auto [dst_part, part_lock] = cloneAndLoadDataPart( src_part, - TMP_PREFIX, + TMP_PREFIX_REPLACE_PARTITION_FROM, dst_part_info, metadata_snapshot, clone_params, @@ -8160,7 +8217,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// Attach can work on another disk auto [dst_part, part_lock] = cloneAndLoadDataPart( src_part, - TMP_PREFIX, + TMP_PREFIX_REPLACE_PARTITION_FROM, dst_part_info, metadata_snapshot, clone_params, @@ -8176,15 +8233,16 @@ void StorageReplicatedMergeTree::replacePartitionFrom( part_checksums.emplace_back(hash_hex); } - ReplicatedMergeTreeLogEntryData entry; + //ReplicatedMergeTreeLogEntryData entry; + auto entry = std::make_unique(); { auto src_table_id = src_data.getStorageID(); - entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; - entry.source_replica = replica_name; - entry.create_time = time(nullptr); - entry.replace_range_entry = std::make_shared(); + entry->type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; + entry->source_replica = replica_name; + entry->create_time = time(nullptr); + entry->replace_range_entry = std::make_shared(); - auto & entry_replace = *entry.replace_range_entry; + auto & entry_replace = *entry->replace_range_entry; entry_replace.drop_range_part_name = drop_range_fake_part_name; entry_replace.from_database = src_table_id.database_name; entry_replace.from_table = src_table_id.table_name; @@ -8225,7 +8283,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ops.emplace_back(zkutil::makeSetRequest(alter_partition_version_path, "", alter_partition_version_stat.version)); /// Just update version, because merges assignment relies on it ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); - ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); + ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry->toString(), zkutil::CreateMode::PersistentSequential)); Transaction transaction(*this, NO_TRANSACTION_RAW); { @@ -8275,14 +8333,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom( } String log_znode_path = dynamic_cast(*op_results.back()).path_created; - entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); + entry->znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); for (auto & lock : ephemeral_locks) lock.assumeUnlocked(); - lock2.reset(); - lock1.reset(); - /// We need to pull the REPLACE_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block @@ -8291,10 +8346,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( parts_holder.clear(); cleanup_thread.wakeup(); - - waitForLogEntryToBeProcessedIfNecessary(entry, query_context); - - return; + return entry; } throw Exception( diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 2e54f17d5d5..4291aebf3e8 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1013,6 +1014,18 @@ private: DataPartsVector::const_iterator it; }; + const String TMP_PREFIX_REPLACE_PARTITION_FROM = "tmp_replace_from_"; + std::unique_ptr replacePartitionFromImpl( + const Stopwatch & watch, + ProfileEventsScope & profile_events_scope, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreeData & src_data, + const String & partition_id, + const zkutil::ZooKeeperPtr & zookeeper, + bool replace, + const bool & zero_copy_enabled, + const bool & always_use_copy_instead_of_hardlinks, + const ContextPtr & query_context); }; String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info); From b1963738bdf8e02e78a0be1b2cce1b992a6d533c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 15 Aug 2024 21:41:40 +0000 Subject: [PATCH 336/844] Rebase --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index c2d74e21ba1..eb35c10ac57 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit c2d74e21ba1b8a27966e344693e176f927e4eb50 +Subproject commit eb35c10ac5725da7ef4be88b303895e1b5d153be From c31c4154ca47c6f1313c04974b7bb95bbcb34cff Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Fri, 16 Aug 2024 01:49:54 +0400 Subject: [PATCH 337/844] fix cs --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 67f51b186c5..38309282768 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8083,7 +8083,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( lock2.reset(); lock1.reset(); } - catch(...) + catch (...) { lock2.reset(); lock1.reset(); From b3a3d1e7202837e2da45b6775b4ea8c389c881a7 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 05:53:17 +0000 Subject: [PATCH 338/844] another rebase --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index eb35c10ac57..73752937366 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit eb35c10ac5725da7ef4be88b303895e1b5d153be +Subproject commit 737529373665bc067971ba098a12d6928580a0ae From 7c691cbb797594d0c6649b36be13ff99c3c4664b Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:44:11 +0200 Subject: [PATCH 339/844] style --- tests/ci/integration_tests_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f63afc297e6..006958abe6d 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -774,10 +774,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Found '%s' tests to run", " ".join(tests_to_run)) result_state = "success" description_prefix = "No flaky tests: " - start = time.time() logging.info("Starting check with retries") final_retry = 0 - logs = [] counters = { "ERROR": [], "PASSED": [], @@ -858,7 +856,7 @@ class ClickhouseIntegrationTestsRunner: ] ) - return result_state, status_text, test_result, logs + return result_state, status_text, test_result, tests_log_paths def run_impl(self, repo_path, build_path): if self.flaky_check or self.bugfix_validate_check: From 29fffd34ac5ab769153563f9c83b134cbaefb117 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 16 Aug 2024 11:06:20 +0000 Subject: [PATCH 340/844] Fix: check if table expression is present --- src/Planner/findParallelReplicasQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index cd02e01c392..25481d06670 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -54,7 +54,7 @@ std::stack getSupportingParallelReplicasQuery(const IQueryTre if (std::dynamic_pointer_cast(storage) || typeid_cast(storage.get())) { /// parallel replicas is not supported with FINAL - if (table_node.getTableExpressionModifiers()->hasFinal()) + if (table_node.getTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal()) return {}; return res; From 46f27b03f9dd2f122da6cfcb9905f5bb7ec944e7 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Fri, 16 Aug 2024 15:48:28 +0400 Subject: [PATCH 341/844] fix transaction --- src/Storages/StorageReplicatedMergeTree.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 38309282768..4915bf9f366 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8108,6 +8108,13 @@ std::unique_ptr StorageReplicatedMergeTree::rep DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); + std::optional txn; + if (auto query_txn = query_context->getZooKeeperMetadataTransaction()) + txn.emplace(query_txn->getZooKeeper(), + query_txn->getDatabaseZooKeeperPath(), + query_txn->isInitialQuery(), + query_txn->getTaskZooKeeperPath()); + /// Retry if alter_partition_version changes for (size_t retry = 0; retry < 1000; ++retry) { @@ -8275,7 +8282,7 @@ std::unique_ptr StorageReplicatedMergeTree::rep ephemeral_locks[i].getUnlockOp(ops); } - if (auto txn = query_context->getZooKeeperMetadataTransaction()) + if (txn) txn->moveOpsTo(ops); delimiting_block_lock->getUnlockOp(ops); From 770804ffdca82552516ac9e7892c955d61dee1f3 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 16 Aug 2024 08:57:11 -0300 Subject: [PATCH 342/844] change default setting value to 100 --- src/Core/ServerSettings.h | 2 +- .../test_max_authentication_methods_per_user/test.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 887beba4b13..29a45d23163 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -116,7 +116,7 @@ namespace DB M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ - M(UInt64, max_authentication_methods_per_user, 256, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users.", 0) \ + M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ diff --git a/tests/integration/test_max_authentication_methods_per_user/test.py b/tests/integration/test_max_authentication_methods_per_user/test.py index fb02ef73bf6..0142c7db746 100644 --- a/tests/integration/test_max_authentication_methods_per_user/test.py +++ b/tests/integration/test_max_authentication_methods_per_user/test.py @@ -76,13 +76,13 @@ def test_create_default_setting(started_cluster): expected_error = "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user" query_exceeds = get_query_with_multiple_identified_with( - "CREATE", "u_max_authentication_methods", 257 + "CREATE", "u_max_authentication_methods", 101 ) assert expected_error in default_node.query_and_get_error(query_exceeds) query_not_exceeds = get_query_with_multiple_identified_with( - "CREATE", "u_max_authentication_methods", 256 + "CREATE", "u_max_authentication_methods", 100 ) assert expected_error not in default_node.query_and_get_answer_with_error( @@ -96,19 +96,19 @@ def test_alter_default_setting(started_cluster): default_node.query("CREATE USER u_max_authentication_methods IDENTIFIED BY '1'") query_add_exceeds = get_query_with_multiple_identified_with( - "ALTER", "u_max_authentication_methods", 256, "ADD" + "ALTER", "u_max_authentication_methods", 100, "ADD" ) assert expected_error in default_node.query_and_get_error(query_add_exceeds) query_replace_exceeds = get_query_with_multiple_identified_with( - "ALTER", "u_max_authentication_methods", 257 + "ALTER", "u_max_authentication_methods", 101 ) assert expected_error in default_node.query_and_get_error(query_replace_exceeds) query_add_not_exceeds = get_query_with_multiple_identified_with( - "ALTER", "u_max_authentication_methods", 1, "ADD" + "ALTER", "u_max_authentication_methods", 99, "ADD" ) assert expected_error not in default_node.query_and_get_answer_with_error( @@ -116,7 +116,7 @@ def test_alter_default_setting(started_cluster): ) query_replace_not_exceeds = get_query_with_multiple_identified_with( - "ALTER", "u_max_authentication_methods", 2 + "ALTER", "u_max_authentication_methods", 100 ) assert expected_error not in default_node.query_and_get_answer_with_error( From cb9039e91d073942ff07a68e10336f8db38f4a79 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Tue, 6 Aug 2024 19:05:12 +0800 Subject: [PATCH 343/844] Add thread pool and cancellation if merge data with key This patch will add thread pool and cancellation if merge data with key. During the merge, if the data size is large, we may convert the singleLevelHash to twoLevelHash and merge in parallel. Test the patch with 2 x 80 vCPUs, Q8 and Q9 has got 10.3% and 7.6% performance improvement. Signed-off-by: Jiebin Sun --- .../AggregateFunctionUniq.h | 2 ++ src/AggregateFunctions/IAggregateFunction.h | 12 +++++++--- src/AggregateFunctions/UniqExactSet.h | 7 ++++++ src/Interpreters/Aggregator.cpp | 24 ++++++++++++------- src/Interpreters/Aggregator.h | 4 ++-- .../Transforms/AggregatingTransform.cpp | 2 +- 6 files changed, 36 insertions(+), 15 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index cef23f766c7..35d6e599e38 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -459,6 +459,8 @@ public: bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; } + constexpr static bool parallelizeMergeWithKey() { return true; } + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { if constexpr (is_parallelize_merge_prepare_needed) diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index ee227db6d9d..f8e7051d635 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -145,6 +145,8 @@ public: virtual bool isParallelizeMergePrepareNeeded() const { return false; } + constexpr static bool parallelizeMergeWithKey() { return false; } + virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic & /*is_cancelled*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName()); @@ -169,7 +171,7 @@ public: /// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function /// then destroy states (on which src places points to). - virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0; + virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const = 0; /// Serializes state (to transmit it over the network, for example). virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version = std::nullopt) const = 0; /// NOLINT @@ -499,11 +501,15 @@ public: static_cast(this)->merge(places[i] + place_offset, rhs[i], arena); } - void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override + void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { for (size_t i = 0; i < size; ++i) { - static_cast(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena); + if constexpr (Derived::parallelizeMergeWithKey()) + static_cast(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, thread_pool, is_cancelled, arena); + else + static_cast(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena); + static_cast(this)->destroy(rhs_places[i] + offset); } } diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 2ae8c3a8386..25c6f7ac55f 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -101,6 +101,13 @@ public: auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic * is_cancelled = nullptr) { + /// If the size is large, we may convert the singleLevelHash to twoLevelHash and merge in parallel. + if (other.size() > 40000) + { + if (isSingleLevel()) + convertToTwoLevel(); + } + if (isSingleLevel() && other.isTwoLevel()) convertToTwoLevel(); diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index e073b7a49b6..71be3c1c30c 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2371,7 +2371,7 @@ void NO_INLINE Aggregator::mergeDataNullKey( template void NO_INLINE Aggregator::mergeDataImpl( - Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch) const + Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch, ThreadPool & thread_pool, std::atomic & is_cancelled) const { if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization) mergeDataNullKey(table_dst, table_src, arena); @@ -2410,7 +2410,7 @@ void NO_INLINE Aggregator::mergeDataImpl( { if (!is_aggregate_function_compiled[i]) aggregate_functions[i]->mergeAndDestroyBatch( - dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena); } return; @@ -2420,7 +2420,7 @@ void NO_INLINE Aggregator::mergeDataImpl( for (size_t i = 0; i < params.aggregates_size; ++i) { aggregate_functions[i]->mergeAndDestroyBatch( - dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena); + dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena); } } @@ -2535,8 +2535,10 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( template void NO_INLINE Aggregator::mergeSingleLevelDataImpl( - ManyAggregatedDataVariants & non_empty_data) const + ManyAggregatedDataVariants & non_empty_data, std::atomic & is_cancelled) const { + ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads}; + AggregatedDataVariantsPtr & res = non_empty_data[0]; bool no_more_keys = false; @@ -2557,13 +2559,13 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( if (compiled_aggregate_functions_holder) { mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, true, prefetch); + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, true, prefetch, thread_pool, is_cancelled); } else #endif { mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, false, prefetch); + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, false, prefetch, thread_pool, is_cancelled); } } else if (res->without_key) @@ -2589,7 +2591,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( #define M(NAME) \ template void NO_INLINE Aggregator::mergeSingleLevelDataImpl( \ - ManyAggregatedDataVariants & non_empty_data) const; + ManyAggregatedDataVariants & non_empty_data, std::atomic & is_cancelled) const; APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M @@ -2597,6 +2599,8 @@ template void NO_INLINE Aggregator::mergeBucketImpl( ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic & is_cancelled) const { + ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads}; + /// We merge all aggregation results to the first. AggregatedDataVariantsPtr & res = data[0]; @@ -2613,7 +2617,7 @@ void NO_INLINE Aggregator::mergeBucketImpl( if (compiled_aggregate_functions_holder) { mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena, true, prefetch); + getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena, true, prefetch, thread_pool, is_cancelled); } else #endif @@ -2623,7 +2627,9 @@ void NO_INLINE Aggregator::mergeBucketImpl( getDataVariant(current).data.impls[bucket], arena, false, - prefetch); + prefetch, + thread_pool, + is_cancelled); } } } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index f4f1e9a1df3..c357da2d9de 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -455,7 +455,7 @@ private: /// Merge data from hash table `src` into `dst`. template - void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch) const; + void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch, ThreadPool & thread_pool, std::atomic & is_cancelled) const; /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`. template @@ -478,7 +478,7 @@ private: template void mergeSingleLevelDataImpl( - ManyAggregatedDataVariants & non_empty_data) const; + ManyAggregatedDataVariants & non_empty_data, std::atomic & is_cancelled) const; template using ConvertToBlockRes = std::conditional_t; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index cdbe194cfac..5e7fba6d80c 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -495,7 +495,7 @@ private: #define M(NAME) \ else if (first->type == AggregatedDataVariants::Type::NAME) \ - params->aggregator.mergeSingleLevelDataImplNAME)::element_type>(*data); + params->aggregator.mergeSingleLevelDataImplNAME)::element_type>(*data, shared_data->is_cancelled); if (false) {} // NOLINT APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M From 32f4b1f89143a293047beb83169d83c3e20c5480 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 17:49:38 +0000 Subject: [PATCH 344/844] Add aggregate functions distinctDynamicTypes/distinctJSONPaths/distinctJSONPathsAndTypes --- .../reference/distinctdynamictypes.md | 44 +++ .../reference/distinctjsonpaths.md | 84 +++++ docs/en/sql-reference/data-types/newjson.md | 125 ++++++- .../AggregateFunctionDistinctDynamicTypes.cpp | 155 ++++++++ .../AggregateFunctionDistinctJSONPaths.cpp | 331 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 4 + src/Columns/ColumnDynamic.cpp | 35 ++ src/Columns/ColumnDynamic.h | 3 + ...istinct_dynamic_types_json_paths.reference | 121 +++++++ ...3227_distinct_dynamic_types_json_paths.sql | 50 +++ 10 files changed, 951 insertions(+), 1 deletion(-) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md create mode 100644 docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md create mode 100644 src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp create mode 100644 tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference create mode 100644 tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md b/docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md new file mode 100644 index 00000000000..970209252fa --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/distinctdynamictypes.md @@ -0,0 +1,44 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/distinctdynamictypes +sidebar_position: 215 +--- + +# distinctDynamicTypes + +Calculates the list of distinct data types stored in [Dynamic](../../data-types/dynamic.md) column. + +**Syntax** + +```sql +distinctDynamicTypes(dynamic) +``` + +**Arguments** + +- `dynamic` — [Dynamic](../../data-types/dynamic.md) column. + +**Returned Value** + +- The sorted list of data type names [Array(String)](../../data-types/array.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_dynamic; +CREATE TABLE test_dynamic(d Dynamic) ENGINE = Memory; +INSERT INTO test_dynamic VALUES (42), (NULL), ('Hello'), ([1, 2, 3]), ('2020-01-01'), (map(1, 2)), (43), ([4, 5]), (NULL), ('World'), (map(3, 4)) +``` + +```sql +SELECT distinctDynamicTypes(d) FROM test_dynamic; +``` + +Result: + +```reference +┌─distinctDynamicTypes(d)──────────────────────────────────────┐ +│ ['Array(Int64)','Date','Int64','Map(UInt8, UInt8)','String'] │ +└──────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md b/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md new file mode 100644 index 00000000000..f916734ca44 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md @@ -0,0 +1,84 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/distinctjsonpaths +sidebar_position: 216 +--- + +# distinctJSONPaths + +Calculates the list of distinct paths stored in [JSON](../../data-types/newjson.md) column. + +**Syntax** + +```sql +distinctJSONPaths(json) +``` + +**Arguments** + +- `json` — [JSON](../../data-types/newjson.md) column. + +**Returned Value** + +- The sorted list of paths [Array(String)](../../data-types/array.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_json; +CREATE TABLE test_json(json JSON) ENGINE = Memory; +INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}') +``` + +```sql +SELECT distinctJSONPaths(json) FROM test_json; +``` + +Result: + +```reference +┌─distinctJSONPaths(json)───┐ +│ ['a','b','c.d.e','c.d.f'] │ +└───────────────────────────┘ +``` + +# distinctJSONPathsAndTypes + +Calculates the list of distinct paths and their types stored in [JSON](../../data-types/newjson.md) column. + +**Syntax** + +```sql +distinctJSONPathsAndTypes(json) +``` + +**Arguments** + +- `json` — [JSON](../../data-types/newjson.md) column. + +**Returned Value** + +- The sorted map of paths and types [Map(String, Array(String))](../../data-types/map.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_json; +CREATE TABLE test_json(json JSON) ENGINE = Memory; +INSERT INTO test_json VALUES ('{"a" : 42, "b" : "Hello"}'), ('{"b" : [1, 2, 3], "c" : {"d" : {"e" : "2020-01-01"}}}'), ('{"a" : 43, "c" : {"d" : {"f" : [{"g" : 42}]}}}') +``` + +```sql +SELECT distinctJSONPathsAndTypes(json) FROM test_json; +``` + +Result: + +```reference +┌─distinctJSONPathsAndTypes(json)───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ {'a':['Int64'],'b':['Array(Nullable(Int64))','String'],'c.d.e':['Date'],'c.d.f':['Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))']} │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 9e43216df6c..16cc324a168 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -505,7 +505,130 @@ As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and move ## Introspection functions -There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes). +There are several functions that can help to inspect the content of the JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes), [distinctDynamicTypes](../aggregate-functions/reference/distinctdynamictypes.md), [distinctJSONPaths and distinctJSONPathsAndTypes](../aggregate-functions/reference/distinctjsonpaths.md) + +**Examples** + +Let's investigate the content of [GH Archive](https://www.gharchive.org/) dataset for `2020-01-01` date: + +```sql +SELECT arrayJoin(distinctJSONPaths(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) +``` + +```text +┌─arrayJoin(distinctJSONPaths(json))─────────────────────────┐ +│ actor.avatar_url │ +│ actor.display_login │ +│ actor.gravatar_id │ +│ actor.id │ +│ actor.login │ +│ actor.url │ +│ created_at │ +│ id │ +│ org.avatar_url │ +│ org.gravatar_id │ +│ org.id │ +│ org.login │ +│ org.url │ +│ payload.action │ +│ payload.before │ +│ payload.comment._links.html.href │ +│ payload.comment._links.pull_request.href │ +│ payload.comment._links.self.href │ +│ payload.comment.author_association │ +│ payload.comment.body │ +│ payload.comment.commit_id │ +│ payload.comment.created_at │ +│ payload.comment.diff_hunk │ +│ payload.comment.html_url │ +│ payload.comment.id │ +│ payload.comment.in_reply_to_id │ +│ payload.comment.issue_url │ +│ payload.comment.line │ +│ payload.comment.node_id │ +│ payload.comment.original_commit_id │ +│ payload.comment.original_position │ +│ payload.comment.path │ +│ payload.comment.position │ +│ payload.comment.pull_request_review_id │ +... +│ payload.release.node_id │ +│ payload.release.prerelease │ +│ payload.release.published_at │ +│ payload.release.tag_name │ +│ payload.release.tarball_url │ +│ payload.release.target_commitish │ +│ payload.release.upload_url │ +│ payload.release.url │ +│ payload.release.zipball_url │ +│ payload.size │ +│ public │ +│ repo.id │ +│ repo.name │ +│ repo.url │ +│ type │ +└─arrayJoin(distinctJSONPaths(json))─────────────────────────┘ +``` + +```sql +SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) +``` + + +```text +┌─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┐ +│ ('actor.avatar_url',['String']) │ +│ ('actor.display_login',['String']) │ +│ ('actor.gravatar_id',['String']) │ +│ ('actor.id',['Int64']) │ +│ ('actor.login',['String']) │ +│ ('actor.url',['String']) │ +│ ('created_at',['String']) │ +│ ('id',['String']) │ +│ ('org.avatar_url',['String']) │ +│ ('org.gravatar_id',['String']) │ +│ ('org.id',['Int64']) │ +│ ('org.login',['String']) │ +│ ('org.url',['String']) │ +│ ('payload.action',['String']) │ +│ ('payload.before',['String']) │ +│ ('payload.comment._links.html.href',['String']) │ +│ ('payload.comment._links.pull_request.href',['String']) │ +│ ('payload.comment._links.self.href',['String']) │ +│ ('payload.comment.author_association',['String']) │ +│ ('payload.comment.body',['String']) │ +│ ('payload.comment.commit_id',['String']) │ +│ ('payload.comment.created_at',['String']) │ +│ ('payload.comment.diff_hunk',['String']) │ +│ ('payload.comment.html_url',['String']) │ +│ ('payload.comment.id',['Int64']) │ +│ ('payload.comment.in_reply_to_id',['Int64']) │ +│ ('payload.comment.issue_url',['String']) │ +│ ('payload.comment.line',['Int64']) │ +│ ('payload.comment.node_id',['String']) │ +│ ('payload.comment.original_commit_id',['String']) │ +│ ('payload.comment.original_position',['Int64']) │ +│ ('payload.comment.path',['String']) │ +│ ('payload.comment.position',['Int64']) │ +│ ('payload.comment.pull_request_review_id',['Int64']) │ +... +│ ('payload.release.node_id',['String']) │ +│ ('payload.release.prerelease',['Bool']) │ +│ ('payload.release.published_at',['String']) │ +│ ('payload.release.tag_name',['String']) │ +│ ('payload.release.tarball_url',['String']) │ +│ ('payload.release.target_commitish',['String']) │ +│ ('payload.release.upload_url',['String']) │ +│ ('payload.release.url',['String']) │ +│ ('payload.release.zipball_url',['String']) │ +│ ('payload.size',['Int64']) │ +│ ('public',['Bool']) │ +│ ('repo.id',['Int64']) │ +│ ('repo.name',['String']) │ +│ ('repo.url',['String']) │ +│ ('type',['String']) │ +└─arrayJoin(distinctJSONPathsAndTypes(json))──────────────────┘ +``` ## Tips for better usage of the JSON type diff --git a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp new file mode 100644 index 00000000000..6c899d6cda8 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp @@ -0,0 +1,155 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +struct AggregateFunctionDistinctDynamicTypesData +{ + std::unordered_set data; + + void add(const String & type) + { + data.insert(type); + } + + void merge(const AggregateFunctionDistinctDynamicTypesData & other) + { + data.insert(other.data.begin(), other.data.end()); + } + + void serialize(WriteBuffer & buf) const + { + writeVarUInt(data.size(), buf); + for (const auto & type : data) + writeStringBinary(type, buf); + } + + void deserialize(ReadBuffer & buf) + { + size_t size; + readVarUInt(size, buf); + data.reserve(size); + String type; + for (size_t i = 0; i != size; ++i) + { + readStringBinary(type, buf); + data.insert(type); + } + } + + void insertResultInto(IColumn & column) + { + /// Insert types in sorted order for better output. + auto & array_column = assert_cast(column); + auto & string_column = assert_cast(array_column.getData()); + std::vector sorted_data(data.begin(), data.end()); + std::sort(sorted_data.begin(), sorted_data.end()); + for (const auto & type : sorted_data) + string_column.insertData(type.data(), type.size()); + array_column.getOffsets().push_back(string_column.size()); + } +}; + +/// Calculates the list of distinct data types in Dynamic column. +class AggregateFunctionDistinctDynamicTypes final : public IAggregateFunctionDataHelper +{ +public: + explicit AggregateFunctionDistinctDynamicTypes(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper(argument_types_, {}, std::make_shared(std::make_shared())) + { + } + + String getName() const override { return "distinctDynamicTypes"; } + + bool allocatesMemoryInArena() const override { return false; } + + void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + const auto & dynamic_column = assert_cast(*columns[0]); + if (dynamic_column.isNullAt(row_num)) + return; + + this->data(place).add(dynamic_column.getTypeNameAt(row_num)); + } + + void ALWAYS_INLINE addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) + const override + { + if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size()) + IAggregateFunctionDataHelper::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); + /// Optimization for case when we add all rows from the column into single place. + /// In this case we can avoid iterating over all rows because we can get all types + /// in Dynamic column in a more efficient way. + else + assert_cast(*columns[0]).getAllTypeNames(this->data(place).data); + } + + void addManyDefaults( + AggregateDataPtr __restrict /*place*/, + const IColumn ** /*columns*/, + size_t /*length*/, + Arena * /*arena*/) const override + { + /// Default value for Dynamic is NULL, so nothing to add. + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + this->data(place).serialize(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + this->data(place).insertResultInto(to); + } +}; + +AggregateFunctionPtr createAggregateFunctionDistinctDynamicTypes( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertNoParameters(name, parameters); + if (argument_types.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Incorrect number of arguments for aggregate function {}. Expected single argument with type Dynamic, got {} arguments", name, argument_types.size()); + + if (!isDynamic(argument_types[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type Dynamic", argument_types[0]->getName(), name); + + return std::make_shared(argument_types); +} + +void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory) +{ + factory.registerFunction("distinctDynamicTypes", createAggregateFunctionDistinctDynamicTypes); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp new file mode 100644 index 00000000000..a3ede0750f2 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp @@ -0,0 +1,331 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +struct AggregateFunctionDistinctJSONPathsData +{ + static constexpr auto name = "distinctJSONPaths"; + + std::unordered_set data; + + void add(const ColumnObject & column, size_t row_num, const std::unordered_map &) + { + for (const auto & [path, _] : column.getTypedPaths()) + data.insert(path); + for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs()) + { + /// Add path from dynamic paths only if it's not NULL in this row. + if (!dynamic_column->isNullAt(row_num)) + data.insert(path); + } + + /// Iterate over paths in shared data in this row. + const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues(); + const auto & shared_data_offsets = column.getSharedDataOffsets(); + size_t start = shared_data_offsets[static_cast(row_num) - 1]; + size_t end = shared_data_offsets[static_cast(row_num)]; + for (size_t i = start; i != end; ++i) + data.insert(shared_data_paths->getDataAt(i).toString()); + } + + void addWholeColumn(const ColumnObject & column, const std::unordered_map &) + { + for (const auto & [path, _] : column.getTypedPaths()) + data.insert(path); + for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs()) + { + /// Add dynamic path only if it has at least one non-null value. + /// getNumberOfDefaultRows for Dynamic column is O(1). + if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size()) + data.insert(path); + } + + /// Iterate over all paths in shared data. + const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_paths->size(); ++i) + data.insert(shared_data_paths->getDataAt(i).toString()); + } + + void merge(const AggregateFunctionDistinctJSONPathsData & other) + { + data.insert(other.data.begin(), other.data.end()); + } + + void serialize(WriteBuffer & buf) const + { + writeVarUInt(data.size(), buf); + for (const auto & path : data) + writeStringBinary(path, buf); + } + + void deserialize(ReadBuffer & buf) + { + size_t size; + readVarUInt(size, buf); + String path; + for (size_t i = 0; i != size; ++i) + { + readStringBinary(path, buf); + data.insert(path); + } + } + + void insertResultInto(IColumn & column) + { + /// Insert paths in sorted order for better output. + auto & array_column = assert_cast(column); + auto & string_column = assert_cast(array_column.getData()); + std::vector sorted_data(data.begin(), data.end()); + std::sort(sorted_data.begin(), sorted_data.end()); + for (const auto & path : sorted_data) + string_column.insertData(path.data(), path.size()); + array_column.getOffsets().push_back(string_column.size()); + } + + static DataTypePtr getResultType() + { + return std::make_shared(std::make_shared()); + } +}; + +struct AggregateFunctionDistinctJSONPathsAndTypesData +{ + static constexpr auto name = "distinctJSONPathsAndTypes"; + + std::unordered_map> data; + + void add(const ColumnObject & column, size_t row_num, const std::unordered_map & typed_paths_type_names) + { + for (const auto & [path, _] : column.getTypedPaths()) + data[path].insert(typed_paths_type_names.at(path)); + for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs()) + { + if (!dynamic_column->isNullAt(row_num)) + data[path].insert(dynamic_column->getTypeNameAt(row_num)); + } + + /// Iterate over paths om shared data in this row and decode the data types. + const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues(); + const auto & shared_data_offsets = column.getSharedDataOffsets(); + size_t start = shared_data_offsets[static_cast(row_num) - 1]; + size_t end = shared_data_offsets[static_cast(row_num)]; + for (size_t i = start; i != end; ++i) + { + auto path = shared_data_paths->getDataAt(i).toString(); + auto value = shared_data_values->getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + /// We should not have Nulls here but let's check just in case. + if (!isNothing(type)) + data[path].insert(type->getName()); + } + } + + void addWholeColumn(const ColumnObject & column, const std::unordered_map & typed_paths_type_names) + { + for (const auto & [path, _] : column.getTypedPaths()) + data[path].insert(typed_paths_type_names.at(path)); + for (const auto & [path, dynamic_column] : column.getDynamicPathsPtrs()) + { + /// Add dynamic path only if it has at least one non-null value. + /// getNumberOfDefaultRows for Dynamic column is O(1). + if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size()) + dynamic_column->getAllTypeNames(data[path]); + } + + /// Iterate over all paths in shared data and decode the data types. + const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues(); + for (size_t i = 0; i != shared_data_paths->size(); ++i) + { + auto path = shared_data_paths->getDataAt(i).toString(); + auto value = shared_data_values->getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + /// We should not have Nulls here but let's check just in case. + if (!isNothing(type)) + data[path].insert(type->getName()); + } + } + + void merge(const AggregateFunctionDistinctJSONPathsAndTypesData & other) + { + for (const auto & [path, types] : other.data) + data[path].insert(types.begin(), types.end()); + } + + void serialize(WriteBuffer & buf) const + { + writeVarUInt(data.size(), buf); + for (const auto & [path, types] : data) + { + writeStringBinary(path, buf); + writeVarUInt(types.size(), buf); + for (const auto & type : types) + writeStringBinary(type, buf); + } + } + + void deserialize(ReadBuffer & buf) + { + size_t paths_size, types_size; + readVarUInt(paths_size, buf); + data.reserve(paths_size); + String path, type; + for (size_t i = 0; i != paths_size; ++i) + { + readStringBinary(path, buf); + readVarUInt(types_size, buf); + data[path].reserve(types_size); + for (size_t j = 0; j != types_size; ++j) + { + readStringBinary(type, buf); + data[path].insert(type); + } + } + } + + void insertResultInto(IColumn & column) + { + /// Insert sorted paths and types for better output. + auto & array_column = assert_cast(column).getNestedColumn(); + auto & tuple_column = assert_cast(array_column.getData()); + auto & key_column = assert_cast(tuple_column.getColumn(0)); + auto & value_column = assert_cast(tuple_column.getColumn(1)); + auto & value_column_data = assert_cast(value_column.getData()); + std::vector>> sorted_data; + sorted_data.reserve(data.size()); + for (const auto & [path, types] : data) + { + std::vector sorted_types(types.begin(), types.end()); + std::sort(sorted_types.begin(), sorted_types.end()); + sorted_data.emplace_back(path, std::move(sorted_types)); + } + std::sort(sorted_data.begin(), sorted_data.end()); + + for (const auto & [path, types] : sorted_data) + { + key_column.insertData(path.data(), path.size()); + for (const auto & type : types) + value_column_data.insertData(type.data(), type.size()); + value_column.getOffsets().push_back(value_column_data.size()); + } + + array_column.getOffsets().push_back(key_column.size()); + } + + static DataTypePtr getResultType() + { + return std::make_shared(std::make_shared(), std::make_shared(std::make_shared())); + } +}; + +/// Calculates the list of distinct data types in Dynamic column. +template +class AggregateFunctionDistinctJSONPathsAndTypes final : public IAggregateFunctionDataHelper> +{ +public: + explicit AggregateFunctionDistinctJSONPathsAndTypes(const DataTypes & argument_types_) + : IAggregateFunctionDataHelper>( + argument_types_, {}, Data::getResultType()) + { + const auto & typed_paths_types = assert_cast(*argument_types_[0]).getTypedPaths(); + typed_paths_type_names.reserve(typed_paths_types.size()); + for (const auto & [path, type] : typed_paths_types) + typed_paths_type_names[path] = type->getName(); + } + + String getName() const override { return Data::name; } + + bool allocatesMemoryInArena() const override { return false; } + + void ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + const auto & object_column = assert_cast(*columns[0]); + this->data(place).add(object_column, row_num, typed_paths_type_names); + } + + void ALWAYS_INLINE addBatchSinglePlace( + size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) + const override + { + if (if_argument_pos >= 0 || row_begin != 0 || row_end != columns[0]->size()) + IAggregateFunctionDataHelper>::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); + /// Optimization for case when we add all rows from the column into single place. + /// In this case we can avoid iterating over all rows because we can get all paths + /// and types in JSON column in a more efficient way. + else + this->data(place).addWholeColumn(assert_cast(*columns[0]), typed_paths_type_names); + } + + void addManyDefaults( + AggregateDataPtr __restrict /*place*/, + const IColumn ** /*columns*/, + size_t /*length*/, + Arena * /*arena*/) const override + { + /// Default value for JSON is empty object, so nothing to add. + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + this->data(place).serialize(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override + { + this->data(place).deserialize(buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + this->data(place).insertResultInto(to); + } + +private: + std::unordered_map typed_paths_type_names; +}; + +template +AggregateFunctionPtr createAggregateFunctionDistinctJSONPathsAndTypes( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertNoParameters(name, parameters); + if (argument_types.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Incorrect number of arguments for aggregate function {}. Expected single argument with type JSON, got {} arguments", name, argument_types.size()); + + if (!isObject(argument_types[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}. Expected type JSON", argument_types[0]->getName(), name); + + return std::make_shared>(argument_types); +} + +void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory) +{ + factory.registerFunction("distinctJSONPaths", createAggregateFunctionDistinctJSONPathsAndTypes); + factory.registerFunction("distinctJSONPathsAndTypes", createAggregateFunctionDistinctJSONPathsAndTypes); +} + +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 4ac25e14ee6..068f1aebd87 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -89,6 +89,8 @@ void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); void registerAggregateFunctionFlameGraph(AggregateFunctionFactory &); void registerAggregateFunctionKolmogorovSmirnovTest(AggregateFunctionFactory & factory); void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory); +void registerAggregateFunctionDistinctDynamicTypes(AggregateFunctionFactory & factory); +void registerAggregateFunctionDistinctJSONPathsAndTypes(AggregateFunctionFactory & factory); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -191,6 +193,8 @@ void registerAggregateFunctions() registerAggregateFunctionFlameGraph(factory); registerAggregateFunctionKolmogorovSmirnovTest(factory); registerAggregateFunctionLargestTriangleThreeBuckets(factory); + registerAggregateFunctionDistinctDynamicTypes(factory); + registerAggregateFunctionDistinctJSONPathsAndTypes(factory); registerWindowFunctions(factory); } diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 1f37add9d2d..b5378e983c6 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -980,6 +980,41 @@ ColumnPtr ColumnDynamic::compress() const }); } +String ColumnDynamic::getTypeNameAt(size_t row_num) const +{ + const auto & variant_col = getVariantColumn(); + size_t discr = variant_col.globalDiscriminatorAt(row_num); + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + return ""; + + if (discr == getSharedVariantDiscriminator()) + { + const auto value = getSharedVariant().getDataAt(variant_col.offsetAt(row_num)); + ReadBufferFromMemory buf(value.data, value.size); + return decodeDataType(buf)->getName(); + } + + return variant_info.variant_names[discr]; +} + +void ColumnDynamic::getAllTypeNames(std::unordered_set & names) const +{ + auto shared_variant_discr = getSharedVariantDiscriminator(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + if (i != shared_variant_discr && !variant_column_ptr->getVariantByGlobalDiscriminator(i).empty()) + names.insert(variant_info.variant_names[i]); + } + + const auto & shared_variant = getSharedVariant(); + for (size_t i = 0; i != shared_variant.size(); ++i) + { + const auto value = shared_variant.getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + names.insert(decodeDataType(buf)->getName()); + } +} + void ColumnDynamic::prepareForSquashing(const Columns & source_columns) { if (source_columns.empty()) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 2ae862de3af..c06c31bb8c9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -430,6 +430,9 @@ public: const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); } + String getTypeNameAt(size_t row_num) const; + void getAllTypeNames(std::unordered_set & names) const; + private: void createVariantInfo(const DataTypePtr & variant_type); diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference new file mode 100644 index 00000000000..d6538a1c4eb --- /dev/null +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference @@ -0,0 +1,121 @@ +a0 +a1 +a10 +a11 +a12 +a2 +a3 +a4 +a5 +a6 +a7 +a8 +a9 +('a0',['Array(Nullable(Int64))','Date','Int64','String']) +('a1',['String']) +('a10',['Array(Nullable(Int64))','Date','Int64','String']) +('a11',['Array(Nullable(Int64))','Date','Int64','String']) +('a12',['Array(Nullable(Int64))','Date','Int64','String']) +('a2',['Array(Nullable(Int64))','Date','Int64','String']) +('a3',['Array(Nullable(Int64))','Date','Int64','String']) +('a4',['Array(Nullable(Int64))','Date','Int64','String']) +('a5',['Array(Nullable(Int64))','Date','Int64','String']) +('a6',['Array(Nullable(Int64))','Date','Int64','String']) +('a7',['Array(Nullable(Int64))','Date','Int64','String']) +('a8',['Array(Nullable(Int64))','Date','Int64','String']) +('a9',['Array(Nullable(Int64))','Date','Int64','String']) +Array(Nullable(Int64)) +Date +Int64 +String +Array(Nullable(Int64)) +Date +Int64 +String +Filter +a1 +a2 +('a1',['String']) +('a2',['String']) +String +If +a1 +a2 +('a1',['String']) +('a2',['String']) +String +Group by +Array(Nullable(Int64)) ['a1','a2'] +Date ['a1','a2'] +Int64 ['a1','a2'] +None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9'] +String ['a1','a2'] +Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']} +Date {'a1':['String'],'a2':['Date']} +Int64 {'a1':['String'],'a2':['Int64']} +None {'a0':['Array(Nullable(Int64))','Date','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Date','Int64','String'],'a11':['Array(Nullable(Int64))','Date','Int64','String'],'a12':['Array(Nullable(Int64))','Date','Int64','String'],'a3':['Array(Nullable(Int64))','Date','Int64','String'],'a4':['Array(Nullable(Int64))','Date','Int64','String'],'a5':['Array(Nullable(Int64))','Date','Int64','String'],'a6':['Array(Nullable(Int64))','Date','Int64','String'],'a7':['Array(Nullable(Int64))','Date','Int64','String'],'a8':['Array(Nullable(Int64))','Date','Int64','String'],'a9':['Array(Nullable(Int64))','Date','Int64','String']} +String {'a1':['String'],'a2':['String']} +Array(Nullable(Int64)) ['Array(Nullable(Int64))'] +Date ['Date'] +Int64 ['Int64'] +None [] +String ['String'] +Remote +a0 +a1 +a10 +a11 +a12 +a2 +a3 +a4 +a5 +a6 +a7 +a8 +a9 +('a0',['Array(Nullable(Int64))','Date','Int64','String']) +('a1',['String']) +('a10',['Array(Nullable(Int64))','Date','Int64','String']) +('a11',['Array(Nullable(Int64))','Date','Int64','String']) +('a12',['Array(Nullable(Int64))','Date','Int64','String']) +('a2',['Array(Nullable(Int64))','Date','Int64','String']) +('a3',['Array(Nullable(Int64))','Date','Int64','String']) +('a4',['Array(Nullable(Int64))','Date','Int64','String']) +('a5',['Array(Nullable(Int64))','Date','Int64','String']) +('a6',['Array(Nullable(Int64))','Date','Int64','String']) +('a7',['Array(Nullable(Int64))','Date','Int64','String']) +('a8',['Array(Nullable(Int64))','Date','Int64','String']) +('a9',['Array(Nullable(Int64))','Date','Int64','String']) +Array(Nullable(Int64)) +Date +Int64 +String +Remote filter +a1 +a2 +('a1',['String']) +('a2',['String']) +String +Remote if +a1 +a2 +('a1',['String']) +('a2',['String']) +String +Remote group by +Array(Nullable(Int64)) ['a1','a2'] +Date ['a1','a2'] +Int64 ['a1','a2'] +None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9'] +String ['a1','a2'] +Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']} +Date {'a1':['String'],'a2':['Date']} +Int64 {'a1':['String'],'a2':['Int64']} +None {'a0':['Array(Nullable(Int64))','Date','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Date','Int64','String'],'a11':['Array(Nullable(Int64))','Date','Int64','String'],'a12':['Array(Nullable(Int64))','Date','Int64','String'],'a3':['Array(Nullable(Int64))','Date','Int64','String'],'a4':['Array(Nullable(Int64))','Date','Int64','String'],'a5':['Array(Nullable(Int64))','Date','Int64','String'],'a6':['Array(Nullable(Int64))','Date','Int64','String'],'a7':['Array(Nullable(Int64))','Date','Int64','String'],'a8':['Array(Nullable(Int64))','Date','Int64','String'],'a9':['Array(Nullable(Int64))','Date','Int64','String']} +String {'a1':['String'],'a2':['String']} +Array(Nullable(Int64)) ['Array(Nullable(Int64))'] +Date ['Date'] +Int64 ['Int64'] +None [] +String ['String'] diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql new file mode 100644 index 00000000000..4f33cc10d46 --- /dev/null +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql @@ -0,0 +1,50 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_json_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test_json_dynamic_aggregate_functions; +create table test_json_dynamic_aggregate_functions (json JSON(a1 String, max_dynamic_paths=2, max_dynamic_types=2)) engine=Memory; +insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toDate(number)))) from numbers(200000); +select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctDynamicTypes(json.a3)) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctDynamicTypes(json.a42)) from test_json_dynamic_aggregate_functions; + +select 'Filter'; +select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String'; +select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String'; +select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions where dynamicType(json.a2) == 'String'; + +select 'If'; +select arrayJoin(distinctJSONPathsIf(json, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctJSONPathsAndTypesIf(json, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions; +select arrayJoin(distinctDynamicTypesIf(json.a2, dynamicType(json.a2) == 'String')) from test_json_dynamic_aggregate_functions; + +select 'Group by'; +select dynamicType(json.a2), distinctJSONPaths(json) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2); +select dynamicType(json.a2), distinctJSONPathsAndTypes(json) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2); +select dynamicType(json.a2), distinctDynamicTypes(json.a2) from test_json_dynamic_aggregate_functions group by dynamicType(json.a2) order by dynamicType(json.a2); + +select 'Remote'; +select arrayJoin(distinctJSONPaths(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); +select arrayJoin(distinctJSONPathsAndTypes(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); +select arrayJoin(distinctDynamicTypes(json.a2)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); + +select 'Remote filter'; +select arrayJoin(distinctJSONPaths(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String'; +select arrayJoin(distinctJSONPathsAndTypes(json)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String'; +select arrayJoin(distinctDynamicTypes(json.a2)) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) where dynamicType(json.a2) == 'String'; + +select 'Remote if'; +select arrayJoin(distinctJSONPathsIf(json, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); +select arrayJoin(distinctJSONPathsAndTypesIf(json, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); +select arrayJoin(distinctDynamicTypesIf(json.a2, dynamicType(json.a2) == 'String')) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions); + +select 'Remote group by'; +select dynamicType(json.a2), distinctJSONPaths(json) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2); +select dynamicType(json.a2), distinctJSONPathsAndTypes(json) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2); +select dynamicType(json.a2), distinctDynamicTypes(json.a2) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2); + +drop table test_json_dynamic_aggregate_functions; From 85a813bbedeec62fdf4704041a62092bbf6a9789 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 17:55:57 +0000 Subject: [PATCH 345/844] Better example --- docs/en/sql-reference/data-types/newjson.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 16cc324a168..99156b11a0b 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -571,7 +571,7 @@ SELECT arrayJoin(distinctJSONPaths(json)) FROM s3('s3://clickhouse-public-datase ``` ```sql -SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) +SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-public-datasets/gharchive/original/2020-01-01-*.json.gz', JSONAsObject) SETTINGS date_time_input_format='best_effort' ``` @@ -583,7 +583,7 @@ SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-publi │ ('actor.id',['Int64']) │ │ ('actor.login',['String']) │ │ ('actor.url',['String']) │ -│ ('created_at',['String']) │ +│ ('created_at',['DateTime']) │ │ ('id',['String']) │ │ ('org.avatar_url',['String']) │ │ ('org.gravatar_id',['String']) │ @@ -598,7 +598,7 @@ SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-publi │ ('payload.comment.author_association',['String']) │ │ ('payload.comment.body',['String']) │ │ ('payload.comment.commit_id',['String']) │ -│ ('payload.comment.created_at',['String']) │ +│ ('payload.comment.created_at',['DateTime']) │ │ ('payload.comment.diff_hunk',['String']) │ │ ('payload.comment.html_url',['String']) │ │ ('payload.comment.id',['Int64']) │ @@ -614,7 +614,7 @@ SELECT arrayJoin(distinctJSONPathsAndTypes(json)) FROM s3('s3://clickhouse-publi ... │ ('payload.release.node_id',['String']) │ │ ('payload.release.prerelease',['Bool']) │ -│ ('payload.release.published_at',['String']) │ +│ ('payload.release.published_at',['DateTime']) │ │ ('payload.release.tag_name',['String']) │ │ ('payload.release.tarball_url',['String']) │ │ ('payload.release.target_commitish',['String']) │ From f21e982f03488634cd09ec7ab570fec07d78a0e1 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 18:01:35 +0000 Subject: [PATCH 346/844] Fix style, update tests --- .../AggregateFunctionDistinctJSONPaths.cpp | 18 ++++++++++++------ ...03227_distinct_dynamic_types_json_paths.sql | 10 ++++++++++ .../aspell-ignore/en/aspell-dict.txt | 5 +++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp index a3ede0750f2..58ce7e27ab0 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp @@ -19,6 +19,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + struct AggregateFunctionDistinctJSONPathsData { static constexpr auto name = "distinctJSONPaths"; @@ -44,7 +50,7 @@ struct AggregateFunctionDistinctJSONPathsData for (size_t i = start; i != end; ++i) data.insert(shared_data_paths->getDataAt(i).toString()); } - + void addWholeColumn(const ColumnObject & column, const std::unordered_map &) { for (const auto & [path, _] : column.getTypedPaths()) @@ -98,7 +104,7 @@ struct AggregateFunctionDistinctJSONPathsData string_column.insertData(path.data(), path.size()); array_column.getOffsets().push_back(string_column.size()); } - + static DataTypePtr getResultType() { return std::make_shared(std::make_shared()); @@ -110,7 +116,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData static constexpr auto name = "distinctJSONPathsAndTypes"; std::unordered_map> data; - + void add(const ColumnObject & column, size_t row_num, const std::unordered_map & typed_paths_type_names) { for (const auto & [path, _] : column.getTypedPaths()) @@ -137,7 +143,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData data[path].insert(type->getName()); } } - + void addWholeColumn(const ColumnObject & column, const std::unordered_map & typed_paths_type_names) { for (const auto & [path, _] : column.getTypedPaths()) @@ -229,7 +235,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData array_column.getOffsets().push_back(key_column.size()); } - + static DataTypePtr getResultType() { return std::make_shared(std::make_shared(), std::make_shared(std::make_shared())); @@ -302,7 +308,7 @@ public: { this->data(place).insertResultInto(to); } - + private: std::unordered_map typed_paths_type_names; }; diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql index 4f33cc10d46..f7707b536e6 100644 --- a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql @@ -47,4 +47,14 @@ select dynamicType(json.a2), distinctJSONPaths(json) from remote('127.0.0.{1,2,3 select dynamicType(json.a2), distinctJSONPathsAndTypes(json) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2); select dynamicType(json.a2), distinctDynamicTypes(json.a2) from remote('127.0.0.{1,2,3}', currentDatabase(), test_json_dynamic_aggregate_functions) group by dynamicType(json.a2) order by dynamicType(json.a2); +select distinctJSONPaths() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctJSONPaths(json, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctJSONPaths(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +select distinctJSONPathsAndTypes() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctJSONPathsAndTypes(json, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctJSONPathsAndTypes(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +select distinctDynamicTypes() from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctDynamicTypes(json.a2, 42) from test_json_dynamic_aggregate_functions; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select distinctDynamicTypes(42) from test_json_dynamic_aggregate_functions; -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} + drop table test_json_dynamic_aggregate_functions; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index fd836d93143..a5dd1ac5f9d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1551,6 +1551,11 @@ disjunctions displayName displaySecretsInShowAndSelect distro +distinctdynamictypes +distinctDynamicTypes +distinctjsonpaths +distinctJSONPaths +distinctJSONPathsAndTypes divideDecimal dmesg doesnt From 76960eff8005818db7f744115798888e72e4b2e5 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 18:38:03 +0000 Subject: [PATCH 347/844] This should work --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index 73752937366..b0fce6066fc 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 737529373665bc067971ba098a12d6928580a0ae +Subproject commit b0fce6066fc2678fa17ee7a98f794da9da8492ff From b29d5242be83294af8429d30fd560a83de3c08b4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 22:10:11 +0000 Subject: [PATCH 348/844] Fix tidy build --- .../AggregateFunctionDistinctDynamicTypes.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp index 6c899d6cda8..649d64a3904 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp @@ -87,7 +87,7 @@ public: if (dynamic_column.isNullAt(row_num)) return; - this->data(place).add(dynamic_column.getTypeNameAt(row_num)); + data(place).add(dynamic_column.getTypeNameAt(row_num)); } void ALWAYS_INLINE addBatchSinglePlace( @@ -100,7 +100,7 @@ public: /// In this case we can avoid iterating over all rows because we can get all types /// in Dynamic column in a more efficient way. else - assert_cast(*columns[0]).getAllTypeNames(this->data(place).data); + assert_cast(*columns[0]).getAllTypeNames(data(place).data); } void addManyDefaults( @@ -114,22 +114,22 @@ public: void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; From e7fc89ba2606e130082a364de5f3b7e6087ec262 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 15 Aug 2024 18:14:07 +0100 Subject: [PATCH 349/844] add bw-compatibility test --- .../configs/clusters.xml | 20 ++++++ .../test_parallel_replicas_protocol.py | 64 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 tests/integration/test_backward_compatibility/configs/clusters.xml create mode 100644 tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py diff --git a/tests/integration/test_backward_compatibility/configs/clusters.xml b/tests/integration/test_backward_compatibility/configs/clusters.xml new file mode 100644 index 00000000000..ac773152df9 --- /dev/null +++ b/tests/integration/test_backward_compatibility/configs/clusters.xml @@ -0,0 +1,20 @@ + + + + + + node0 + 9000 + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py b/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py new file mode 100644 index 00000000000..6f97df95876 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py @@ -0,0 +1,64 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +cluster_name = "parallel_replicas" +nodes = [ + cluster.add_instance( + f"node{num}", + main_configs=["configs/clusters.xml"], + with_zookeeper=False, + image="clickhouse/clickhouse-server", + tag="23.11", + stay_alive=True, + use_old_analyzer=True, + with_installed_binary=True, + ) + for num in range(2) +] + [ + cluster.add_instance( + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=False, + use_old_analyzer=True, + ) +] + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_backward_compatability(start_cluster): + for node in nodes: + node.query("create table t (a UInt64) engine = MergeTree order by tuple()") + node.query("insert into t select number % 100000 from numbers_mt(1000000)") + + # all we want is the query to run without errors + for node in nodes: + assert ( + node.query( + """ + select sum(a) + from t + """, + settings={ + "cluster_for_parallel_replicas": "parallel_replicas", + "max_parallel_replicas": 3, + "allow_experimental_parallel_reading_from_replicas": 1, + "parallel_replicas_for_non_replicated_merge_tree": 1, + }, + ) + == "49999500000\n" + ) + + for node in nodes: + node.query("drop table t") From 41f6e06cbe07f6bd1ce3bdb48154f1f2745b3cc2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Sat, 17 Aug 2024 13:08:30 +0200 Subject: [PATCH 350/844] Update 03227_distinct_dynamic_types_json_paths.sql --- .../0_stateless/03227_distinct_dynamic_types_json_paths.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql index f7707b536e6..c9c86277bce 100644 --- a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql @@ -1,3 +1,5 @@ +-- Tags: long + set allow_experimental_dynamic_type = 1; set allow_experimental_json_type = 1; set allow_experimental_variant_type = 1; From 628a4300ba5d492cf1ae0b4b077459cd2a37f1be Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 16:53:00 +0100 Subject: [PATCH 351/844] fix --- src/Client/Connection.cpp | 2 +- src/Core/ProtocolDefines.h | 4 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/MergeTree/RequestResponse.cpp | 63 +++++++++++-------- src/Storages/MergeTree/RequestResponse.h | 4 +- .../test_parallel_replicas_protocol.py | 2 +- 6 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 07f4bf19f05..4f19fece5ef 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1330,7 +1330,7 @@ ParallelReadRequest Connection::receiveParallelReadRequest() const InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnouncement() const { - return InitialAllRangesAnnouncement::deserialize(*in); + return InitialAllRangesAnnouncement::deserialize(*in, server_revision); } diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index dd37daadaff..5765704e8ac 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -33,8 +33,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54 static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1; -static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; -static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4; static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453; static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1; @@ -84,6 +82,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468; static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469; +static constexpr auto DBMS_MIN_REVISION_WITH_ADAPTIVE_MARK_SEGMENT_FOR_PARALLEL_REPLICAS = 54470; + /// Version of ClickHouse TCP protocol. /// /// Should be incremented manually on protocol changes. diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 679f72b85ff..4a017b2ae40 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1221,7 +1221,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement) { writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); - announcement.serialize(*out); + announcement.serialize(*out, client_tcp_protocol_version); out->next(); } diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 1b0ad3cca48..5e7e392384f 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -2,10 +2,10 @@ #include #include -#include +#include #include #include -#include +#include #include @@ -20,20 +20,21 @@ namespace ErrorCodes namespace { - CoordinationMode validateAndGet(uint8_t candidate) - { - if (candidate <= static_cast(CoordinationMode::MAX)) - return static_cast(candidate); +constexpr UInt64 DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; - throw Exception(ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM, "Unknown reading mode: {}", candidate); - } +CoordinationMode validateAndGet(uint8_t candidate) +{ + if (candidate <= static_cast(CoordinationMode::MAX)) + return static_cast(candidate); + + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM, "Unknown reading mode: {}", candidate); +} } void ParallelReadRequest::serialize(WriteBuffer & out) const { - UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION; /// Must be the first - writeIntBinary(version, out); + writeIntBinary(DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); writeIntBinary(mode, out); writeIntBinary(replica_num, out); @@ -55,10 +56,13 @@ ParallelReadRequest ParallelReadRequest::deserialize(ReadBuffer & in) { UInt64 version; readIntBinary(version, in); - if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) - throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading "\ + if (version != DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception( + ErrorCodes::UNKNOWN_PROTOCOL, + "Protocol versions for parallel reading " "from replicas differ. Got: {}, supported version: {}", - version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + version, + DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION); CoordinationMode mode; size_t replica_num; @@ -90,9 +94,8 @@ void ParallelReadRequest::merge(ParallelReadRequest & other) void ParallelReadResponse::serialize(WriteBuffer & out) const { - UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION; /// Must be the first - writeIntBinary(version, out); + writeIntBinary(DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); writeBoolText(finish, out); description.serialize(out); @@ -107,26 +110,29 @@ void ParallelReadResponse::deserialize(ReadBuffer & in) { UInt64 version; readIntBinary(version, in); - if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) - throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading " \ + if (version != DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception( + ErrorCodes::UNKNOWN_PROTOCOL, + "Protocol versions for parallel reading " "from replicas differ. Got: {}, supported version: {}", - version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + version, + DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION); readBoolText(finish, in); description.deserialize(in); } -void InitialAllRangesAnnouncement::serialize(WriteBuffer & out) const +void InitialAllRangesAnnouncement::serialize(WriteBuffer & out, UInt64 client_protocol_revision) const { - UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION; /// Must be the first - writeIntBinary(version, out); + writeIntBinary(DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); writeIntBinary(mode, out); description.serialize(out); writeIntBinary(replica_num, out); - writeIntBinary(mark_segment_size, out); + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_ADAPTIVE_MARK_SEGMENT_FOR_PARALLEL_REPLICAS) + writeIntBinary(mark_segment_size, out); } @@ -138,14 +144,17 @@ String InitialAllRangesAnnouncement::describe() return result; } -InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in) +InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in, UInt64 client_protocol_revision) { UInt64 version; readIntBinary(version, in); - if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) - throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading " \ + if (version != DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception( + ErrorCodes::UNKNOWN_PROTOCOL, + "Protocol versions for parallel reading " "from replicas differ. Got: {}, supported version: {}", - version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + version, + DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION); CoordinationMode mode; RangesInDataPartsDescription description; @@ -158,7 +167,7 @@ InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffe readIntBinary(replica_num, in); size_t mark_segment_size = 128; - if (version >= 4) + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_ADAPTIVE_MARK_SEGMENT_FOR_PARALLEL_REPLICAS) readIntBinary(mark_segment_size, in); return InitialAllRangesAnnouncement{ diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index 17518adf833..fcb6147c087 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -102,9 +102,9 @@ struct InitialAllRangesAnnouncement size_t replica_num; size_t mark_segment_size; - void serialize(WriteBuffer & out) const; + void serialize(WriteBuffer & out, UInt64 client_protocol_revision) const; String describe(); - static InitialAllRangesAnnouncement deserialize(ReadBuffer & in); + static InitialAllRangesAnnouncement deserialize(ReadBuffer & i, UInt64 client_protocol_revisionn); }; diff --git a/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py b/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py index 6f97df95876..e1b9049ef5d 100644 --- a/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py +++ b/tests/integration/test_backward_compatibility/test_parallel_replicas_protocol.py @@ -11,7 +11,7 @@ nodes = [ main_configs=["configs/clusters.xml"], with_zookeeper=False, image="clickhouse/clickhouse-server", - tag="23.11", + tag="23.11", # earlier versions lead to "Not found column sum(a) in block." exception 🤷 stay_alive=True, use_old_analyzer=True, with_installed_binary=True, From 30229a3bfdd9ca0e827de0f741b9ff1d9553203d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 17:44:16 +0100 Subject: [PATCH 352/844] better --- src/Core/ProtocolDefines.h | 4 +-- .../QueryPlan/ReadFromMergeTree.cpp | 26 ++++++++++--------- .../MergeTreeReadPoolParallelReplicas.cpp | 8 +++--- src/Storages/MergeTree/RequestResponse.cpp | 4 +++ src/Storages/MergeTree/RequestResponse.h | 2 +- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 55c87976355..5acc07e70b7 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -94,6 +94,4 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ADAPTIVE_MARK_SEGMENT_FOR_PARALLEL_ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). -static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470; - -} +static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54471; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 5d3a676c959..a52a36daf7d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1,6 +1,8 @@ #include +#include #include +#include #include #include #include @@ -8,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -16,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -24,10 +29,11 @@ #include #include #include -#include #include -#include +#include +#include #include +#include #include #include #include @@ -41,18 +47,12 @@ #include #include #include -#include -#include -#include -#include -#include #include #include #include #include -#include "Interpreters/Cluster.h" #include "config.h" using namespace DB; @@ -344,11 +344,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( { const auto & client_info = context->getClientInfo(); - auto extension = ParallelReadingExtension{ + auto extension = ParallelReadingExtension + { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), .number_of_current_replica = client_info.number_of_current_replica, - .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().begin()->getAllNodeCount(), + .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(), }; /// We have a special logic for local replica. It has to read less data, because in some cases it should @@ -523,11 +524,12 @@ Pipe ReadFromMergeTree::readInOrder( if (is_parallel_reading_from_replicas) { const auto & client_info = context->getClientInfo(); - ParallelReadingExtension extension{ + ParallelReadingExtension extension + { .all_callback = all_ranges_callback.value(), .callback = read_task_callback.value(), .number_of_current_replica = client_info.number_of_current_replica, - .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().begin()->getAllNodeCount(), + .total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(), }; auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 87fc1c12ddd..fc982fc0249 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -44,14 +44,16 @@ size_t chooseSegmentSize( min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas); - /// Here we take max of three numbers: - /// * user provided setting (0 by default) + /// Here we take max of two numbers: /// * (min_marks_per_task * threads) = the number of marks we request from the coordinator each time - there is no point to have segments smaller than one unit of work for a replica /// * (sum_marks / number_of_replicas^2) - we use consistent hashing for work distribution (including work stealing). If we have a really slow replica /// everything up to (1/number_of_replicas) portion of its work will be stolen by other replicas. And it owns (1/number_of_replicas) share of total number of marks. /// Also important to note here that sum_marks is calculated after PK analysis, it means in particular that different segment sizes might be used for the /// same table for different queries (it is intentional). - mark_segment_size = std::max({mark_segment_size, min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas}); + /// + /// Positive `mark_segment_size` means it is a user provided value, we have to preserve it. + if (mark_segment_size == 0) + mark_segment_size = std::max(min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas); /// Squeeze the value to the borders. mark_segment_size = std::clamp(mark_segment_size, borders.front(), borders.back()); diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp index 5e7e392384f..48ff6ebccfd 100644 --- a/src/Storages/MergeTree/RequestResponse.cpp +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -20,6 +20,10 @@ namespace ErrorCodes namespace { +/// Previously we had a separate protocol version number for parallel replicas. +/// But we didn't maintain backward compatibility and every protocol change was breaking. +/// Now we have to support at least minimal tail of the previous versions and the implementation +/// is based on the common tcp protocol version as in all other places. constexpr UInt64 DEPRECATED_FIELD_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; CoordinationMode validateAndGet(uint8_t candidate) diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h index fcb6147c087..da2fa2795a3 100644 --- a/src/Storages/MergeTree/RequestResponse.h +++ b/src/Storages/MergeTree/RequestResponse.h @@ -94,7 +94,7 @@ struct InitialAllRangesAnnouncement InitialAllRangesAnnouncement( CoordinationMode mode_, RangesInDataPartsDescription description_, size_t replica_num_, size_t mark_segment_size_) - : mode(mode_), description(description_), replica_num(replica_num_), mark_segment_size(mark_segment_size_) + : mode(mode_), description(std::move(description_)), replica_num(replica_num_), mark_segment_size(mark_segment_size_) {} CoordinationMode mode; From c252b3c8b05d85acb355054543d0b7f78d171af0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 18:29:48 +0100 Subject: [PATCH 353/844] fix build --- src/Core/ProtocolDefines.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 5acc07e70b7..f80ddc646bb 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -95,3 +95,5 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ADAPTIVE_MARK_SEGMENT_FOR_PARALLEL_ /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54471; + +} From 9d183b890529d686a673d5f6ecd2d057cf534478 Mon Sep 17 00:00:00 2001 From: avogar Date: Sun, 18 Aug 2024 20:35:30 +0000 Subject: [PATCH 354/844] Add check for loo large array size --- .../AggregateFunctionDistinctDynamicTypes.cpp | 6 ++++++ .../AggregateFunctionDistinctJSONPaths.cpp | 13 +++++++++++++ src/Core/Settings.h | 1 - 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp index 649d64a3904..17e32b20a99 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp @@ -19,10 +19,13 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_LARGE_ARRAY_SIZE; } struct AggregateFunctionDistinctDynamicTypesData { + constexpr static size_t MAX_ARRAY_SIZE = 0xFFFFFF; + std::unordered_set data; void add(const String & type) @@ -46,6 +49,9 @@ struct AggregateFunctionDistinctDynamicTypesData { size_t size; readVarUInt(size, buf); + if (size > MAX_ARRAY_SIZE) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", MAX_ARRAY_SIZE, size); + data.reserve(size); String type; for (size_t i = 0; i != size; ++i) diff --git a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp index 58ce7e27ab0..6100bd57515 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp @@ -23,8 +23,12 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_LARGE_ARRAY_SIZE; } +constexpr static size_t DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE = 0xFFFFFF; + + struct AggregateFunctionDistinctJSONPathsData { static constexpr auto name = "distinctJSONPaths"; @@ -85,6 +89,9 @@ struct AggregateFunctionDistinctJSONPathsData { size_t size; readVarUInt(size, buf); + if (size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, size); + String path; for (size_t i = 0; i != size; ++i) { @@ -192,12 +199,18 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData { size_t paths_size, types_size; readVarUInt(paths_size, buf); + if (paths_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size); + data.reserve(paths_size); String path, type; for (size_t i = 0; i != paths_size; ++i) { readStringBinary(path, buf); readVarUInt(types_size, buf); + if (types_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size); + data[path].reserve(types_size); for (size_t j = 0; j != types_size; ++j) { diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0d84ad9022a..dfcff052740 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -616,7 +616,6 @@ class IColumn; M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_materialized_view_with_bad_select, true, "Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ From 165d08f088d614dd835ee65a2d70f4e1fb0a27e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 05:53:59 +0200 Subject: [PATCH 355/844] Fix bad exception messages --- .../MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 2 +- src/Storages/MergeTree/checkDataPart.cpp | 2 +- tests/integration/test_broken_projections/test.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index dc242a7b084..8877ebff6a1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -391,7 +391,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St { WriteBufferFromOwnString wb; message = PreformattedMessage::create( - "Part {} has a broken projections. It will be ignored. Broken projections info: {}", + "Part `{}` has broken projections. It will be ignored. Broken projections info: {}", part_name, getCurrentExceptionMessage(true)); LOG_DEBUG(log, message); result.action = ReplicatedCheckResult::DoNothing; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 3a22daa0011..4a16e0257a0 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -323,7 +323,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( broken_projections_message += "\n"; broken_projections_message += fmt::format( - "Part {} has a broken projection {} (error: {})", + "Part `{}` has broken projection `{}` (error: {})", data_part->name, name, exception_message); } diff --git a/tests/integration/test_broken_projections/test.py b/tests/integration/test_broken_projections/test.py index 578ff42369c..15ed7a0bf3c 100644 --- a/tests/integration/test_broken_projections/test.py +++ b/tests/integration/test_broken_projections/test.py @@ -405,7 +405,7 @@ def test_materialize_broken_projection(cluster): assert "NO_FILE_IN_DATA_PART" in get_broken_projections_info( node, table_name, part="all_1_1_0", projection="proj1" ) - assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + assert "Part `all_1_1_0` has broken projection `proj1`" in check_table_full( node, table_name ) @@ -415,13 +415,13 @@ def test_materialize_broken_projection(cluster): assert "FILE_DOESNT_EXIST" in get_broken_projections_info( node, table_name, part="all_1_1_0", projection="proj2" ) - assert "Part all_1_1_0 has a broken projection proj2" in check_table_full( + assert "Part `all_1_1_0` has broken projection `proj2`" in check_table_full( node, table_name ) materialize_projection(node, table_name, "proj1") - assert "has a broken projection" not in check_table_full(node, table_name) + assert "has broken projection" not in check_table_full(node, table_name) def test_broken_ignored_replicated(cluster): @@ -443,13 +443,13 @@ def test_broken_ignored_replicated(cluster): check(node, table_name2, 1) break_projection(node, table_name, "proj1", "all_0_0_0", "data") - assert "Part all_0_0_0 has a broken projection proj1" in check_table_full( + assert "Part `all_0_0_0` has broken projection `proj1`" in check_table_full( node, table_name ) break_part(node, table_name, "all_0_0_0") node.query(f"SYSTEM SYNC REPLICA {table_name}") - assert "has a broken projection" not in check_table_full(node, table_name) + assert "has broken projection" not in check_table_full(node, table_name) def get_random_string(string_length=8): @@ -528,7 +528,7 @@ def test_broken_projections_in_backups_3(cluster): check(node, table_name, 1) break_projection(node, table_name, "proj1", "all_1_1_0", "part") - assert "Part all_1_1_0 has a broken projection proj1" in check_table_full( + assert "Part `all_1_1_0` has broken projection `proj1`" in check_table_full( node, table_name ) assert "FILE_DOESNT_EXIST" in get_broken_projections_info( From 228cdfde63100267ce1b8c80ddfeb689979487ba Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 15 Aug 2024 08:58:36 +0000 Subject: [PATCH 356/844] more tests --- .../test_filesystem_layout/test.py | 4 ++++ .../test_with_table_engine_grant.py | 2 ++ .../test_parallel_replicas_failover/test.py | 19 ++++++++++--------- .../test.py | 4 ++-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_filesystem_layout/test.py b/tests/integration/test_filesystem_layout/test.py index 4e719aa0fe9..31d6c830a2f 100644 --- a/tests/integration/test_filesystem_layout/test.py +++ b/tests/integration/test_filesystem_layout/test.py @@ -79,3 +79,7 @@ def test_file_path_escaping(started_cluster): "test -f /var/lib/clickhouse/shadow/2/store/123/12345678-1000-4000-8000-000000000001/1_1_1_0/%7EId.bin", ] ) + node.query("DROP TABLE test.`T.a_b,l-e!` SYNC") + node.query("DROP TABLE `test 2`.`T.a_b,l-e!` SYNC") + node.query("DROP DATABASE test") + node.query("DROP DATABASE `test 2`") diff --git a/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py index 25ca7913e4e..5fc8f67b75b 100644 --- a/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py +++ b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py @@ -359,6 +359,8 @@ def test_implicit_create_view_grant(): instance.query("GRANT CREATE VIEW ON test.* TO B", user="A") instance.query("CREATE VIEW test.view_2 AS SELECT 1", user="B") assert instance.query("SELECT * FROM test.view_2") == "1\n" + instance.query("DROP USER A") + instance.query("DROP VIEW test.view_2") def test_implicit_create_temporary_table_grant(): diff --git a/tests/integration/test_parallel_replicas_failover/test.py b/tests/integration/test_parallel_replicas_failover/test.py index bf25136bff7..2da26ee03c9 100644 --- a/tests/integration/test_parallel_replicas_failover/test.py +++ b/tests/integration/test_parallel_replicas_failover/test.py @@ -1,5 +1,5 @@ import pytest - +import uuid from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -25,19 +25,15 @@ def start_cluster(): def create_tables(cluster, table_name, skip_last_replica): - node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node2.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") - node1.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" ) node2.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') ORDER BY (key)" ) if not skip_last_replica: node3.query( - f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + f"CREATE TABLE {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" ) # populate data @@ -67,7 +63,7 @@ def test_skip_replicas_without_table(start_cluster): for i in range(4): expected_result += f"{i}\t1000\n" - log_comment = "5230b069-9574-407d-9b80-891b5a175f41" + log_comment = uuid.uuid4() assert ( node1.query( f"SELECT key, count() FROM {table_name} GROUP BY key ORDER BY key", @@ -88,6 +84,8 @@ def test_skip_replicas_without_table(start_cluster): ) == "1\t1\n" ) + node1.query(f"DROP TABLE {table_name} SYNC") + node2.query(f"DROP TABLE {table_name} SYNC") def test_skip_unresponsive_replicas(start_cluster): @@ -112,3 +110,6 @@ def test_skip_unresponsive_replicas(start_cluster): ) == expected_result ) + node1.query(f"DROP TABLE {table_name} SYNC") + node2.query(f"DROP TABLE {table_name} SYNC") + node3.query(f"DROP TABLE {table_name} SYNC") diff --git a/tests/integration/test_parallel_replicas_invisible_parts/test.py b/tests/integration/test_parallel_replicas_invisible_parts/test.py index cab3fb46fe9..7093e3b3292 100644 --- a/tests/integration/test_parallel_replicas_invisible_parts/test.py +++ b/tests/integration/test_parallel_replicas_invisible_parts/test.py @@ -35,11 +35,10 @@ def start_cluster(): def _create_tables(table_name, table_size, index_granularity): - nodes[0].query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER {cluster_name}") nodes[0].query( f""" - CREATE TABLE IF NOT EXISTS {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String) + CREATE TABLE {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard/{table_name}/', '{{replica}}') ORDER BY (key) SETTINGS index_granularity = {index_granularity}, max_bytes_to_merge_at_max_space_in_pool = 0, max_bytes_to_merge_at_max_space_in_pool = 1 @@ -128,3 +127,4 @@ def test_reading_with_invisible_parts( ) == f"{expected}\n" ) + nodes[0].query(f"DROP TABLE {table_name} ON CLUSTER {cluster_name} SYNC") From 90ad110a7dddbd1c9430b34b0016dc22eaa3d646 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:58:44 +0000 Subject: [PATCH 357/844] more --- .../test.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 5e04c9e4d12..fb7aab366be 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -1,5 +1,6 @@ import pytest +import uuid import time import psycopg2 import os.path as p @@ -59,8 +60,6 @@ instance2 = cluster.add_instance( pg_manager = PostgresManager() pg_manager2 = PostgresManager() pg_manager_instance2 = PostgresManager() -pg_manager3 = PostgresManager() - @pytest.fixture(scope="module") def started_cluster(): @@ -82,12 +81,6 @@ def started_cluster(): pg_manager2.init( instance2, cluster.postgres_ip, cluster.postgres_port, "postgres_database2" ) - pg_manager3.init( - instance, - cluster.postgres_ip, - cluster.postgres_port, - default_database="postgres-postgres", - ) yield cluster @@ -924,16 +917,27 @@ def test_failed_load_from_snapshot(started_cluster): def test_symbols_in_publication_name(started_cluster): - table = "test_symbols_in_publication_name" + id = uuid.uuid4() + db = f'test_{id}' + table = f"test_symbols_in_publication_name" + + pg_manager3 = PostgresManager() + pg_manager3.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database=db, + ) pg_manager3.create_postgres_table(table) instance.query( - f"INSERT INTO `{pg_manager3.get_default_database()}`.`{table}` SELECT number, number from numbers(0, 50)" + f"INSERT INTO `{db}`.`{table}` SELECT number, number from numbers(0, 50)" ) pg_manager3.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + materialized_database=db, settings=[ f"materialized_postgresql_tables_list = '{table}'", "materialized_postgresql_backoff_min_ms = 100", @@ -941,8 +945,10 @@ def test_symbols_in_publication_name(started_cluster): ], ) check_tables_are_synchronized( - instance, table, postgres_database=pg_manager3.get_default_database() + instance, table, materialized_database=db, postgres_database=db ) + pg_manager3.drop_materialized_db(db) + pg_manager3.execute(f'drop table "{table}"') def test_generated_columns(started_cluster): From 0793585acde8f469bf534f0639850a91efc254f1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:59:30 +0200 Subject: [PATCH 358/844] Fix bad conflict resolution --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index dfcff052740..215d333f6c4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -616,6 +616,7 @@ class IColumn; M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ + M(Bool, allow_materialized_view_with_bad_select, true, "Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ From 64bce7afa115f13ff72e2b4f0f6e1b4eacb881e7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 17:22:17 +0200 Subject: [PATCH 359/844] Fix spaces --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 215d333f6c4..0d84ad9022a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -616,7 +616,7 @@ class IColumn; M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_materialized_view_with_bad_select, true, "Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.", 0) \ + M(Bool, allow_materialized_view_with_bad_select, true, "Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ From 01f5337f69f7b78a8962b640731ab6d2869b56ac Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 19 Aug 2024 15:13:39 -0300 Subject: [PATCH 360/844] fix serialization to fix on cluster commands --- src/Parsers/Access/ASTCreateUserQuery.cpp | 16 +++++++++++++--- src/Parsers/Access/ASTCreateUserQuery.h | 1 + src/Parsers/Access/ParserCreateUserQuery.cpp | 17 +++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 4eebc471d5e..8f44cb3c409 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -21,8 +21,6 @@ namespace void formatAuthenticationData(const std::vector> & authentication_methods, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (settings.hilite ? IAST::hilite_none : ""); - // safe because this method is only called if authentication_methods.size > 1 // if the first type is present, include the `WITH` keyword if (authentication_methods[0]->type) @@ -200,7 +198,8 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); - if (authentication_methods.empty()) + // If identification (auth method) is missing from query, we should serialize it in the form of `NO_PASSWORD` unless it is alter query + if (!alter && authentication_methods.empty()) { auto ast = std::make_shared(); ast->type = AuthenticationType::NO_PASSWORD; @@ -255,7 +254,15 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & formatRenameTo(*new_name, format); if (!authentication_methods.empty()) + { + if (add_identified_with) + { + format.ostr << (format.hilite ? IAST::hilite_keyword : "") << " ADD" << (format.hilite ? IAST::hilite_none : ""); + } + + format.ostr << (format.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED" << (format.hilite ? IAST::hilite_none : ""); formatAuthenticationData(authentication_methods, format); + } if (valid_until) formatValidUntil(*valid_until, format); @@ -278,6 +285,9 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (grantees) formatGrantees(*grantees, format); + + if (reset_authentication_methods_to_new) + format.ostr << (format.hilite ? hilite_keyword : "") << " RESET AUTHENTICATION METHODS TO NEW" << (format.hilite ? hilite_none : ""); } } diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 66a31366993..e1bae98f2f3 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -43,6 +43,7 @@ public: bool if_not_exists = false; bool or_replace = false; bool reset_authentication_methods_to_new = false; + bool add_identified_with = false; bool replace_authentication_methods = false; std::shared_ptr names; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 466fd544c63..0ef658d2377 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -536,10 +536,10 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ASTPtr valid_until; String cluster; String storage_name; - std::optional reset_authentication_methods_to_new; + bool reset_authentication_methods_to_new = false; bool parsed_identified_with = false; - bool parsed_add_new_method = false; + bool parsed_add_identified_with = false; while (true) { @@ -549,20 +549,20 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parsed_identified_with) { - parsed_add_new_method = parseAddIdentifiedWith(pos, expected, auth_data); + parsed_add_identified_with = parseAddIdentifiedWith(pos, expected, auth_data); - if (parsed_add_new_method && !alter) + if (parsed_add_identified_with && !alter) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Create user query is not allowed to have ADD IDENTIFIED, remove the ADD keyword."); } } } - if (!reset_authentication_methods_to_new.has_value()) + if (!reset_authentication_methods_to_new) { reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); - if (reset_authentication_methods_to_new.value() && !alter) + if (reset_authentication_methods_to_new && !alter) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "RESET AUTHENTICATION METHODS TO NEW can only be used on ALTER statement"); } @@ -645,7 +645,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec throw Exception(ErrorCodes::BAD_ARGUMENTS, "NO_PASSWORD Authentication method cannot co-exist with other authentication methods"); } - if (has_no_password_authentication_method && parsed_add_new_method) + if (has_no_password_authentication_method && parsed_add_identified_with) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The authentication method 'no_password' cannot be used with the ADD keyword. " "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); @@ -685,7 +685,8 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->default_database = std::move(default_database); query->valid_until = std::move(valid_until); query->storage_name = std::move(storage_name); - query->reset_authentication_methods_to_new = reset_authentication_methods_to_new.value_or(false); + query->reset_authentication_methods_to_new = reset_authentication_methods_to_new; + query->add_identified_with = parsed_add_identified_with; query->replace_authentication_methods = parsed_identified_with || has_no_password_authentication_method; for (const auto & authentication_method : query->authentication_methods) From 623c507e5febdee3f220797799c20d921fd8a9a9 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 19 Aug 2024 16:42:48 -0300 Subject: [PATCH 361/844] fix in the right place.. --- src/Parsers/Access/ASTCreateUserQuery.cpp | 30 ++++++++++------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 8f44cb3c409..ec48c32b684 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -198,23 +198,11 @@ ASTPtr ASTCreateUserQuery::clone() const if (settings) res->settings = std::static_pointer_cast(settings->clone()); - // If identification (auth method) is missing from query, we should serialize it in the form of `NO_PASSWORD` unless it is alter query - if (!alter && authentication_methods.empty()) + for (const auto & authentication_method : authentication_methods) { - auto ast = std::make_shared(); - ast->type = AuthenticationType::NO_PASSWORD; - - res->authentication_methods.push_back(ast); - res->children.push_back(ast); - } - else - { - for (const auto & authentication_method : authentication_methods) - { - auto ast_clone = std::static_pointer_cast(authentication_method->clone()); - res->authentication_methods.push_back(ast_clone); - res->children.push_back(ast_clone); - } + auto ast_clone = std::static_pointer_cast(authentication_method->clone()); + res->authentication_methods.push_back(ast_clone); + res->children.push_back(ast_clone); } return res; @@ -253,7 +241,15 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & if (new_name) formatRenameTo(*new_name, format); - if (!authentication_methods.empty()) + if (authentication_methods.empty()) + { + // If identification (auth method) is missing from query, we should serialize it in the form of `NO_PASSWORD` unless it is alter query + if (!alter) + { + format.ostr << (format.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH no_password" << (format.hilite ? IAST::hilite_none : ""); + } + } + else { if (add_identified_with) { From a2b08e93e43c357f8adbdd604a54ccb75056e4ec Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 19 Aug 2024 20:16:50 +0000 Subject: [PATCH 362/844] Change test --- ...istinct_dynamic_types_json_paths.reference | 70 +++++++++---------- ...3227_distinct_dynamic_types_json_paths.sql | 2 +- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference index d6538a1c4eb..891982fdc81 100644 --- a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.reference @@ -11,25 +11,25 @@ a6 a7 a8 a9 -('a0',['Array(Nullable(Int64))','Date','Int64','String']) +('a0',['Array(Nullable(Int64))','Bool','Int64','String']) ('a1',['String']) -('a10',['Array(Nullable(Int64))','Date','Int64','String']) -('a11',['Array(Nullable(Int64))','Date','Int64','String']) -('a12',['Array(Nullable(Int64))','Date','Int64','String']) -('a2',['Array(Nullable(Int64))','Date','Int64','String']) -('a3',['Array(Nullable(Int64))','Date','Int64','String']) -('a4',['Array(Nullable(Int64))','Date','Int64','String']) -('a5',['Array(Nullable(Int64))','Date','Int64','String']) -('a6',['Array(Nullable(Int64))','Date','Int64','String']) -('a7',['Array(Nullable(Int64))','Date','Int64','String']) -('a8',['Array(Nullable(Int64))','Date','Int64','String']) -('a9',['Array(Nullable(Int64))','Date','Int64','String']) +('a10',['Array(Nullable(Int64))','Bool','Int64','String']) +('a11',['Array(Nullable(Int64))','Bool','Int64','String']) +('a12',['Array(Nullable(Int64))','Bool','Int64','String']) +('a2',['Array(Nullable(Int64))','Bool','Int64','String']) +('a3',['Array(Nullable(Int64))','Bool','Int64','String']) +('a4',['Array(Nullable(Int64))','Bool','Int64','String']) +('a5',['Array(Nullable(Int64))','Bool','Int64','String']) +('a6',['Array(Nullable(Int64))','Bool','Int64','String']) +('a7',['Array(Nullable(Int64))','Bool','Int64','String']) +('a8',['Array(Nullable(Int64))','Bool','Int64','String']) +('a9',['Array(Nullable(Int64))','Bool','Int64','String']) Array(Nullable(Int64)) -Date +Bool Int64 String Array(Nullable(Int64)) -Date +Bool Int64 String Filter @@ -46,17 +46,17 @@ a2 String Group by Array(Nullable(Int64)) ['a1','a2'] -Date ['a1','a2'] +Bool ['a1','a2'] Int64 ['a1','a2'] None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9'] String ['a1','a2'] Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']} -Date {'a1':['String'],'a2':['Date']} +Bool {'a1':['String'],'a2':['Bool']} Int64 {'a1':['String'],'a2':['Int64']} -None {'a0':['Array(Nullable(Int64))','Date','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Date','Int64','String'],'a11':['Array(Nullable(Int64))','Date','Int64','String'],'a12':['Array(Nullable(Int64))','Date','Int64','String'],'a3':['Array(Nullable(Int64))','Date','Int64','String'],'a4':['Array(Nullable(Int64))','Date','Int64','String'],'a5':['Array(Nullable(Int64))','Date','Int64','String'],'a6':['Array(Nullable(Int64))','Date','Int64','String'],'a7':['Array(Nullable(Int64))','Date','Int64','String'],'a8':['Array(Nullable(Int64))','Date','Int64','String'],'a9':['Array(Nullable(Int64))','Date','Int64','String']} +None {'a0':['Array(Nullable(Int64))','Bool','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Bool','Int64','String'],'a11':['Array(Nullable(Int64))','Bool','Int64','String'],'a12':['Array(Nullable(Int64))','Bool','Int64','String'],'a3':['Array(Nullable(Int64))','Bool','Int64','String'],'a4':['Array(Nullable(Int64))','Bool','Int64','String'],'a5':['Array(Nullable(Int64))','Bool','Int64','String'],'a6':['Array(Nullable(Int64))','Bool','Int64','String'],'a7':['Array(Nullable(Int64))','Bool','Int64','String'],'a8':['Array(Nullable(Int64))','Bool','Int64','String'],'a9':['Array(Nullable(Int64))','Bool','Int64','String']} String {'a1':['String'],'a2':['String']} Array(Nullable(Int64)) ['Array(Nullable(Int64))'] -Date ['Date'] +Bool ['Bool'] Int64 ['Int64'] None [] String ['String'] @@ -74,21 +74,21 @@ a6 a7 a8 a9 -('a0',['Array(Nullable(Int64))','Date','Int64','String']) +('a0',['Array(Nullable(Int64))','Bool','Int64','String']) ('a1',['String']) -('a10',['Array(Nullable(Int64))','Date','Int64','String']) -('a11',['Array(Nullable(Int64))','Date','Int64','String']) -('a12',['Array(Nullable(Int64))','Date','Int64','String']) -('a2',['Array(Nullable(Int64))','Date','Int64','String']) -('a3',['Array(Nullable(Int64))','Date','Int64','String']) -('a4',['Array(Nullable(Int64))','Date','Int64','String']) -('a5',['Array(Nullable(Int64))','Date','Int64','String']) -('a6',['Array(Nullable(Int64))','Date','Int64','String']) -('a7',['Array(Nullable(Int64))','Date','Int64','String']) -('a8',['Array(Nullable(Int64))','Date','Int64','String']) -('a9',['Array(Nullable(Int64))','Date','Int64','String']) +('a10',['Array(Nullable(Int64))','Bool','Int64','String']) +('a11',['Array(Nullable(Int64))','Bool','Int64','String']) +('a12',['Array(Nullable(Int64))','Bool','Int64','String']) +('a2',['Array(Nullable(Int64))','Bool','Int64','String']) +('a3',['Array(Nullable(Int64))','Bool','Int64','String']) +('a4',['Array(Nullable(Int64))','Bool','Int64','String']) +('a5',['Array(Nullable(Int64))','Bool','Int64','String']) +('a6',['Array(Nullable(Int64))','Bool','Int64','String']) +('a7',['Array(Nullable(Int64))','Bool','Int64','String']) +('a8',['Array(Nullable(Int64))','Bool','Int64','String']) +('a9',['Array(Nullable(Int64))','Bool','Int64','String']) Array(Nullable(Int64)) -Date +Bool Int64 String Remote filter @@ -105,17 +105,17 @@ a2 String Remote group by Array(Nullable(Int64)) ['a1','a2'] -Date ['a1','a2'] +Bool ['a1','a2'] Int64 ['a1','a2'] None ['a0','a1','a10','a11','a12','a3','a4','a5','a6','a7','a8','a9'] String ['a1','a2'] Array(Nullable(Int64)) {'a1':['String'],'a2':['Array(Nullable(Int64))']} -Date {'a1':['String'],'a2':['Date']} +Bool {'a1':['String'],'a2':['Bool']} Int64 {'a1':['String'],'a2':['Int64']} -None {'a0':['Array(Nullable(Int64))','Date','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Date','Int64','String'],'a11':['Array(Nullable(Int64))','Date','Int64','String'],'a12':['Array(Nullable(Int64))','Date','Int64','String'],'a3':['Array(Nullable(Int64))','Date','Int64','String'],'a4':['Array(Nullable(Int64))','Date','Int64','String'],'a5':['Array(Nullable(Int64))','Date','Int64','String'],'a6':['Array(Nullable(Int64))','Date','Int64','String'],'a7':['Array(Nullable(Int64))','Date','Int64','String'],'a8':['Array(Nullable(Int64))','Date','Int64','String'],'a9':['Array(Nullable(Int64))','Date','Int64','String']} +None {'a0':['Array(Nullable(Int64))','Bool','Int64','String'],'a1':['String'],'a10':['Array(Nullable(Int64))','Bool','Int64','String'],'a11':['Array(Nullable(Int64))','Bool','Int64','String'],'a12':['Array(Nullable(Int64))','Bool','Int64','String'],'a3':['Array(Nullable(Int64))','Bool','Int64','String'],'a4':['Array(Nullable(Int64))','Bool','Int64','String'],'a5':['Array(Nullable(Int64))','Bool','Int64','String'],'a6':['Array(Nullable(Int64))','Bool','Int64','String'],'a7':['Array(Nullable(Int64))','Bool','Int64','String'],'a8':['Array(Nullable(Int64))','Bool','Int64','String'],'a9':['Array(Nullable(Int64))','Bool','Int64','String']} String {'a1':['String'],'a2':['String']} Array(Nullable(Int64)) ['Array(Nullable(Int64))'] -Date ['Date'] +Bool ['Bool'] Int64 ['Int64'] None [] String ['String'] diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql index c9c86277bce..6930f5a3d44 100644 --- a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql @@ -7,7 +7,7 @@ set use_variant_as_common_type = 1; drop table if exists test_json_dynamic_aggregate_functions; create table test_json_dynamic_aggregate_functions (json JSON(a1 String, max_dynamic_paths=2, max_dynamic_types=2)) engine=Memory; -insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toDate(number)))) from numbers(200000); +insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toBool(number % 2)))) from numbers(200000); select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions; select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions; select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions; From dbd4ee44edd8476ddf9c8745696f2295849b3676 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 20 Aug 2024 14:09:14 +0800 Subject: [PATCH 363/844] enable dict encoding in orc writer --- src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 4a7a23158ff..3356adb5315 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -78,7 +78,9 @@ void ORCOutputStream::write(const void* buf, size_t length) } ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_) + : IOutputFormat(header_, out_) + , format_settings{format_settings_} + , output_stream(out_) { for (const auto & type : header_.getDataTypes()) data_types.push_back(recursiveRemoveLowCardinality(type)); @@ -565,6 +567,7 @@ void ORCBlockOutputFormat::prepareWriter() schema = orc::createStructType(); options.setCompression(getORCCompression(format_settings.orc.output_compression_method)); options.setRowIndexStride(format_settings.orc.output_row_index_stride); + options.setDictionaryKeySizeThreshold(format_settings.orc.output_dictionary_key_size_threshold); size_t columns_count = header.columns(); for (size_t i = 0; i != columns_count; ++i) schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); From 03ab6252653ddcb70d1fef0a340bfb52a8d63f89 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 20 Aug 2024 15:47:26 +0800 Subject: [PATCH 364/844] enable string dict encoding in orc output format --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + 3 files changed, 3 insertions(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d8837d26e54..3c507bc064f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1265,6 +1265,7 @@ class IColumn; M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \ M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \ + M(Double, output_format_orc_dictionary_key_size_threshold, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled", 0) \ \ M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 865b6e6f3f1..479d7a3f029 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride; + format_settings.orc.output_dictionary_key_size_threshold = settings.output_format_orc_dictionary_key_size_threshold; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; format_settings.orc.reader_time_zone_name = settings.input_format_orc_reader_time_zone_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index da25da74134..06535bc07a3 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -413,6 +413,7 @@ struct FormatSettings bool filter_push_down = true; UInt64 output_row_index_stride = 10'000; String reader_time_zone_name = "GMT"; + double output_dictionary_key_size_threshold = 0.0; } orc{}; /// For capnProto format we should determine how to From 37c3f4a8707598022d94ce5b20c10893882cf58c Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Fri, 10 May 2024 12:18:06 +0800 Subject: [PATCH 365/844] add threshold for table rows --- src/Core/Settings.h | 2 + src/Core/SettingsChangesHistory.cpp | 253 +++++++++++ src/Interpreters/HashJoin/AddedColumns.cpp | 63 +++ src/Interpreters/HashJoin/AddedColumns.h | 6 + src/Interpreters/HashJoin/HashJoin.cpp | 84 +++- src/Interpreters/HashJoin/HashJoin.h | 7 +- src/Interpreters/HashJoin/HashJoinMethods.h | 424 +++++++++++++++++- src/Interpreters/IJoin.h | 1 + src/Interpreters/RowRefs.h | 1 + .../Transforms/JoiningTransform.cpp | 6 +- 10 files changed, 841 insertions(+), 6 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d8837d26e54..c1433ca7250 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -922,6 +922,8 @@ class IColumn; M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ + M(Int32, join_to_sort_perkey_rows_threshold, 40, "The lower limit of per-key average rows in the right table to determine whether to sort it in hash join.", 0) \ + M(Int32, join_to_sort_table_rows_threshold, 10000, "The upper limit of rows in the right table to determine whether to sort it in hash join.", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 2415323b4a0..b975c6b2fad 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -516,6 +516,259 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/src/Interpreters/HashJoin/AddedColumns.cpp b/src/Interpreters/HashJoin/AddedColumns.cpp index 21cb6e401ed..d70781d2fb3 100644 --- a/src/Interpreters/HashJoin/AddedColumns.cpp +++ b/src/Interpreters/HashJoin/AddedColumns.cpp @@ -20,10 +20,13 @@ void AddedColumns::buildOutput() {} template<> void AddedColumns::buildJoinGetOutput() {} +<<<<<<< HEAD template<> template void AddedColumns::buildOutputFromBlocks() {} +======= +>>>>>>> add threshold for table rows template<> void AddedColumns::buildOutput() @@ -32,9 +35,15 @@ void AddedColumns::buildOutput() buildOutputFromBlocks(); else { +<<<<<<< HEAD if (join_data_avg_perkey_rows < output_by_row_list_threshold) buildOutputFromBlocks(); else +======= + if (join_data_avg_perkey_rows < sort_right_perkey_rows_threshold) + buildOutputFromBlocks(); + else if (join_data_sorted) +>>>>>>> add threshold for table rows { for (size_t i = 0; i < this->size(); ++i) { @@ -44,14 +53,19 @@ void AddedColumns::buildOutput() if (row_ref_i) { const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); +<<<<<<< HEAD for (auto it = row_ref_list->begin(); it.ok(); ++it) col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); +======= + col->insertRangeFrom(*row_ref_list->block->getByPosition(right_indexes[i]).column, row_ref_list->row_num, row_ref_list->rows); +>>>>>>> add threshold for table rows } else type_name[i].type->insertDefaultInto(*col); } } } +<<<<<<< HEAD } } @@ -74,6 +88,25 @@ void AddedColumns::buildJoinGetOutput() nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); else col->insertFrom(*column_from_block.column, row_ref->row_num); +======= + else + { + for (size_t i = 0; i < this->size(); ++i) + { + auto & col = columns[i]; + for (auto row_ref_i : lazy_output.row_refs) + { + if (row_ref_i) + { + const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); + for (auto it = row_ref_list->begin(); it.ok(); ++it) + col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); + } + else + type_name[i].type->insertDefaultInto(*col); + } + } +>>>>>>> add threshold for table rows } } } @@ -82,7 +115,11 @@ template<> template void AddedColumns::buildOutputFromBlocks() { +<<<<<<< HEAD if (this->size() == 0) +======= + if (this->size() == 0) +>>>>>>> add threshold for table rows return; std::vector blocks; std::vector row_nums; @@ -123,6 +160,32 @@ void AddedColumns::buildOutputFromBlocks() col->insertFrom(*blocks[j]->getByPosition(right_indexes[i]).column, row_nums[j]); else type_name[i].type->insertDefaultInto(*col); +<<<<<<< HEAD +======= + } + } +} + +template<> +void AddedColumns::buildJoinGetOutput() +{ + for (size_t i = 0; i < this->size(); ++i) + { + auto & col = columns[i]; + for (auto row_ref_i : lazy_output.row_refs) + { + if (!row_ref_i) + { + type_name[i].type->insertDefaultInto(*col); + continue; + } + const auto * row_ref = reinterpret_cast(row_ref_i); + const auto & column_from_block = row_ref->block->getByPosition(right_indexes[i]); + if (auto * nullable_col = typeid_cast(col.get()); nullable_col && !column_from_block.column->isNullable()) + nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); + else + col->insertFrom(*column_from_block.column, row_ref->row_num); +>>>>>>> add threshold for table rows } } } diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h index f1b95a63be6..5ae69fbbf66 100644 --- a/src/Interpreters/HashJoin/AddedColumns.h +++ b/src/Interpreters/HashJoin/AddedColumns.h @@ -196,6 +196,12 @@ private: } } + /** Build output from the blocks that extract from `RowRef` or `RowRefList`, to avoid block cache miss which may cause performance slow down. + * And This problem would happen it we directly build output from `RowRef` or `RowRefList`. + */ + template + void buildOutputFromBlocks(); + MutableColumns columns; bool is_join_get; std::vector right_indexes; diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 9c07a71e614..6f332118f8a 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -649,7 +649,6 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) } data->keys_to_join = total_rows; shrinkStoredBlocksToFit(total_bytes); - return table_join->sizeLimits().check(total_rows, total_bytes, "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -1361,4 +1360,87 @@ bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table return false; } +template +void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]]) +{ + constexpr JoinFeatures join_features; + if constexpr (join_features.is_all_join && (join_features.left || join_features.inner)) + { + auto merge_rows_into_one_block = [&](BlocksList & blocks, RowRefList & rows_ref) + { + auto it = rows_ref.begin(); + if (it.ok()) + { + if (blocks.empty() || blocks.back().rows() > DEFAULT_BLOCK_SIZE) + blocks.emplace_back(it->block->cloneEmpty()); + } + else + { + return; + } + auto & block = blocks.back(); + size_t start_row = block.rows(); + for (; it.ok(); ++it) + { + for (size_t i = 0; i < block.columns(); ++i) + { + auto & col = *(block.getByPosition(i).column->assumeMutable()); + col.insertFrom(*it->block->getByPosition(i).column, it->row_num); + } + } + if (block.rows() > start_row) + { + RowRefList new_rows_ref(&block, start_row, block.rows() - start_row); + rows_ref = std::move(new_rows_ref); + } + }; + + auto visit_rows_map = [&](BlocksList & blocks, MapsAll & rows_map) + { + switch (data->type) + { + #define M(TYPE) \ + case Type::TYPE: \ + {\ + rows_map.TYPE->forEachMapped([&](RowRefList & rows_ref) { merge_rows_into_one_block(blocks, rows_ref); }); \ + break; \ + } + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + default: + break; + } + }; + BlocksList sorted_blocks; + visit_rows_map(sorted_blocks, map); + data->blocks.swap(sorted_blocks); + } +} + +void HashJoin::tryRerangeRightTableData() +{ + if ((kind != JoinKind::Inner && kind != JoinKind::Left) || strictness != JoinStrictness::All || table_join->getMixedJoinExpression()) + return; + + if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1) + return; + + if (data->keys_to_join == 0) + data->keys_to_join = getTotalRowCount(); + if (sample_block_with_columns_to_add.columns() == 0 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold()) + { + LOG_DEBUG(log, "The joined right table total rows :{}, total keys :{}, columns added:{}", + data->rows_to_join, data->keys_to_join, sample_block_with_columns_to_add.columns()); + return; + } + std::cout << "sort right table rows" << std::endl; + joinDispatch( + kind, + strictness, + data->maps.front(), + [&](auto kind_, auto strictness_, auto & map_) { tryRerangeRightTableDataImpl(map_); }); + std::cout << "sort right finished" << std::endl; + data->sorted = true; +} + } diff --git a/src/Interpreters/HashJoin/HashJoin.h b/src/Interpreters/HashJoin/HashJoin.h index d645b8e9273..230343691ea 100644 --- a/src/Interpreters/HashJoin/HashJoin.h +++ b/src/Interpreters/HashJoin/HashJoin.h @@ -345,11 +345,12 @@ public: size_t blocks_allocated_size = 0; size_t blocks_nullmaps_allocated_size = 0; - /// Number of rows of right table to join size_t rows_to_join = 0; /// Number of keys of right table to join size_t keys_to_join = 0; + /// Whether the right table reranged by key + bool sorted = false; size_t avgPerKeyRows() const { @@ -465,6 +466,10 @@ private: void validateAdditionalFilterExpression(std::shared_ptr additional_filter_expression); bool needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const; + + void tryRerangeRightTableData() override; + template + void tryRerangeRightTableDataImpl(Map & map); }; } diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 97ad57d26ea..9d94c3f62c2 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -121,7 +121,142 @@ private: std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags); + JoinStuff::JoinUsedFlags & used_flags) + { + constexpr JoinFeatures join_features; + + size_t rows = added_columns.rows_to_add; + if constexpr (need_filter) + added_columns.filter = IColumn::Filter(rows, 0); + if constexpr (!flag_per_row && (STRICTNESS == JoinStrictness::All || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Right))) + added_columns.output_by_row_list = true; + + Arena pool; + + if constexpr (join_features.need_replication) + added_columns.offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t i = 0; + for (; i < rows; ++i) + { + if constexpr (join_features.need_replication) + { + if (unlikely(current_offset >= max_joined_block_rows)) + { + added_columns.offsets_to_replicate->resize_assume_reserved(i); + added_columns.filter.resize_assume_reserved(i); + break; + } + } + + bool right_row_found = false; + + KnownRowsHolder known_rows; + for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) + { + const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(i); + using FindResult = typename KeyGetter::FindResult; + auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); + + if (find_result.isFound()) + { + right_row_found = true; + auto & mapped = find_result.getMapped(); + if constexpr (join_features.is_asof_join) + { + const IColumn & left_asof_key = added_columns.leftAsofKey(); + + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref && row_ref->block) + { + setUsed(added_columns.filter, i); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref->block, row_ref->row_num, 0); + else + used_flags.template setUsed(find_result); + + added_columns.appendFromBlock(row_ref, join_features.add_missing); + } + else + addNotFoundRow(added_columns, current_offset); + } + else if constexpr (join_features.is_all_join) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) + { + /// Use first appeared left key + it needs left columns replication + bool used_once = used_flags.template setUsedOnce(find_result); + if (used_once) + { + auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; + setUsed(added_columns.filter, i); + addFoundRowAll( + mapped, added_columns, current_offset, known_rows, used_flags_opt); + } + } + else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) + { + bool used_once = used_flags.template setUsedOnce(find_result); + + /// Use first appeared left key only + if (used_once) + { + setUsed(added_columns.filter, i); + added_columns.appendFromBlock(&mapped, join_features.add_missing); + } + + break; + } + else if constexpr (join_features.is_any_join && join_features.full) + { + /// TODO + } + else if constexpr (join_features.is_anti_join) + { + if constexpr (join_features.right && join_features.need_flags) + used_flags.template setUsed(find_result); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) + { + setUsed(added_columns.filter, i); + used_flags.template setUsed(find_result); + added_columns.appendFromBlock(&mapped, join_features.add_missing); + + if (join_features.is_any_or_semi_join) + { + break; + } + } + } + } + + if (!right_row_found) + { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(added_columns.filter, i); + addNotFoundRow(added_columns, current_offset); + } + + if constexpr (join_features.need_replication) + { + (*added_columns.offsets_to_replicate)[i] = current_offset; + } + } + + added_columns.applyLazyDefaults(); + return i; + } template static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]); @@ -131,7 +266,120 @@ private: size_t left_start_row, const std::vector & selected_rows, const std::vector & row_replicate_offset, - AddedColumns & added_columns); + AddedColumns & added_columns) + { + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; + } + const Block & sample_right_block = *((*selected_rows.begin())->block); + if (!sample_right_block || !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row->block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row->row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = row_replicate_offset[i]; + size_t rows = left_offset - prev_left_offset; + if (rows) + new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); + prev_left_offset = left_offset; + } + executed_block.insert({std::move(new_col), src_col->type, col_name}); + } + if (!executed_block) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", + required_cols.toString(), + sample_right_block.dumpNames(), + added_columns.left_block.dumpNames()); + } + + for (const auto & col : executed_block.getColumnsWithTypeAndName()) + if (!col.column || !col.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); + + added_columns.additional_filter_expression->execute(executed_block); + result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); + executed_block.clear(); + } while (false); + + result_column = result_column->convertToFullIfNeeded(); + if (result_column->isNullable()) + { + /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros + /// Trying to avoid copying data, since we are the only owner of the column. + ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); + + MutableColumnPtr mutable_column; + { + ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); + result_column.reset(); + mutable_column = IColumn::mutate(std::move(nested_column)); + } + + auto & column_data = assert_cast(*mutable_column).getData(); + const auto & mask_column_data = assert_cast(*mask_column).getData(); + for (size_t i = 0; i < column_data.size(); ++i) + { + if (mask_column_data[i]) + column_data[i] = 0; + } + return mutable_column; + } + return result_column; + } /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. template @@ -141,7 +389,177 @@ private: AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], bool need_filter [[maybe_unused]], - bool flag_per_row [[maybe_unused]]); + bool need_flags [[maybe_unused]], + bool add_missing [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) + { + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// For all right join, flag_per_row is true, we need mark used flags for each row. + if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it, add_missing); + total_added_rows += 1; + if (need_flags) + used_flags.template setUsed((*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); + } + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it, add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + } + if (!any_matched) + { + if (add_missing) + addNotFoundRow(added_columns, total_added_rows); + else + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row && need_flags) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if (add_missing) + added_columns.applyLazyDefaults(); + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (need_replication) + { + // Add a check for current_added_rows to avoid run the filter expression on too small size batch. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + exceeded_max_block_rows = true; + } + } + + if constexpr (need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; + } /// Cut first num_rows rows from block in place and returns block with remaining rows static Block sliceBlock(Block & block, size_t num_rows); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 7374348da50..8f648de2538 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -115,6 +115,7 @@ public: /// Peek next stream of delayed joined blocks. virtual IBlocksStreamPtr getDelayedBlocks() { return nullptr; } virtual bool hasDelayedBlocks() const { return false; } + virtual void tryRerangeRightTableData() {} virtual IBlocksStreamPtr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0; diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 7c98c47dd11..f8ac68191d6 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -123,6 +123,7 @@ struct RowRefList : RowRef RowRefList() {} /// NOLINT RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_), rows(1) {} + RowRefList(const Block * block_, size_t row_start_, size_t rows_) : RowRef(block_, row_start_), rows(static_cast(rows_)) {} ForwardIterator begin() const { return ForwardIterator(this); } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..f2fb6327129 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -299,13 +299,17 @@ IProcessor::Status FillingRightJoinSideTransform::prepare() void FillingRightJoinSideTransform::work() { - auto block = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); + auto & input = inputs.front(); + auto block = input.getHeader().cloneWithColumns(chunk.detachColumns()); if (for_totals) join->setTotals(block); else stop_reading = !join->addBlockToJoin(block); + if (input.isFinished()) + join->tryRerangeRightTableData(); + set_totals = for_totals; } From 29c94195e1830e098b3973b43ff6272012735dac Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 12 Aug 2024 18:01:40 +0800 Subject: [PATCH 366/844] add setting tests/performance/all_join_opt.xml --- src/Core/SettingsChangesHistory.cpp | 255 +----------- src/Interpreters/HashJoin/HashJoin.cpp | 5 +- src/Interpreters/HashJoin/HashJoinMethods.h | 426 +------------------- tests/performance/all_join_opt.xml | 8 +- 4 files changed, 11 insertions(+), 683 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b975c6b2fad..9ddf40e87b1 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -95,6 +95,8 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index 6f332118f8a..e394b9913b5 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -1363,7 +1363,7 @@ bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table template void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]]) { - constexpr JoinFeatures join_features; + constexpr JoinFeatures join_features; if constexpr (join_features.is_all_join && (join_features.left || join_features.inner)) { auto merge_rows_into_one_block = [&](BlocksList & blocks, RowRefList & rows_ref) @@ -1433,13 +1433,12 @@ void HashJoin::tryRerangeRightTableData() data->rows_to_join, data->keys_to_join, sample_block_with_columns_to_add.columns()); return; } - std::cout << "sort right table rows" << std::endl; joinDispatch( kind, strictness, data->maps.front(), + false, [&](auto kind_, auto strictness_, auto & map_) { tryRerangeRightTableDataImpl(map_); }); - std::cout << "sort right finished" << std::endl; data->sorted = true; } diff --git a/src/Interpreters/HashJoin/HashJoinMethods.h b/src/Interpreters/HashJoin/HashJoinMethods.h index 9d94c3f62c2..c5b54a62f36 100644 --- a/src/Interpreters/HashJoin/HashJoinMethods.h +++ b/src/Interpreters/HashJoin/HashJoinMethods.h @@ -83,7 +83,6 @@ public: const Block & block_with_columns_to_add, const MapsTemplateVector & maps_, bool is_join_get = false); - private: template static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes); @@ -121,142 +120,7 @@ private: std::vector && key_getter_vector, const std::vector & mapv, AddedColumns & added_columns, - JoinStuff::JoinUsedFlags & used_flags) - { - constexpr JoinFeatures join_features; - - size_t rows = added_columns.rows_to_add; - if constexpr (need_filter) - added_columns.filter = IColumn::Filter(rows, 0); - if constexpr (!flag_per_row && (STRICTNESS == JoinStrictness::All || (STRICTNESS == JoinStrictness::Semi && KIND == JoinKind::Right))) - added_columns.output_by_row_list = true; - - Arena pool; - - if constexpr (join_features.need_replication) - added_columns.offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t i = 0; - for (; i < rows; ++i) - { - if constexpr (join_features.need_replication) - { - if (unlikely(current_offset >= max_joined_block_rows)) - { - added_columns.offsets_to_replicate->resize_assume_reserved(i); - added_columns.filter.resize_assume_reserved(i); - break; - } - } - - bool right_row_found = false; - - KnownRowsHolder known_rows; - for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) - { - const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(i); - using FindResult = typename KeyGetter::FindResult; - auto find_result = row_acceptable ? key_getter_vector[onexpr_idx].findKey(*(mapv[onexpr_idx]), i, pool) : FindResult(); - - if (find_result.isFound()) - { - right_row_found = true; - auto & mapped = find_result.getMapped(); - if constexpr (join_features.is_asof_join) - { - const IColumn & left_asof_key = added_columns.leftAsofKey(); - - auto row_ref = mapped->findAsof(left_asof_key, i); - if (row_ref && row_ref->block) - { - setUsed(added_columns.filter, i); - if constexpr (flag_per_row) - used_flags.template setUsed(row_ref->block, row_ref->row_num, 0); - else - used_flags.template setUsed(find_result); - - added_columns.appendFromBlock(row_ref, join_features.add_missing); - } - else - addNotFoundRow(added_columns, current_offset); - } - else if constexpr (join_features.is_all_join) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) - { - /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); - if (used_once) - { - auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; - setUsed(added_columns.filter, i); - addFoundRowAll( - mapped, added_columns, current_offset, known_rows, used_flags_opt); - } - } - else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) - { - bool used_once = used_flags.template setUsedOnce(find_result); - - /// Use first appeared left key only - if (used_once) - { - setUsed(added_columns.filter, i); - added_columns.appendFromBlock(&mapped, join_features.add_missing); - } - - break; - } - else if constexpr (join_features.is_any_join && join_features.full) - { - /// TODO - } - else if constexpr (join_features.is_anti_join) - { - if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); - } - else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) - { - setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); - added_columns.appendFromBlock(&mapped, join_features.add_missing); - - if (join_features.is_any_or_semi_join) - { - break; - } - } - } - } - - if (!right_row_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(added_columns.filter, i); - addNotFoundRow(added_columns, current_offset); - } - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - } - - added_columns.applyLazyDefaults(); - return i; - } + JoinStuff::JoinUsedFlags & used_flags); template static void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unused]]); @@ -266,120 +130,7 @@ private: size_t left_start_row, const std::vector & selected_rows, const std::vector & row_replicate_offset, - AddedColumns & added_columns) - { - ColumnPtr result_column; - do - { - if (selected_rows.empty()) - { - result_column = ColumnUInt8::create(); - break; - } - const Block & sample_right_block = *((*selected_rows.begin())->block); - if (!sample_right_block || !added_columns.additional_filter_expression) - { - auto filter = ColumnUInt8::create(); - filter->insertMany(1, selected_rows.size()); - result_column = std::move(filter); - break; - } - - auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); - if (required_cols.empty()) - { - Block block; - added_columns.additional_filter_expression->execute(block); - result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); - break; - } - NameSet required_column_names; - for (auto & col : required_cols) - required_column_names.insert(col.name); - - Block executed_block; - size_t right_col_pos = 0; - for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) - { - if (required_column_names.contains(col.name)) - { - auto new_col = col.column->cloneEmpty(); - for (const auto & selected_row : selected_rows) - { - const auto & src_col = selected_row->block->getByPosition(right_col_pos); - new_col->insertFrom(*src_col.column, selected_row->row_num); - } - executed_block.insert({std::move(new_col), col.type, col.name}); - } - right_col_pos += 1; - } - if (!executed_block) - { - result_column = ColumnUInt8::create(); - break; - } - - for (const auto & col_name : required_column_names) - { - const auto * src_col = added_columns.left_block.findByName(col_name); - if (!src_col) - continue; - auto new_col = src_col->column->cloneEmpty(); - size_t prev_left_offset = 0; - for (size_t i = 1; i < row_replicate_offset.size(); ++i) - { - const size_t & left_offset = row_replicate_offset[i]; - size_t rows = left_offset - prev_left_offset; - if (rows) - new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); - prev_left_offset = left_offset; - } - executed_block.insert({std::move(new_col), src_col->type, col_name}); - } - if (!executed_block) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "required columns: [{}], but not found any in left/right table. right table: {}, left table: {}", - required_cols.toString(), - sample_right_block.dumpNames(), - added_columns.left_block.dumpNames()); - } - - for (const auto & col : executed_block.getColumnsWithTypeAndName()) - if (!col.column || !col.type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); - - added_columns.additional_filter_expression->execute(executed_block); - result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); - executed_block.clear(); - } while (false); - - result_column = result_column->convertToFullIfNeeded(); - if (result_column->isNullable()) - { - /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros - /// Trying to avoid copying data, since we are the only owner of the column. - ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); - - MutableColumnPtr mutable_column; - { - ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); - result_column.reset(); - mutable_column = IColumn::mutate(std::move(nested_column)); - } - - auto & column_data = assert_cast(*mutable_column).getData(); - const auto & mask_column_data = assert_cast(*mask_column).getData(); - for (size_t i = 0; i < column_data.size(); ++i) - { - if (mask_column_data[i]) - column_data[i] = 0; - } - return mutable_column; - } - return result_column; - } + AddedColumns & added_columns); /// First to collect all matched rows refs by join keys, then filter out rows which are not true in additional filter expression. template @@ -389,177 +140,7 @@ private: AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], bool need_filter [[maybe_unused]], - bool need_flags [[maybe_unused]], - bool add_missing [[maybe_unused]], - bool flag_per_row [[maybe_unused]]) - { - size_t left_block_rows = added_columns.rows_to_add; - if (need_filter) - added_columns.filter = IColumn::Filter(left_block_rows, 0); - - std::unique_ptr pool; - - if constexpr (need_replication) - added_columns.offsets_to_replicate = std::make_unique(left_block_rows); - - std::vector row_replicate_offset; - row_replicate_offset.reserve(left_block_rows); - - using FindResult = typename KeyGetter::FindResult; - size_t max_joined_block_rows = added_columns.max_joined_block_rows; - size_t left_row_iter = 0; - PreSelectedRows selected_rows; - selected_rows.reserve(left_block_rows); - std::vector find_results; - find_results.reserve(left_block_rows); - bool exceeded_max_block_rows = false; - IColumn::Offset total_added_rows = 0; - IColumn::Offset current_added_rows = 0; - - auto collect_keys_matched_rows_refs = [&]() - { - pool = std::make_unique(); - find_results.clear(); - row_replicate_offset.clear(); - row_replicate_offset.push_back(0); - current_added_rows = 0; - selected_rows.clear(); - for (; left_row_iter < left_block_rows; ++left_row_iter) - { - if constexpr (need_replication) - { - if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) - { - break; - } - } - KnownRowsHolder all_flag_known_rows; - KnownRowsHolder single_flag_know_rows; - for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) - { - const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; - if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) - continue; - - bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); - auto find_result = row_acceptable - ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) - : FindResult(); - - if (find_result.isFound()) - { - auto & mapped = find_result.getMapped(); - find_results.push_back(find_result); - if (flag_per_row) - addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); - else - addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); - } - } - row_replicate_offset.push_back(current_added_rows); - } - }; - - auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) - { - const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); - - size_t prev_replicated_row = 0; - auto selected_right_row_it = selected_rows.begin(); - size_t find_result_index = 0; - for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) - { - bool any_matched = false; - /// For all right join, flag_per_row is true, we need mark used flags for each row. - if (flag_per_row) - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it, add_missing); - total_added_rows += 1; - if (need_flags) - used_flags.template setUsed((*selected_right_row_it)->block, (*selected_right_row_it)->row_num, 0); - } - ++selected_right_row_it; - } - } - else - { - for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) - { - if (filter_flags[replicated_row]) - { - any_matched = true; - added_columns.appendFromBlock(*selected_right_row_it, add_missing); - total_added_rows += 1; - } - ++selected_right_row_it; - } - } - if (!any_matched) - { - if (add_missing) - addNotFoundRow(added_columns, total_added_rows); - else - addNotFoundRow(added_columns, total_added_rows); - } - else - { - if (!flag_per_row && need_flags) - used_flags.template setUsed(find_results[find_result_index]); - if (need_filter) - setUsed(added_columns.filter, left_start_row + i - 1); - if (add_missing) - added_columns.applyLazyDefaults(); - } - find_result_index += (prev_replicated_row != row_replicate_offset[i]); - - if constexpr (need_replication) - { - (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; - } - prev_replicated_row = row_replicate_offset[i]; - } - }; - - while (left_row_iter < left_block_rows && !exceeded_max_block_rows) - { - auto left_start_row = left_row_iter; - collect_keys_matched_rows_refs(); - if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Sizes are mismatched. selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " - "left_start_row: {}", - selected_rows.size(), - current_added_rows, - row_replicate_offset.size(), - left_row_iter, - left_start_row); - } - auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); - copy_final_matched_rows(left_start_row, filter_col); - - if constexpr (need_replication) - { - // Add a check for current_added_rows to avoid run the filter expression on too small size batch. - if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) - exceeded_max_block_rows = true; - } - } - - if constexpr (need_replication) - { - added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); - added_columns.filter.resize_assume_reserved(left_row_iter); - } - added_columns.applyLazyDefaults(); - return left_row_iter; - } + bool flag_per_row [[maybe_unused]]); /// Cut first num_rows rows from block in place and returns block with remaining rows static Block sliceBlock(Block & block, size_t num_rows); @@ -617,4 +198,3 @@ extern template class HashJoinMethods; extern template class HashJoinMethods; } - diff --git a/tests/performance/all_join_opt.xml b/tests/performance/all_join_opt.xml index 0ab9c39f67c..2ecd76ee976 100644 --- a/tests/performance/all_join_opt.xml +++ b/tests/performance/all_join_opt.xml @@ -5,10 +5,10 @@ INSERT INTO test SELECT number % 10000, number % 10000, number % 10000 FROM numbers(10000000) INSERT INTO test1 SELECT number % 1000 , number % 1000, number % 1000 FROM numbers(100000) - SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b - SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b - SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b - SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 + SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 + SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 + SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 DROP TABLE IF EXISTS test DROP TABLE IF EXISTS test1 From b8e967ff9c5b03dcf9376ad316f0a73af133cb90 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 13 Aug 2024 09:41:19 +0800 Subject: [PATCH 367/844] add allowReadCaseInsentitive func --- src/Processors/Formats/IRowInputFormat.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index f8796df8604..07014bec452 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -68,6 +68,8 @@ protected: virtual bool allowSyncAfterError() const { return false; } virtual void syncAfterError(); + virtual bool allReadColumnCaseInsensitive() const { return false; } + /// In case of parse error, try to roll back and parse last one or two rows very carefully /// and collect as much as possible diagnostic information about error. /// If not implemented, returns empty string. From cfa4ca6fb122580b98e4f4630dc14fb047ba6ccb Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 13 Aug 2024 09:46:53 +0800 Subject: [PATCH 368/844] remove useless code --- src/Processors/Formats/IRowInputFormat.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index 07014bec452..f8796df8604 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -68,8 +68,6 @@ protected: virtual bool allowSyncAfterError() const { return false; } virtual void syncAfterError(); - virtual bool allReadColumnCaseInsensitive() const { return false; } - /// In case of parse error, try to roll back and parse last one or two rows very carefully /// and collect as much as possible diagnostic information about error. /// If not implemented, returns empty string. From 21e64f2aa98fbf381971ef855d6de11a7e062c39 Mon Sep 17 00:00:00 2001 From: "Sergey (Finn) Gnezdilov" <72432693+Sergey2Gnezdilov@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:33:13 +0300 Subject: [PATCH 369/844] Update nats.md "kafka_handle_error_mode" fixed on "nats_handle_error_mode" --- docs/en/engines/table-engines/integrations/nats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 78ce537224c..2ea2fdb01ae 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -167,7 +167,7 @@ If you want to change the target table by using `ALTER`, we recommend disabling - `_subject` - NATS message subject. Data type: `String`. -Additional virtual columns when `kafka_handle_error_mode='stream'`: +Additional virtual columns when `nats_handle_error_mode='stream'`: - `_raw_message` - Raw message that couldn't be parsed successfully. Data type: `Nullable(String)`. - `_error` - Exception message happened during failed parsing. Data type: `Nullable(String)`. From 2fcbe2465a44708fc500a3f6123dd575f2f07e5c Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 20 Aug 2024 09:01:43 +0000 Subject: [PATCH 370/844] Fix style in Functions/printf.cpp --- src/Functions/printf.cpp | 18 ++++++++---------- ..._new_functions_must_be_documented.reference | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/Functions/printf.cpp b/src/Functions/printf.cpp index 3cf3efaf534..afacf164a00 100644 --- a/src/Functions/printf.cpp +++ b/src/Functions/printf.cpp @@ -50,13 +50,6 @@ private: return executeNonconstant(input); } - [[maybe_unused]] String toString() const - { - WriteBufferFromOwnString buf; - buf << "format:" << format << ", rows:" << rows << ", is_literal:" << is_literal << ", input:" << input.dumpStructure() << "\n"; - return buf.str(); - } - private: ColumnWithTypeAndName executeLiteral(std::string_view literal) const { @@ -231,9 +224,7 @@ public: const auto & instruction = instructions[i]; try { - // std::cout << "instruction[" << i << "]:" << instructions[i].toString() << std::endl; concat_args[i] = instruction.execute(); - // std::cout << "concat_args[" << i << "]:" << concat_args[i].dumpStructure() << std::endl; } catch (const fmt::v9::format_error & e) { @@ -358,7 +349,14 @@ private: REGISTER_FUNCTION(Printf) { - factory.registerFunction(); + factory.registerFunction( + FunctionDocumentation{.description=R"( +The `printf` function formats the given string with the values (strings, integers, floating-points etc.) listed in the arguments, similar to printf function in C++. +The format string can contain format specifiers starting with `%` character. +Anything not contained in `%` and the following format specifier is considered literal text and copied verbatim into the output. +Literal `%` character can be escaped by `%%`.)", .examples{{"sum", "select printf('%%%s %s %d', 'Hello', 'World', 2024);", "%Hello World 2024"}}, .categories{"String"} +}); + } } diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c39f1fb1ce9..8dd8910c858 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -562,7 +562,6 @@ positionCaseInsensitive positionCaseInsensitiveUTF8 positionUTF8 pow -printf proportionsZTest protocol queryID From add486b62a45a615b1d1c2ee08a945d08b984943 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 20 Aug 2024 17:33:08 +0800 Subject: [PATCH 371/844] rebase and reslove conflict --- src/Core/SettingsChangesHistory.cpp | 2 +- src/Interpreters/HashJoin/AddedColumns.cpp | 82 +++++----------------- src/Interpreters/HashJoin/AddedColumns.h | 8 +-- src/Interpreters/HashJoin/HashJoin.cpp | 4 +- src/Interpreters/TableJoin.cpp | 2 + src/Interpreters/TableJoin.h | 4 ++ tests/performance/all_join_opt.xml | 8 +-- 7 files changed, 33 insertions(+), 77 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 9ddf40e87b1..392f0dbc2ee 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -94,7 +94,7 @@ static std::initializer_list::buildOutput() {} template<> void AddedColumns::buildJoinGetOutput() {} -<<<<<<< HEAD template<> template void AddedColumns::buildOutputFromBlocks() {} -======= ->>>>>>> add threshold for table rows template<> void AddedColumns::buildOutput() @@ -35,15 +32,9 @@ void AddedColumns::buildOutput() buildOutputFromBlocks(); else { -<<<<<<< HEAD if (join_data_avg_perkey_rows < output_by_row_list_threshold) buildOutputFromBlocks(); - else -======= - if (join_data_avg_perkey_rows < sort_right_perkey_rows_threshold) - buildOutputFromBlocks(); else if (join_data_sorted) ->>>>>>> add threshold for table rows { for (size_t i = 0; i < this->size(); ++i) { @@ -53,19 +44,31 @@ void AddedColumns::buildOutput() if (row_ref_i) { const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); -<<<<<<< HEAD - for (auto it = row_ref_list->begin(); it.ok(); ++it) - col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); -======= col->insertRangeFrom(*row_ref_list->block->getByPosition(right_indexes[i]).column, row_ref_list->row_num, row_ref_list->rows); ->>>>>>> add threshold for table rows } else type_name[i].type->insertDefaultInto(*col); } } } -<<<<<<< HEAD + else + { + for (size_t i = 0; i < this->size(); ++i) + { + auto & col = columns[i]; + for (auto row_ref_i : lazy_output.row_refs) + { + if (row_ref_i) + { + const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); + for (auto it = row_ref_list->begin(); it.ok(); ++it) + col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); + } + else + type_name[i].type->insertDefaultInto(*col); + } + } + } } } @@ -88,25 +91,6 @@ void AddedColumns::buildJoinGetOutput() nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); else col->insertFrom(*column_from_block.column, row_ref->row_num); -======= - else - { - for (size_t i = 0; i < this->size(); ++i) - { - auto & col = columns[i]; - for (auto row_ref_i : lazy_output.row_refs) - { - if (row_ref_i) - { - const RowRefList * row_ref_list = reinterpret_cast(row_ref_i); - for (auto it = row_ref_list->begin(); it.ok(); ++it) - col->insertFrom(*it->block->getByPosition(right_indexes[i]).column, it->row_num); - } - else - type_name[i].type->insertDefaultInto(*col); - } - } ->>>>>>> add threshold for table rows } } } @@ -115,11 +99,7 @@ template<> template void AddedColumns::buildOutputFromBlocks() { -<<<<<<< HEAD if (this->size() == 0) -======= - if (this->size() == 0) ->>>>>>> add threshold for table rows return; std::vector blocks; std::vector row_nums; @@ -160,32 +140,6 @@ void AddedColumns::buildOutputFromBlocks() col->insertFrom(*blocks[j]->getByPosition(right_indexes[i]).column, row_nums[j]); else type_name[i].type->insertDefaultInto(*col); -<<<<<<< HEAD -======= - } - } -} - -template<> -void AddedColumns::buildJoinGetOutput() -{ - for (size_t i = 0; i < this->size(); ++i) - { - auto & col = columns[i]; - for (auto row_ref_i : lazy_output.row_refs) - { - if (!row_ref_i) - { - type_name[i].type->insertDefaultInto(*col); - continue; - } - const auto * row_ref = reinterpret_cast(row_ref_i); - const auto & column_from_block = row_ref->block->getByPosition(right_indexes[i]); - if (auto * nullable_col = typeid_cast(col.get()); nullable_col && !column_from_block.column->isNullable()) - nullable_col->insertFromNotNullable(*column_from_block.column, row_ref->row_num); - else - col->insertFrom(*column_from_block.column, row_ref->row_num); ->>>>>>> add threshold for table rows } } } diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h index 5ae69fbbf66..3f90b215602 100644 --- a/src/Interpreters/HashJoin/AddedColumns.h +++ b/src/Interpreters/HashJoin/AddedColumns.h @@ -115,6 +115,7 @@ public: } join_data_avg_perkey_rows = join.getJoinedData()->avgPerKeyRows(); output_by_row_list_threshold = join.getTableJoin().outputByRowListPerkeyRowsThreshold(); + join_data_sorted = join.getJoinedData()->sorted; } size_t size() const { return columns.size(); } @@ -147,6 +148,7 @@ public: std::unique_ptr offsets_to_replicate; bool need_filter = false; bool output_by_row_list = false; + bool join_data_sorted = false; size_t join_data_avg_perkey_rows = 0; size_t output_by_row_list_threshold = 0; IColumn::Filter filter; @@ -196,12 +198,6 @@ private: } } - /** Build output from the blocks that extract from `RowRef` or `RowRefList`, to avoid block cache miss which may cause performance slow down. - * And This problem would happen it we directly build output from `RowRef` or `RowRefList`. - */ - template - void buildOutputFromBlocks(); - MutableColumns columns; bool is_join_get; std::vector right_indexes; diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index e394b9913b5..59888d7a71d 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -1422,12 +1422,12 @@ void HashJoin::tryRerangeRightTableData() if ((kind != JoinKind::Inner && kind != JoinKind::Left) || strictness != JoinStrictness::All || table_join->getMixedJoinExpression()) return; - if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1) + if (!data || data->sorted || data->blocks.empty() || data->maps.size() > 1 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold()) return; if (data->keys_to_join == 0) data->keys_to_join = getTotalRowCount(); - if (sample_block_with_columns_to_add.columns() == 0 || data->rows_to_join > table_join->sortRightTableRowsThreshold() || data->avgPerKeyRows() < table_join->sortRightPerkeyRowsThreshold()) + if (sample_block_with_columns_to_add.columns() == 0) { LOG_DEBUG(log, "The joined right table total rows :{}, total keys :{}, columns added:{}", data->rows_to_join, data->keys_to_join, sample_block_with_columns_to_add.columns()); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 138085f0710..8bcaef77939 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -116,6 +116,8 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_, Temporary , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) , output_by_rowlist_perkey_rows_threshold(settings.join_output_by_rowlist_perkey_rows_threshold) + , sort_right_perkey_rows_threshold(settings.join_to_sort_perkey_rows_threshold) + , sort_right_table_rows_threshold(settings.join_to_sort_table_rows_threshold) , max_memory_usage(settings.max_memory_usage) , tmp_volume(tmp_volume_) , tmp_data(tmp_data_) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 4d626084d81..09d7f0f2b2a 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -149,6 +149,8 @@ private: const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; const size_t output_by_rowlist_perkey_rows_threshold = 0; + const size_t sort_right_perkey_rows_threshold = 0; + const size_t sort_right_table_rows_threshold = 0; /// Value if setting max_memory_usage for query, can be used when max_bytes_in_join is not specified. size_t max_memory_usage = 0; @@ -297,6 +299,8 @@ public: } size_t outputByRowListPerkeyRowsThreshold() const { return output_by_rowlist_perkey_rows_threshold; } + size_t sortRightPerkeyRowsThreshold() const { return sort_right_perkey_rows_threshold; } + size_t sortRightTableRowsThreshold() const { return sort_right_table_rows_threshold; } size_t defaultMaxBytes() const { return default_max_bytes; } size_t maxJoinedBlockRows() const { return max_joined_block_rows; } size_t maxRowsInRightBlock() const { return partial_merge_join_rows_in_right_blocks; } diff --git a/tests/performance/all_join_opt.xml b/tests/performance/all_join_opt.xml index 2ecd76ee976..0ab9c39f67c 100644 --- a/tests/performance/all_join_opt.xml +++ b/tests/performance/all_join_opt.xml @@ -5,10 +5,10 @@ INSERT INTO test SELECT number % 10000, number % 10000, number % 10000 FROM numbers(10000000) INSERT INTO test1 SELECT number % 1000 , number % 1000, number % 1000 FROM numbers(100000) - SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 - SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 - SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 - SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b SETTINGS join_to_sort_table_rows_threshold=100000 + SELECT MAX(test1.a) FROM test INNER JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test LEFT JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test RIGHT JOIN test1 on test.b = test1.b + SELECT MAX(test1.a) FROM test FULL JOIN test1 on test.b = test1.b DROP TABLE IF EXISTS test DROP TABLE IF EXISTS test1 From 27ee4dd61168795623fd07fafe240c85fa8b5e5a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 20 Aug 2024 14:17:05 -0300 Subject: [PATCH 372/844] throw syntax error instead of bad arguments in case of add not identified --- src/Parsers/Access/ParserCreateUserQuery.cpp | 26 ++++++++++++------- ..._multiple_authentication_methods.reference | 2 ++ .../03174_multiple_authentication_methods.sh | 3 +++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 0ef658d2377..cbd0612d246 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -223,14 +223,6 @@ namespace { return IParserBase::wrapParseImpl(pos, [&] { - if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) - { - authentication_methods.emplace_back(std::make_shared()); - authentication_methods.back()->type = AuthenticationType::NO_PASSWORD; - - return true; - } - if (!ParserKeyword{Keyword::IDENTIFIED}.ignore(pos, expected)) return false; @@ -269,6 +261,22 @@ namespace }); } + bool parseIdentifiedOrNotIdentified(IParserBase::Pos & pos, Expected & expected, std::vector> & authentication_methods) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (ParserKeyword{Keyword::NOT_IDENTIFIED}.ignore(pos, expected)) + { + authentication_methods.emplace_back(std::make_shared()); + authentication_methods.back()->type = AuthenticationType::NO_PASSWORD; + + return true; + } + + return parseIdentifiedWith(pos, expected, authentication_methods); + }); + } + bool parseHostsWithoutPrefix(IParserBase::Pos & pos, Expected & expected, AllowedClientHosts & hosts) { @@ -545,7 +553,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { if (auth_data.empty()) { - parsed_identified_with = parseIdentifiedWith(pos, expected, auth_data); + parsed_identified_with = parseIdentifiedOrNotIdentified(pos, expected, auth_data); if (!parsed_identified_with) { diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 8cdc1f9d613..cd8dde2a8a4 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -53,3 +53,5 @@ First auth method can't specify type if WITH keyword is not present SYNTAX_ERROR RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement BAD_ARGUMENTS +ADD NOT IDENTIFIED should result in syntax error +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 5e4c5048d55..e25bbbabb50 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -147,4 +147,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED plaintext_password echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" +echo "ADD NOT IDENTIFIED should result in syntax error" +${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" From 1980959c8b032841763acff7d5ad9757cb636e10 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 20 Aug 2024 14:27:53 -0300 Subject: [PATCH 373/844] exception message --- src/Parsers/Access/ParserCreateUserQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index cbd0612d246..b56dda79875 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -650,12 +650,12 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (has_no_password_authentication_method && auth_data.size() > 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "NO_PASSWORD Authentication method cannot co-exist with other authentication methods"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods."); } if (has_no_password_authentication_method && parsed_add_identified_with) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The authentication method 'no_password' cannot be used with the ADD keyword. " + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods. " "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); } From f0223aeddef16c041d16f3548dc7573e79fa2ced Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 20 Aug 2024 16:28:12 -0300 Subject: [PATCH 374/844] add on cluster tests --- ..._multiple_authentication_methods.reference | 75 ++++++++ .../03174_multiple_authentication_methods.sh | 166 +++++++++--------- 2 files changed, 162 insertions(+), 79 deletions(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index cd8dde2a8a4..f45d371f156 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -55,3 +55,78 @@ RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement BAD_ARGUMENTS ADD NOT IDENTIFIED should result in syntax error SYNTAX_ERROR +localhost 9000 0 0 0 +localhost 9000 0 0 0 +Basic authentication after user creation +1 +localhost 9000 0 0 0 +Changed password, old password should not work +AUTHENTICATION_FAILED +New password should work +1 +localhost 9000 0 0 0 +Two new passwords were added, should both work +1 +1 +localhost 9000 0 0 0 +Authenticating with ssh key +1 +Altering credentials and keeping only bcrypt_password +localhost 9000 0 0 0 +Asserting SSH does not work anymore +AUTHENTICATION_FAILED +Asserting bcrypt_password works +1 +Adding new bcrypt_password +localhost 9000 0 0 0 +Both current authentication methods should work +1 +1 +Reset authentication methods to new +localhost 9000 0 0 0 +Only the latest should work, below should fail +AUTHENTICATION_FAILED +Should work +1 +Multiple identified with, not allowed +Syntax error +localhost 9000 0 0 0 +CREATE Multiple identified with, not allowed +Syntax error +localhost 9000 0 0 0 +Create user with no identification +localhost 9000 0 0 0 +Add identified with +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +Try to provide no_password mixed with other authentication methods, should not be allowed +BAD_ARGUMENTS +Adding no_password, should fail +BAD_ARGUMENTS +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +Replacing existing authentication methods in favor of no_password, should succeed +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH no_password +Trying to auth with no pwd, should succeed +1 +localhost 9000 0 0 0 +Create user with mix both implicit and explicit auth type, starting with with +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +Create user with mix both implicit and explicit auth type, starting with by +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +Use WITH without providing authentication type, should fail +Syntax error +Create user with ADD identification, should fail, add is not allowed for create query +BAD_ARGUMENTS +Trailing comma should result in syntax error +SYNTAX_ERROR +First auth method can't specify type if WITH keyword is not present +SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement +BAD_ARGUMENTS +ADD NOT IDENTIFIED should result in syntax error +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index e25bbbabb50..4c9914b0cda 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -28,126 +28,134 @@ function test_login_pwd_expect_error test_login_pwd "$1" "$2" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' } -user="u01_03174" +function test +{ + user="u01_03174" -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password BY '1'" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password BY '1'" -echo "Basic authentication after user creation" -test_login_pwd ${user} '1' + echo "Basic authentication after user creation" + test_login_pwd ${user} '1' -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password BY '2'" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password BY '2'" -echo "Changed password, old password should not work" -test_login_pwd_expect_error ${user} '1' + echo "Changed password, old password should not work" + test_login_pwd_expect_error ${user} '1' -echo "New password should work" -test_login_pwd ${user} '2' + echo "New password should work" + test_login_pwd ${user} '2' -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password BY '3', plaintext_password BY '4'" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password BY '3', plaintext_password BY '4'" -echo "Two new passwords were added, should both work" -test_login_pwd ${user} '3' + echo "Two new passwords were added, should both work" + test_login_pwd ${user} '3' -test_login_pwd ${user} '4' + test_login_pwd ${user} '4' -ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" + ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" -echo ${ssh_key} > ssh_key + echo ${ssh_key} > ssh_key -echo "Authenticating with ssh key" -${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" + echo "Authenticating with ssh key" + ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" -echo "Altering credentials and keeping only bcrypt_password" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH bcrypt_password BY '5'" + echo "Altering credentials and keeping only bcrypt_password" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH bcrypt_password BY '5'" -echo "Asserting SSH does not work anymore" -${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' + echo "Asserting SSH does not work anymore" + ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' -echo "Asserting bcrypt_password works" -test_login_pwd ${user} '5' + echo "Asserting bcrypt_password works" + test_login_pwd ${user} '5' -echo "Adding new bcrypt_password" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH bcrypt_password BY '6'" + echo "Adding new bcrypt_password" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH bcrypt_password BY '6'" -echo "Both current authentication methods should work" -test_login_pwd ${user} '5' -test_login_pwd ${user} '6' + echo "Both current authentication methods should work" + test_login_pwd ${user} '5' + test_login_pwd ${user} '6' -echo "Reset authentication methods to new" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} RESET AUTHENTICATION METHODS TO NEW" + echo "Reset authentication methods to new" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" -echo "Only the latest should work, below should fail" -test_login_pwd_expect_error ${user} '5' + echo "Only the latest should work, below should fail" + test_login_pwd_expect_error ${user} '5' -echo "Should work" -test_login_pwd ${user} '6' + echo "Should work" + test_login_pwd ${user} '6' -echo "Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + echo "Multiple identified with, not allowed" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" -${CLICKHOUSE_CLIENT} --query "DROP USER ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER ${user} $1" -echo "CREATE Multiple identified with, not allowed" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + echo "CREATE Multiple identified with, not allowed" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" -echo "Create user with no identification" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user}" + echo "Create user with no identification" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1" -echo "Add identified with" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '7'" + echo "Add identified with" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" -${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" -echo "Try to provide no_password mixed with other authentication methods, should not be allowed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + echo "Try to provide no_password mixed with other authentication methods, should not be allowed" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "Adding no_password, should fail" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + echo "Adding no_password, should fail" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" -echo "Replacing existing authentication methods in favor of no_password, should succeed" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} IDENTIFIED WITH no_password" -${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + echo "Replacing existing authentication methods in favor of no_password, should succeed" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH no_password" + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" -echo "Trying to auth with no pwd, should succeed" -test_login_no_pwd ${user} + echo "Trying to auth with no pwd, should succeed" + test_login_no_pwd ${user} -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" -echo "Create user with mix both implicit and explicit auth type, starting with with" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" -${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + echo "Create user with mix both implicit and explicit auth type, starting with with" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" -echo "Create user with mix both implicit and explicit auth type, starting with by" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" -${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + echo "Create user with mix both implicit and explicit auth type, starting with by" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" + ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" -echo "Use WITH without providing authentication type, should fail" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" + echo "Use WITH without providing authentication type, should fail" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" -echo "Create user with ADD identification, should fail, add is not allowed for create query" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + echo "Create user with ADD identification, should fail, add is not allowed for create query" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "Trailing comma should result in syntax error" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" + echo "Trailing comma should result in syntax error" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" -echo "First auth method can't specify type if WITH keyword is not present" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + echo "First auth method can't specify type if WITH keyword is not present" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" -echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" -${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" -echo "ADD NOT IDENTIFIED should result in syntax error" -${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + echo "ADD NOT IDENTIFIED should result in syntax error" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" -${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + +} + +test "" + +test "ON CLUSTER test_shard_localhost" From 4fd19c3ad993398b43977229ce629d6fa85c06cb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 20 Aug 2024 17:16:12 -0300 Subject: [PATCH 375/844] do not allow reset auth to new to be used along with add identified clauses --- src/Parsers/Access/ParserCreateUserQuery.cpp | 5 +++++ .../03174_multiple_authentication_methods.reference | 4 ++++ .../0_stateless/03174_multiple_authentication_methods.sh | 3 +++ 3 files changed, 12 insertions(+) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index b56dda79875..abf01182ec8 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -660,6 +660,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } + if (reset_authentication_methods_to_new && !auth_data.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses"); + } + if (!alter && !hosts) { String common_host_pattern; diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index f45d371f156..429b04f3907 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -55,6 +55,8 @@ RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement BAD_ARGUMENTS ADD NOT IDENTIFIED should result in syntax error SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses +BAD_ARGUMENTS localhost 9000 0 0 0 localhost 9000 0 0 0 Basic authentication after user creation @@ -130,3 +132,5 @@ RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement BAD_ARGUMENTS ADD NOT IDENTIFIED should result in syntax error SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses +BAD_ARGUMENTS diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 4c9914b0cda..7fe91f409ac 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -152,6 +152,9 @@ function test echo "ADD NOT IDENTIFIED should result in syntax error" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + echo "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" } From 6fb5051237448addf82faf50677179c551958f1b Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Aug 2024 12:47:12 +0800 Subject: [PATCH 376/844] Fix conflicts --- .../Statistics/ConditionSelectivityEstimator.cpp | 2 -- src/Storages/Statistics/StatisticsMinMax.cpp | 16 ++++++++-------- src/Storages/Statistics/StatisticsMinMax.h | 4 ++-- src/Storages/Statistics/StatisticsTDigest.cpp | 6 +++--- src/Storages/Statistics/StatisticsTDigest.h | 6 +++--- 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp index eadd15a8f7f..432659f51f8 100644 --- a/src/Storages/Statistics/ConditionSelectivityEstimator.cpp +++ b/src/Storages/Statistics/ConditionSelectivityEstimator.cpp @@ -41,8 +41,6 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual { return default_cond_equal_factor * rows; } - return default_cond_equal_factor * rows; - Float64 result = 0; Float64 partial_cnt = 0; for (const auto & [key, estimator] : part_statistics) diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index 88522894427..aafa34bc6d8 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -15,8 +15,8 @@ namespace ErrorCodes extern const int ILLEGAL_STATISTICS; } -StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & statistics_description, const DataTypePtr & data_type_) - : IStatistics(statistics_description) +StatisticsMinMax::StatisticsMinMax(const SingleStatisticsDescription & description, const DataTypePtr & data_type_) + : IStatistics(description) , data_type(data_type_) { } @@ -67,17 +67,17 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const return ((*val_as_float - min) / (max - min)) * row_count; } -void minMaxStatisticsValidator(const SingleStatisticsDescription & /*statistics_description*/, DataTypePtr data_type) +void minMaxStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { - data_type = removeNullable(data_type); - data_type = removeLowCardinalityAndNullable(data_type); + auto inner_data_type = removeNullable(data_type); + inner_data_type = removeLowCardinalityAndNullable(data_type); if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", inner_data_type->getName()); } -StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type) +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) { - return std::make_shared(statistics_description, data_type); + return std::make_shared(description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsMinMax.h b/src/Storages/Statistics/StatisticsMinMax.h index 524494b520b..c60fa810c47 100644 --- a/src/Storages/Statistics/StatisticsMinMax.h +++ b/src/Storages/Statistics/StatisticsMinMax.h @@ -27,7 +27,7 @@ private: DataTypePtr data_type; }; -void minMaxStatisticsValidator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); -StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & statistics_description, DataTypePtr data_type); +void minMaxStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); +StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); } diff --git a/src/Storages/Statistics/StatisticsTDigest.cpp b/src/Storages/Statistics/StatisticsTDigest.cpp index b90075b10be..285b779036f 100644 --- a/src/Storages/Statistics/StatisticsTDigest.cpp +++ b/src/Storages/Statistics/StatisticsTDigest.cpp @@ -53,7 +53,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const return t_digest.getCountEqual(*val_as_float); } -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type) +void tdigestStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { DataTypePtr inner_data_type = removeNullable(data_type); inner_data_type = removeLowCardinalityAndNullable(inner_data_type); @@ -61,9 +61,9 @@ void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); } -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr) +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) { - return std::make_shared(stat); + return std::make_shared(description, data_type); } } diff --git a/src/Storages/Statistics/StatisticsTDigest.h b/src/Storages/Statistics/StatisticsTDigest.h index 91a477c2b94..5e744fee2ce 100644 --- a/src/Storages/Statistics/StatisticsTDigest.h +++ b/src/Storages/Statistics/StatisticsTDigest.h @@ -9,7 +9,7 @@ namespace DB class StatisticsTDigest : public IStatistics { public: - explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); + explicit StatisticsTDigest(const SingleStatisticsDescription & description, const DataTypePtr & data_type_); void update(const ColumnPtr & column) override; @@ -24,7 +24,7 @@ private: DataTypePtr data_type; }; -void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type); -StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr); +void tdigestStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); +StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type); } From fc704020e9d64e15b80f8dd3c6db8871925bca9b Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Aug 2024 13:29:42 +0800 Subject: [PATCH 377/844] Update tests --- src/Storages/Statistics/StatisticsMinMax.cpp | 6 +- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- .../0_stateless/02864_statistics_ddl.sql | 32 ++++ .../02864_statistics_estimation.reference | 47 ------ .../02864_statistics_estimation.sql | 137 ------------------ .../02864_statistics_predicates.reference | 23 +++ .../02864_statistics_predicates.sql | 46 +++++- 7 files changed, 100 insertions(+), 193 deletions(-) delete mode 100644 tests/queries/0_stateless/02864_statistics_estimation.reference delete mode 100644 tests/queries/0_stateless/02864_statistics_estimation.sql diff --git a/src/Storages/Statistics/StatisticsMinMax.cpp b/src/Storages/Statistics/StatisticsMinMax.cpp index aafa34bc6d8..6ae05cb0a6b 100644 --- a/src/Storages/Statistics/StatisticsMinMax.cpp +++ b/src/Storages/Statistics/StatisticsMinMax.cpp @@ -70,9 +70,9 @@ Float64 StatisticsMinMax::estimateLess(const Field & val) const void minMaxStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type) { auto inner_data_type = removeNullable(data_type); - inner_data_type = removeLowCardinalityAndNullable(data_type); - if (!data_type->isValueRepresentedByNumber()) - throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", inner_data_type->getName()); + inner_data_type = removeLowCardinalityAndNullable(inner_data_type); + if (!inner_data_type->isValueRepresentedByNumber()) + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'minmax' do not support type {}", data_type->getName()); } StatisticsPtr minMaxStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type) diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 07311b5b86d..21bc5d227ef 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -56,7 +56,7 @@ void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/ { DataTypePtr inner_data_type = removeNullable(data_type); inner_data_type = removeLowCardinalityAndNullable(inner_data_type); - if (!inner_data_type->isValueRepresentedByNumber()) + if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index 32b56a842b7..0fc29d0c362 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -7,6 +7,7 @@ SET mutations_sync = 1; DROP TABLE IF EXISTS tab; +SET allow_experimental_statistics = 0; -- Error case: Can't create statistics when allow_experimental_statistics = 0 CREATE TABLE tab (col Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError INCORRECT_QUERY } @@ -94,6 +95,30 @@ CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(count_min)) Engine = MergeT CREATE TABLE tab (col UUID STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col IPv6 STATISTICS(count_min)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +-- minmax requires data_type.isValueRepresentedByInteger +-- These types work: +CREATE TABLE tab (col UInt8 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col UInt256 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Float32 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Decimal32(3) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Date32 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col DateTime64 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Enum('hello', 'world') STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col IPv4 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col Nullable(UInt8) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +-- These types don't work: +CREATE TABLE tab (col String STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col FixedString(1) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Array(Float64) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col UUID STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } +CREATE TABLE tab (col IPv6 STATISTICS(minmax)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } + -- CREATE TABLE was easy, ALTER is more fun CREATE TABLE tab @@ -173,6 +198,13 @@ ALTER TABLE tab MODIFY STATISTICS f64 TYPE count_min; ALTER TABLE tab DROP STATI -- Doesn't work: ALTER TABLE tab ADD STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } ALTER TABLE tab MODIFY STATISTICS a TYPE count_min; -- { serverError ILLEGAL_STATISTICS } +-- minmax +-- Works: +ALTER TABLE tab ADD STATISTICS f64 TYPE minmax; ALTER TABLE tab DROP STATISTICS f64; +ALTER TABLE tab MODIFY STATISTICS f64 TYPE minmax; ALTER TABLE tab DROP STATISTICS f64; +-- Doesn't work: +ALTER TABLE tab ADD STATISTICS a TYPE minmax; -- { serverError ILLEGAL_STATISTICS } +ALTER TABLE tab MODIFY STATISTICS a TYPE minmax; -- { serverError ILLEGAL_STATISTICS } -- Any data type changes on columns with statistics are disallowed, for simplicity even if the new data type is compatible with all existing -- statistics objects (e.g. tdigest can be created on Float64 and UInt64) diff --git a/tests/queries/0_stateless/02864_statistics_estimation.reference b/tests/queries/0_stateless/02864_statistics_estimation.reference deleted file mode 100644 index 0e6cad6528b..00000000000 --- a/tests/queries/0_stateless/02864_statistics_estimation.reference +++ /dev/null @@ -1,47 +0,0 @@ -CREATE TABLE default.tab\n(\n `a` String,\n `b` UInt64,\n `c` Int64,\n `d` DateTime,\n `pk` String\n)\nENGINE = MergeTree\nORDER BY pk\nSETTINGS min_bytes_for_wide_part = 0, index_granularity = 8192 -Test statistics count_min: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'0\'), equals(b, 0), equals(c, 0)) (removed) -Test statistics minmax: - Prewhere info - Prewhere filter - Prewhere filter column: and(greater(d, CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) -Test statistics tdigest: - Prewhere info - Prewhere filter - Prewhere filter column: and(greater(d, CAST(9998, \'DateTime\')), less(c, -1), greater(b, 0)) (removed) -Test statistics uniq: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(d, CAST(1, \'DateTime\')), equals(c, 0)) (removed) -Test statistics multi-types: - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(d, CAST(1, \'DateTime\')), less(c, -90), greater(b, 900)) (removed) - Prewhere info - Prewhere filter - Prewhere filter column: and(equals(a, \'10000\'), equals(b, 0), less(c, 0), greater(d, CAST(1, \'DateTime\'))) (removed) -Test statistics implicitly type conversion: -1 -1 -1 -0 -0 -0 -0 -50 -50 -0 -0 -50 -0 -0 -1 -1 -1 -0 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02864_statistics_estimation.sql b/tests/queries/0_stateless/02864_statistics_estimation.sql deleted file mode 100644 index 94fd7d12e0a..00000000000 --- a/tests/queries/0_stateless/02864_statistics_estimation.sql +++ /dev/null @@ -1,137 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS tab SYNC; -DROP TABLE IF EXISTS tab2 SYNC; - -SET allow_experimental_statistics = 1; -SET allow_statistics_optimize = 1; -SET allow_suspicious_low_cardinality_types=1; -SET allow_experimental_analyzer=1; -SET mutations_sync = 2; - -CREATE TABLE tab -( - a String, - b UInt64, - c Int64, - d DateTime, - pk String, -) Engine = MergeTree() ORDER BY pk -SETTINGS min_bytes_for_wide_part = 0; - -SHOW CREATE TABLE tab; - -INSERT INTO tab select toString(number % 10000), number % 1000, -(number % 100), cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 10000; - - -SELECT 'Test statistics count_min:'; - -ALTER TABLE tab ADD STATISTICS a, b, c TYPE count_min; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c; -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE c = 0/*100*/ and b = 0/*10*/ and a = '0'/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); -ALTER TABLE tab DROP STATISTICS a, b, c; - - -SELECT 'Test statistics minmax:'; - -ALTER TABLE tab ADD STATISTICS b, c, d TYPE minmax; -ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; - -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); - -ALTER TABLE tab DROP STATISTICS b, c, d; - - -SELECT 'Test statistics tdigest:'; - -ALTER TABLE tab ADD STATISTICS b, c, d TYPE tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS b, c, d; -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE b > 0/*10000*/ and c < -1/*9990*/ and d > cast(9998, 'DateTime')/*1*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); -ALTER TABLE tab DROP STATISTICS b, c, d; - - -SELECT 'Test statistics uniq:'; - -ALTER TABLE tab ADD STATISTICS a, b, c, d TYPE uniq; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*100*/ and c = 0/*1000*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); -ALTER TABLE tab DROP STATISTICS a, b, c, d; - - -SELECT 'Test statistics multi-types:'; - -ALTER TABLE tab ADD STATISTICS a TYPE count_min, uniq; -ALTER TABLE tab ADD STATISTICS b, c, d TYPE count_min, minmax, uniq, tdigest; -ALTER TABLE tab MATERIALIZE STATISTICS a, b, c, d; - -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d = cast(1, 'DateTime')/*1*/ and c < -90/*900*/ and b > 900/*990*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); - -SELECT replaceRegexpAll(explain, '_CAST', 'CAST') FROM (SELECT replaceRegexpAll(explain, '__table1.|_UInt8|_Int8|_UInt16|_String|_DateTime', '') as explain -FROM (EXPLAIN actions=1 SELECT count(*) FROM tab WHERE d > cast(1, 'DateTime')/*9999*/ and c < 0/*9900*/ and b = 0/*10*/ and a = '10000'/*0*/) -WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%'); - -DROP TABLE IF EXISTS tab SYNC; - -SELECT 'Test statistics implicitly type conversion:'; - -CREATE TABLE tab2 -( - a String, - b UInt64, - c UInt8, - d DateTime, - e Boolean, - f Float64, - g Decimal32(1), - pk String, -) Engine = MergeTree() ORDER BY pk; - -ALTER TABLE tab2 ADD STATISTICS a TYPE count_min, uniq; -ALTER TABLE tab2 ADD STATISTICS b, c, d, e, f, g TYPE count_min, minmax, uniq, tdigest; - -INSERT INTO tab2 select toString(number), number, number, cast(number, 'DateTime'), number % 2, number, toDecimal32(number, 1), toString(number) FROM system.numbers LIMIT 100; - -SELECT count(*) FROM tab2 WHERE a = '0'; -SELECT count(*) FROM tab2 WHERE a = 0; -- { serverError NO_COMMON_TYPE } - -SELECT count(*) FROM tab2 WHERE b = 1; -SELECT count(*) FROM tab2 WHERE b = '1'; -SELECT count(*) FROM tab2 WHERE b = 1.1; -SELECT count(*) FROM tab2 WHERE b = '1.1'; -- { serverError TYPE_MISMATCH } - -SELECT count(*) FROM tab2 WHERE c = 1.1; -SELECT count(*) FROM tab2 WHERE c = 1000; -- out of range of UInt16 - -SELECT count(*) FROM tab2 WHERE d = '2024-08-06 09:58:09'; -SELECT count(*) FROM tab2 WHERE d = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } - -SELECT count(*) FROM tab2 WHERE e = true; -SELECT count(*) FROM tab2 WHERE e = 'true'; -- { serverError TYPE_MISMATCH } -SELECT count(*) FROM tab2 WHERE e = 1; -SELECT count(*) FROM tab2 WHERE e = 2; -SELECT count(*) FROM tab2 WHERE e = 1.1; -SELECT count(*) FROM tab2 WHERE e = '1'; - -SELECT count(*) FROM tab2 WHERE f = 1.1; -SELECT count(*) FROM tab2 WHERE f = '1.1'; -SELECT count(*) FROM tab2 WHERE f = 1; -SELECT count(*) FROM tab2 WHERE f = '1'; - -SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.0, 1); -SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.10, 1); -SELECT count(*) FROM tab2 WHERE g = toDecimal32(1.0, 2); -SELECT count(*) FROM tab2 WHERE g = 1.0; -SELECT count(*) FROM tab2 WHERE g = 1.0; -SELECT count(*) FROM tab2 WHERE g = '1.0'; - -DROP TABLE IF EXISTS tab2 SYNC; diff --git a/tests/queries/0_stateless/02864_statistics_predicates.reference b/tests/queries/0_stateless/02864_statistics_predicates.reference index ffbd7269e05..c2d74300fa9 100644 --- a/tests/queries/0_stateless/02864_statistics_predicates.reference +++ b/tests/queries/0_stateless/02864_statistics_predicates.reference @@ -3,10 +3,13 @@ u64 and = 10 10 10 +10 0 0 0 0 +0 +10 10 10 10 @@ -16,10 +19,13 @@ u64 and < 70 70 70 +70 80 80 80 80 +80 +70 70 70 70 @@ -29,6 +35,8 @@ f64 and = 10 10 10 +10 +0 0 0 0 @@ -37,6 +45,8 @@ f64 and = 10 10 10 +10 +0 0 0 0 @@ -46,6 +56,8 @@ f64 and < 70 70 70 +70 +80 80 80 80 @@ -54,6 +66,8 @@ f64 and < 70 70 70 +70 +80 80 80 80 @@ -63,6 +77,8 @@ dt and = 0 0 0 +0 +10 10 10 10 @@ -72,6 +88,8 @@ dt and < 10000 10000 10000 +10000 +70 70 70 70 @@ -89,6 +107,10 @@ b and = 5000 5000 5000 +5000 +5000 +5000 +0 0 0 0 @@ -96,3 +118,4 @@ b and = s and = 10 10 +10 diff --git a/tests/queries/0_stateless/02864_statistics_predicates.sql b/tests/queries/0_stateless/02864_statistics_predicates.sql index 779116cf19a..473a7bc95ad 100644 --- a/tests/queries/0_stateless/02864_statistics_predicates.sql +++ b/tests/queries/0_stateless/02864_statistics_predicates.sql @@ -12,46 +12,56 @@ CREATE TABLE tab ( u64 UInt64, u64_tdigest UInt64 STATISTICS(tdigest), + u64_minmax UInt64 STATISTICS(minmax), u64_count_min UInt64 STATISTICS(count_min), u64_uniq UInt64 STATISTICS(uniq), f64 Float64, f64_tdigest Float64 STATISTICS(tdigest), + f64_minmax Float64 STATISTICS(minmax), f64_count_min Float64 STATISTICS(count_min), f64_uniq Float64 STATISTICS(uniq), dt DateTime, dt_tdigest DateTime STATISTICS(tdigest), + dt_minmax DateTime STATISTICS(minmax), dt_count_min DateTime STATISTICS(count_min), dt_uniq DateTime STATISTICS(uniq), b Bool, b_tdigest Bool STATISTICS(tdigest), + b_minmax Bool STATISTICS(minmax), b_count_min Bool STATISTICS(count_min), b_uniq Bool STATISTICS(uniq), s String, -- s_tdigest String STATISTICS(tdigest), -- not supported by tdigest - s_count_min String STATISTICS(count_min) - -- s_uniq String STATISTICS(uniq), -- not supported by uniq + -- s_minmax String STATISTICS(minmax), -- not supported by minmax + s_count_min String STATISTICS(count_min), + s_uniq String STATISTICS(uniq) ) Engine = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO tab -- SELECT number % 10000, number % 1000, -(number % 100) FROM system.numbers LIMIT 10000; -SELECT number % 1000, +SELECT number % 1000, -- u64 number % 1000, number % 1000, number % 1000, number % 1000, + number % 1000, -- f64 number % 1000, number % 1000, number % 1000, number % 1000, + number % 1000, -- dt number % 1000, number % 1000, number % 1000, + number % 1000, + number % 2, -- b number % 2, number % 2, number % 2, number % 2, toString(number % 1000), + toString(number % 1000), toString(number % 1000) FROM system.numbers LIMIT 10000; @@ -61,21 +71,25 @@ SELECT 'u64 and ='; SELECT count(*) FROM tab WHERE u64 = 7; SELECT count(*) FROM tab WHERE u64_tdigest = 7; +SELECT count(*) FROM tab WHERE u64_minmax = 7; SELECT count(*) FROM tab WHERE u64_count_min = 7; SELECT count(*) FROM tab WHERE u64_uniq = 7; SELECT count(*) FROM tab WHERE u64 = 7.7; SELECT count(*) FROM tab WHERE u64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE u64_minmax = 7.7; SELECT count(*) FROM tab WHERE u64_count_min = 7.7; SELECT count(*) FROM tab WHERE u64_uniq = 7.7; SELECT count(*) FROM tab WHERE u64 = '7'; SELECT count(*) FROM tab WHERE u64_tdigest = '7'; +SELECT count(*) FROM tab WHERE u64_minmax = '7'; SELECT count(*) FROM tab WHERE u64_count_min = '7'; SELECT count(*) FROM tab WHERE u64_uniq = '7'; SELECT count(*) FROM tab WHERE u64 = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_tdigest = '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_minmax = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_count_min = '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_uniq = '7.7'; -- { serverError TYPE_MISMATCH } @@ -83,21 +97,25 @@ SELECT 'u64 and <'; SELECT count(*) FROM tab WHERE u64 < 7; SELECT count(*) FROM tab WHERE u64_tdigest < 7; +SELECT count(*) FROM tab WHERE u64_minmax < 7; SELECT count(*) FROM tab WHERE u64_count_min < 7; SELECT count(*) FROM tab WHERE u64_uniq < 7; SELECT count(*) FROM tab WHERE u64 < 7.7; SELECT count(*) FROM tab WHERE u64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE u64_minmax < 7.7; SELECT count(*) FROM tab WHERE u64_count_min < 7.7; SELECT count(*) FROM tab WHERE u64_uniq < 7.7; SELECT count(*) FROM tab WHERE u64 < '7'; SELECT count(*) FROM tab WHERE u64_tdigest < '7'; +SELECT count(*) FROM tab WHERE u64_minmax < '7'; SELECT count(*) FROM tab WHERE u64_count_min < '7'; SELECT count(*) FROM tab WHERE u64_uniq < '7'; SELECT count(*) FROM tab WHERE u64 < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_tdigest < '7.7'; -- { serverError TYPE_MISMATCH } +SELECT count(*) FROM tab WHERE u64_minmax < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_count_min < '7.7'; -- { serverError TYPE_MISMATCH } SELECT count(*) FROM tab WHERE u64_uniq < '7.7'; -- { serverError TYPE_MISMATCH } @@ -107,21 +125,25 @@ SELECT 'f64 and ='; SELECT count(*) FROM tab WHERE f64 = 7; SELECT count(*) FROM tab WHERE f64_tdigest = 7; +SELECT count(*) FROM tab WHERE f64_minmax = 7; SELECT count(*) FROM tab WHERE f64_count_min = 7; SELECT count(*) FROM tab WHERE f64_uniq = 7; SELECT count(*) FROM tab WHERE f64 = 7.7; SELECT count(*) FROM tab WHERE f64_tdigest = 7.7; +SELECT count(*) FROM tab WHERE f64_minmax = 7.7; SELECT count(*) FROM tab WHERE f64_count_min = 7.7; SELECT count(*) FROM tab WHERE f64_uniq = 7.7; SELECT count(*) FROM tab WHERE f64 = '7'; SELECT count(*) FROM tab WHERE f64_tdigest = '7'; +SELECT count(*) FROM tab WHERE f64_minmax = '7'; SELECT count(*) FROM tab WHERE f64_count_min = '7'; SELECT count(*) FROM tab WHERE f64_uniq = '7'; SELECT count(*) FROM tab WHERE f64 = '7.7'; SELECT count(*) FROM tab WHERE f64_tdigest = '7.7'; +SELECT count(*) FROM tab WHERE f64_minmax = '7.7'; SELECT count(*) FROM tab WHERE f64_count_min = '7.7'; SELECT count(*) FROM tab WHERE f64_uniq = '7.7'; @@ -129,21 +151,25 @@ SELECT 'f64 and <'; SELECT count(*) FROM tab WHERE f64 < 7; SELECT count(*) FROM tab WHERE f64_tdigest < 7; +SELECT count(*) FROM tab WHERE f64_minmax < 7; SELECT count(*) FROM tab WHERE f64_count_min < 7; SELECT count(*) FROM tab WHERE f64_uniq < 7; SELECT count(*) FROM tab WHERE f64 < 7.7; SELECT count(*) FROM tab WHERE f64_tdigest < 7.7; +SELECT count(*) FROM tab WHERE f64_minmax < 7.7; SELECT count(*) FROM tab WHERE f64_count_min < 7.7; SELECT count(*) FROM tab WHERE f64_uniq < 7.7; SELECT count(*) FROM tab WHERE f64 < '7'; SELECT count(*) FROM tab WHERE f64_tdigest < '7'; +SELECT count(*) FROM tab WHERE f64_minmax < '7'; SELECT count(*) FROM tab WHERE f64_count_min < '7'; SELECT count(*) FROM tab WHERE f64_uniq < '7'; SELECT count(*) FROM tab WHERE f64 < '7.7'; SELECT count(*) FROM tab WHERE f64_tdigest < '7.7'; +SELECT count(*) FROM tab WHERE f64_minmax < '7.7'; SELECT count(*) FROM tab WHERE f64_count_min < '7.7'; SELECT count(*) FROM tab WHERE f64_uniq < '7.7'; @@ -153,11 +179,13 @@ SELECT 'dt and ='; SELECT count(*) FROM tab WHERE dt = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_tdigest = '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_minmax = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_count_min = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_uniq = '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt = 7; SELECT count(*) FROM tab WHERE dt_tdigest = 7; +SELECT count(*) FROM tab WHERE dt_minmax = 7; SELECT count(*) FROM tab WHERE dt_count_min = 7; SELECT count(*) FROM tab WHERE dt_uniq = 7; @@ -165,11 +193,13 @@ SELECT 'dt and <'; SELECT count(*) FROM tab WHERE dt < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_tdigest < '2024-08-08 11:12:13'; +SELECT count(*) FROM tab WHERE dt_minmax < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_count_min < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt_uniq < '2024-08-08 11:12:13'; SELECT count(*) FROM tab WHERE dt < 7; SELECT count(*) FROM tab WHERE dt_tdigest < 7; +SELECT count(*) FROM tab WHERE dt_minmax < 7; SELECT count(*) FROM tab WHERE dt_count_min < 7; SELECT count(*) FROM tab WHERE dt_uniq < 7; @@ -179,21 +209,25 @@ SELECT 'b and ='; SELECT count(*) FROM tab WHERE b = true; SELECT count(*) FROM tab WHERE b_tdigest = true; +SELECT count(*) FROM tab WHERE b_minmax = true; SELECT count(*) FROM tab WHERE b_count_min = true; SELECT count(*) FROM tab WHERE b_uniq = true; SELECT count(*) FROM tab WHERE b = 'true'; SELECT count(*) FROM tab WHERE b_tdigest = 'true'; +SELECT count(*) FROM tab WHERE b_minmax = 'true'; SELECT count(*) FROM tab WHERE b_count_min = 'true'; SELECT count(*) FROM tab WHERE b_uniq = 'true'; SELECT count(*) FROM tab WHERE b = 1; SELECT count(*) FROM tab WHERE b_tdigest = 1; +SELECT count(*) FROM tab WHERE b_minmax = 1; SELECT count(*) FROM tab WHERE b_count_min = 1; SELECT count(*) FROM tab WHERE b_uniq = 1; SELECT count(*) FROM tab WHERE b = 1.1; SELECT count(*) FROM tab WHERE b_tdigest = 1.1; +SELECT count(*) FROM tab WHERE b_minmax = 1.1; SELECT count(*) FROM tab WHERE b_count_min = 1.1; SELECT count(*) FROM tab WHERE b_uniq = 1.1; @@ -203,12 +237,14 @@ SELECT 's and ='; SELECT count(*) FROM tab WHERE s = 7; -- { serverError NO_COMMON_TYPE } -- SELECT count(*) FROM tab WHERE s_tdigest = 7; -- not supported +-- SELECT count(*) FROM tab WHERE s_minmax = 7; -- not supported SELECT count(*) FROM tab WHERE s_count_min = 7; -- { serverError NO_COMMON_TYPE } --- SELECT count(*) FROM tab WHERE s_uniq = 7; -- not supported +SELECT count(*) FROM tab WHERE s_uniq = 7; -- { serverError NO_COMMON_TYPE } SELECT count(*) FROM tab WHERE s = '7'; -- SELECT count(*) FROM tab WHERE s_tdigest = '7'; -- not supported +-- SELECT count(*) FROM tab WHERE s_minmax = '7'; -- not supported SELECT count(*) FROM tab WHERE s_count_min = '7'; --- SELECT count(*) FROM tab WHERE s_uniq = '7'; -- not supported +SELECT count(*) FROM tab WHERE s_uniq = '7'; DROP TABLE tab; From d71afba234f8260df19f66ada151e2f668c54aba Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Aug 2024 18:58:34 +0800 Subject: [PATCH 378/844] Fix tests --- src/Storages/Statistics/StatisticsUniq.cpp | 2 +- tests/queries/0_stateless/02864_statistics_ddl.sql | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/Statistics/StatisticsUniq.cpp b/src/Storages/Statistics/StatisticsUniq.cpp index 21bc5d227ef..5e5b7a67b04 100644 --- a/src/Storages/Statistics/StatisticsUniq.cpp +++ b/src/Storages/Statistics/StatisticsUniq.cpp @@ -56,7 +56,7 @@ void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/ { DataTypePtr inner_data_type = removeNullable(data_type); inner_data_type = removeLowCardinalityAndNullable(inner_data_type); - if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type)) + if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(inner_data_type)) throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); } diff --git a/tests/queries/0_stateless/02864_statistics_ddl.sql b/tests/queries/0_stateless/02864_statistics_ddl.sql index 0fc29d0c362..bcaaa9e7b61 100644 --- a/tests/queries/0_stateless/02864_statistics_ddl.sql +++ b/tests/queries/0_stateless/02864_statistics_ddl.sql @@ -47,7 +47,7 @@ CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(tdigest)) Engine = MergeTre CREATE TABLE tab (col UUID STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col IPv6 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } --- uniq requires data_type.isValueRepresentedByInteger +-- uniq requires data_type.isValueRepresentedByInteger or (Fixed)String -- These types work: CREATE TABLE tab (col UInt8 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; CREATE TABLE tab (col UInt256 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; @@ -62,9 +62,9 @@ CREATE TABLE tab (col IPv4 STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple CREATE TABLE tab (col Nullable(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; CREATE TABLE tab (col LowCardinality(UInt8) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; CREATE TABLE tab (col LowCardinality(Nullable(UInt8)) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; +CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); DROP TABLE tab; -- These types don't work: -CREATE TABLE tab (col String STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } -CREATE TABLE tab (col FixedString(1) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col Array(Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col Tuple(Float64, Float64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } CREATE TABLE tab (col Map(UInt64, UInt64) STATISTICS(uniq)) Engine = MergeTree() ORDER BY tuple(); -- { serverError ILLEGAL_STATISTICS } From d7d40db0365109c55cd9f4eedc1791a6628ba0c2 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 21 Aug 2024 19:07:54 -0300 Subject: [PATCH 379/844] make sure all alters except id related are allowed even if state of user is invalid afterwards --- .../Access/InterpreterCreateUserQuery.cpp | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 5178ad184d6..50d627bfe85 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -82,19 +82,6 @@ namespace user.authentication_methods.clear(); } - auto number_of_authentication_methods = user.authentication_methods.size() + authentication_methods.size(); - if (number_of_authentication_methods > max_number_of_authentication_methods) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user." - "Check the `max_authentication_methods_per_user` setting"); - } - - for (const auto & authentication_method : authentication_methods) - { - user.authentication_methods.emplace_back(authentication_method); - } - // drop existing ones and keep the most recent if (reset_authentication_methods) { @@ -103,6 +90,24 @@ namespace user.authentication_methods.emplace_back(backup_authentication_method); } + if (!authentication_methods.empty()) + { + // we only check if user exceeds the allowed quantity of authentication methods in case the create/alter query includes + // authentication information. Otherwise, we can bypass this check to avoid blocking non-authentication related alters. + auto number_of_authentication_methods = user.authentication_methods.size() + authentication_methods.size(); + if (number_of_authentication_methods > max_number_of_authentication_methods) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user." + "Check the `max_authentication_methods_per_user` setting"); + } + } + + for (const auto & authentication_method : authentication_methods) + { + user.authentication_methods.emplace_back(authentication_method); + } + if (!query.alter) { for (const auto & authentication_method : user.authentication_methods) From b0a0988c5b324a49792b5dd201f7f9dec95aa831 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 22 Aug 2024 10:46:44 +0800 Subject: [PATCH 380/844] change as request --- src/Core/SettingsChangesHistory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 2415323b4a0..b9c18c88652 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -94,7 +94,8 @@ static std::initializer_list Date: Thu, 22 Aug 2024 09:03:02 +0000 Subject: [PATCH 381/844] Proper ErrorCodes --- .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index bf6f9db722c..d3379b90163 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -22,12 +22,12 @@ namespace DB namespace ErrorCodes { - extern const int NETWORK_ERROR; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_SEEK_THROUGH_FILE; - extern const int SEEK_POSITION_OUT_OF_BOUND; - extern const int LOGICAL_ERROR; - extern const int UNKNOWN_FILE_SIZE; +extern const int HDFS_ERROR; +extern const int CANNOT_OPEN_FILE; +extern const int CANNOT_SEEK_THROUGH_FILE; +extern const int SEEK_POSITION_OUT_OF_BOUND; +extern const int LOGICAL_ERROR; +extern const int UNKNOWN_FILE_SIZE; } @@ -135,9 +135,12 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory Date: Thu, 22 Aug 2024 09:41:10 -0300 Subject: [PATCH 382/844] make max_number_of_authentication_methods=0 unlimited --- src/Access/AccessEntityIO.cpp | 2 +- src/Core/ServerSettings.h | 2 +- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 53b073cb750..cc1b7eee807 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -82,7 +82,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition) if (res) throw Exception(ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION, "Two access entities attached in the same file"); res = user = std::make_unique(); - InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query, /* allow_no_password = */ true, /* allow_plaintext_password = */ true, /* max_number_of_authentication_methods = */ std::numeric_limits::max()); + InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query, /* allow_no_password = */ true, /* allow_plaintext_password = */ true, /* max_number_of_authentication_methods = zero is unlimited*/ 0); } else if (auto * create_role_query = query->as()) { diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index ee5eefddc83..dab6401157d 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -116,7 +116,7 @@ namespace DB M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ - M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users.", 0) \ + M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 50d627bfe85..467de1e025c 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -90,7 +90,8 @@ namespace user.authentication_methods.emplace_back(backup_authentication_method); } - if (!authentication_methods.empty()) + // max_number_of_authentication_methods == 0 means unlimited + if (!authentication_methods.empty() && max_number_of_authentication_methods != 0) { // we only check if user exceeds the allowed quantity of authentication methods in case the create/alter query includes // authentication information. Otherwise, we can bypass this check to avoid blocking non-authentication related alters. From 91cceccb4b7a6e7b93c7fa41a777a64015271697 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 22 Aug 2024 09:48:53 -0300 Subject: [PATCH 383/844] add setting docs --- .../server-configuration-parameters/settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 9fce83a0dc4..8a2cdeb3216 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -3121,3 +3121,15 @@ Default value: "default" **See Also** - [Workload Scheduling](/docs/en/operations/workload-scheduling.md) + +## max_authentication_methods_per_user {#max_authentication_methods_per_user} + +The maximum number of authentication methods a user can be created with or altered to. +Changing this setting does not affect existing users. Create/alter authentication related queries will fail if they exceed the limit specified in this setting. +Non authentication create/alter queries will succeed. + +Type: UInt64 + +Default value: 100 + +Zero means unlimited From 62ce2999ae43cc9bf2f27f762ed8202ebe5e2062 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 22 Aug 2024 10:00:20 -0300 Subject: [PATCH 384/844] minor grammar --- docs/en/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 8a2cdeb3216..ff41f501689 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -3125,7 +3125,7 @@ Default value: "default" ## max_authentication_methods_per_user {#max_authentication_methods_per_user} The maximum number of authentication methods a user can be created with or altered to. -Changing this setting does not affect existing users. Create/alter authentication related queries will fail if they exceed the limit specified in this setting. +Changing this setting does not affect existing users. Create/alter authentication-related queries will fail if they exceed the limit specified in this setting. Non authentication create/alter queries will succeed. Type: UInt64 From b4a0a9b00cafc86399c62c244b8ada6e0f8d2db4 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 22 Aug 2024 16:58:19 +0000 Subject: [PATCH 385/844] rework test_async_insert_adaptive_busy_timeout --- .../test.py | 55 ++++++++++--------- .../test_with_table_engine_grant.py | 1 + 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py index 0ea076b1468..5599786026f 100644 --- a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py @@ -104,7 +104,7 @@ def test_with_merge_tree(): _insert_queries_sequentially( table_name, _query_settings, - iterations=100, + iterations=10, max_values_size=1000, array_size_range=[10, 50], ) @@ -125,7 +125,7 @@ def test_with_merge_tree_multithread(): table_name, _query_settings, thread_num=15, - tasks=1000, + tasks=100, max_values_size=1000, array_size_range=[10, 15], ) @@ -152,7 +152,7 @@ def test_with_replicated_merge_tree(): _insert_queries_sequentially( table_name, settings, - iterations=100, + iterations=10, max_values_size=1000, array_size_range=[10, 50], ) @@ -180,7 +180,7 @@ def test_with_replicated_merge_tree_multithread(): table_name, _query_settings, thread_num=15, - tasks=1000, + tasks=100, max_values_size=1000, array_size_range=[10, 15], ) @@ -200,13 +200,13 @@ def test_compare_sequential_inserts_durations_for_adaptive_and_fixed_async_timeo fixed_tm_settings = copy.copy(_query_settings) fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0 - fixed_tm_settings["async_insert_busy_timeout_ms"] = 200 + fixed_tm_settings["async_insert_busy_timeout_ms"] = 100 fixed_tm_run_duration = timeit.timeit( lambda: _insert_queries_sequentially( fixed_tm_table_name, fixed_tm_settings, - iterations=100, + iterations=50, max_values_size=1000, array_size_range=[10, 50], ), @@ -231,13 +231,13 @@ def test_compare_sequential_inserts_durations_for_adaptive_and_fixed_async_timeo adaptive_tm_settings = copy.copy(_query_settings) adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10 - adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 1000 + adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 500 adaptive_tm_run_duration = timeit.timeit( lambda: _insert_queries_sequentially( adaptive_tm_table_name, adaptive_tm_settings, - iterations=100, + iterations=50, max_values_size=1000, array_size_range=[10, 50], ), @@ -268,14 +268,14 @@ def test_compare_parallel_inserts_durations_for_adaptive_and_fixed_async_timeout fixed_tm_settings = copy.copy(_query_settings) fixed_tm_settings["async_insert_use_adaptive_busy_timeout"] = 0 - fixed_tm_settings["async_insert_busy_timeout_ms"] = 200 + fixed_tm_settings["async_insert_busy_timeout_ms"] = 500 fixed_tm_run_duration = timeit.timeit( lambda: _insert_queries_in_parallel( fixed_tm_table_name, fixed_tm_settings, thread_num=15, - tasks=1000, + tasks=150, max_values_size=1000, array_size_range=[10, 50], ), @@ -300,14 +300,14 @@ def test_compare_parallel_inserts_durations_for_adaptive_and_fixed_async_timeout adaptive_tm_settings = copy.copy(_query_settings) adaptive_tm_settings["async_insert_busy_timeout_min_ms"] = 10 - adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 200 + adaptive_tm_settings["async_insert_busy_timeout_max_ms"] = 500 adaptive_tm_run_duration = timeit.timeit( lambda: _insert_queries_in_parallel( adaptive_tm_table_name, adaptive_tm_settings, thread_num=15, - tasks=100, + tasks=150, max_values_size=1000, array_size_range=[10, 50], ), @@ -344,29 +344,34 @@ def test_change_queries_frequency(): settings = copy.copy(_query_settings) min_ms = 50 + max_ms = 200 + settings["async_insert_busy_timeout_min_ms"] = min_ms - settings["async_insert_busy_timeout_max_ms"] = 2000 + settings["async_insert_busy_timeout_max_ms"] = max_ms + + _insert_queries_sequentially( + table_name, + settings, + iterations=50, + max_values_size=1000, + array_size_range=[10, 50], + ) + + select_log_query = f"SELECT countIf(timeout_milliseconds - {min_ms} < 25) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)" + res = node.query(select_log_query) + assert int(res) >= 5 _insert_queries_in_parallel( table_name, settings, - thread_num=15, + thread_num=20, tasks=2000, max_values_size=1000, array_size_range=[10, 15], ) - _insert_queries_sequentially( - table_name, - settings, - iterations=200, - max_values_size=1000, - array_size_range=[10, 50], - ) - - select_log_query = "SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 50" + select_log_query = f"SELECT countIf({max_ms} - timeout_milliseconds < 100) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)" res = node.query(select_log_query) - for line in res.splitlines(): - assert int(line) == min_ms + assert int(res) >= 5 node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) diff --git a/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py index 5fc8f67b75b..ad3d35d8bbd 100644 --- a/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py +++ b/tests/integration/test_grant_and_revoke/test_with_table_engine_grant.py @@ -532,6 +532,7 @@ def test_current_database(): assert "Not enough privileges" in instance.query_and_get_error( "SELECT * FROM table", user="A" ) + instance.query("DROP TABLE default.table SYNC") def test_grant_with_replace_option(): From e6b18f2b300c7aec883d60619f432f7e4b4cca3a Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:05:07 +0000 Subject: [PATCH 386/844] black --- .../test_postgresql_replica_database_engine_2/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 428e9ecdb96..0a364d7802b 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -61,6 +61,7 @@ pg_manager = PostgresManager() pg_manager2 = PostgresManager() pg_manager_instance2 = PostgresManager() + @pytest.fixture(scope="module") def started_cluster(): try: @@ -918,7 +919,7 @@ def test_failed_load_from_snapshot(started_cluster): def test_symbols_in_publication_name(started_cluster): id = uuid.uuid4() - db = f'test_{id}' + db = f"test_{id}" table = f"test_symbols_in_publication_name" pg_manager3 = PostgresManager() From 1c6976d7a529f8b4f910e0913cb8fd71d0466f81 Mon Sep 17 00:00:00 2001 From: joelynch Date: Fri, 23 Aug 2024 16:36:27 +0200 Subject: [PATCH 387/844] Fix zero copy bug with encrypted disk and UNFREEZE When running UNFREEZE with encrypted disk, zookeeper metadata would be erroneously removed here src/Storages/StorageReplicatedMergeTree.cpp#L10418. --- src/Disks/DiskEncrypted.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index f06f5ba8e17..9818c284009 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -350,6 +350,11 @@ public: return delegate; } + UInt32 getRefCount(const String & path) const override { + auto wrapped_path = wrappedPath(path); + return delegate->getRefCount(wrapped_path); + } + #if USE_AWS_S3 std::shared_ptr getS3StorageClient() const override { From cb7ef910e2f3a165c3ab48610fac9c9e1059d7b6 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 23 Aug 2024 13:50:38 -0300 Subject: [PATCH 388/844] Update 03174_multiple_authentication_methods.sh --- .../0_stateless/03174_multiple_authentication_methods.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 7fe91f409ac..310187c62fc 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest, no-parallel, no-replicated-database +# Tag no-replicated-database: https://s3.amazonaws.com/clickhouse-test-reports/65277/43e9a7ba4bbf7f20145531b384a31304895b55bc/stateless_tests__release__old_analyzer__s3__databasereplicated__[1_2].html and https://github.com/ClickHouse/ClickHouse/blob/011c694117845500c82f9563c65930429979982f/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh#L4 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 66be44e7aa719681b7f13e31a91f2c105e705a4f Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 23 Aug 2024 19:09:09 +0000 Subject: [PATCH 389/844] better --- tests/ci/integration_tests_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 074c947fe02..deddcb95cf4 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -33,7 +33,7 @@ CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse" CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge" CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge" -FLAKY_TRIES_COUNT = 10 # run whole pytest several times +FLAKY_TRIES_COUNT = 5 # run whole pytest several times FLAKY_REPEAT_COUNT = 5 # runs test case in single module several times MAX_TIME_SECONDS = 3600 @@ -794,7 +794,7 @@ class ClickhouseIntegrationTestsRunner: } # type: Dict tests_times = defaultdict(float) # type: Dict tests_log_paths = defaultdict(list) - + id_counter = 0 for test_to_run in tests_to_run: tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): @@ -805,12 +805,13 @@ class ClickhouseIntegrationTestsRunner: logging.info("Running tests for the %s time", i) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, - "bugfix" if should_fail else "flaky", + f"bugfix_{id_counter}" if should_fail else f"flaky{id_counter}", [test_to_run], 1, 1, FLAKY_REPEAT_COUNT, ) + id_counter = id_counter + 1 for counter, value in group_counters.items(): logging.info( "Tests from group %s stats, %s count %s", From 83fa51f400fba43cd4bb961419f7dd572ec240a0 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Fri, 23 Aug 2024 21:42:54 +0200 Subject: [PATCH 390/844] PR fixes: update documentation on JSONCompactWithProgress format --- docs/en/interfaces/formats.md | 38 ++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 8795b71e0ac..f2ca803453e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -930,7 +930,43 @@ Columns that are not present in the block will be filled with default values (yo In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object. -Each row is either a metadata object, data object, progress information or statistics object. +Each row is either a metadata object, data object, progress information or statistics object: + +1. **Metadata Object (`meta`)** + - Describes the structure of the data rows. + - Fields: `name` (column name), `type` (data type, e.g., `UInt32`, `String`, etc.). + - Example: `{"meta": [{"name":"id", "type":"UInt32"}, {"name":"name", "type":"String"}]}` + - Appears before any data objects. + +2. **Data Object (`data`)** + - Represents a row of query results. + - Fields: An array with values corresponding to the columns defined in the metadata. + - Example: `{"data":["1", "John Doe"]}` + - Appears after the metadata object, one per row. + +3. **Progress Information Object (`progress`)** + - Provides real-time progress feedback during query execution. + - Fields: `read_rows`, `read_bytes`, `written_rows`, `written_bytes`, `total_rows_to_read`, `result_rows`, `result_bytes`, `elapsed_ns`. + - Example: `{"progress":{"read_rows":"8","read_bytes":"168"}}` + - May appear intermittently. + +4. **Statistics Object (`statistics`)** + - Summarizes query execution statistics. + - Fields: `rows`, `rows_before_limit_at_least`, `elapsed`, `rows_read`, `bytes_read`. + - Example: `{"statistics": {"rows":2, "elapsed":0.001995, "rows_read":8}}` + - Appears at the end. + +5. **Exception Object (`exception`)** + - Represents an error that occurred during query execution. + - Fields: A single text field containing the error message. + - Example: `{"exception": "Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero..."}` + - Appears when an error is encountered. + +6. **Totals Object (`totals`)** + - Provides the totals for each numeric column in the result set. + - Fields: An array with total values corresponding to the columns defined in the metadata. + - Example: `{"totals": ["", "3"]}` + - Appears at the end of the data rows, if applicable. Example: From d12aac7d12de9632e4a97ae7240a0e0dabe5ef59 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 23 Aug 2024 20:11:33 +0000 Subject: [PATCH 391/844] Test 03228_pr_subquery_view_order_by PR + subquery with view and order by --- .../03228_pr_subquery_view_order_by.reference | 20 +++++++++++++++++++ .../03228_pr_subquery_view_order_by.sql | 18 +++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/queries/0_stateless/03228_pr_subquery_view_order_by.reference create mode 100644 tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql diff --git a/tests/queries/0_stateless/03228_pr_subquery_view_order_by.reference b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.reference new file mode 100644 index 00000000000..aad720e27cf --- /dev/null +++ b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.reference @@ -0,0 +1,20 @@ +300 +299 +298 +297 +296 +295 +294 +293 +292 +291 +290 +289 +288 +287 +286 +285 +284 +283 +282 +281 diff --git a/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql new file mode 100644 index 00000000000..b85392e0521 --- /dev/null +++ b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS view1; +DROP TABLE IF EXISTS table1; +CREATE TABLE table1 (number UInt64) ENGINE=MergeTree ORDER BY number SETTINGS index_granularity=1; +INSERT INTO table1 SELECT number FROM numbers(1, 300); +CREATE VIEW view1 AS SELECT number FROM table1; + +SELECT * +FROM +( + SELECT * + FROM view1 +) +ORDER BY number DESC +LIMIT 20 +SETTINGS cluster_for_parallel_replicas = 'parallel_replicas', allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree = 1, parallel_replicas_local_plan = 1, query_plan_lift_up_union = 0; + +DROP TABLE view1; +DROP TABLE table1; From b772140514aa47ada3556f1d24712eae734a5645 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Fri, 23 Aug 2024 22:56:24 +0200 Subject: [PATCH 392/844] Fix PR comments: check exception in the test --- .../0_stateless/03174_json_compact_with_progress.reference | 2 ++ tests/queries/0_stateless/03174_json_compact_with_progress.sh | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.reference b/tests/queries/0_stateless/03174_json_compact_with_progress.reference index b45f296e5c0..fe51e484bc1 100644 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.reference +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.reference @@ -11,3 +11,5 @@ {"data":["c", "1"]} {"totals": ["", "3"]} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":30}} +3 +Value passed to 'throwIf' function is non-zero: diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.sh b/tests/queries/0_stateless/03174_json_compact_with_progress.sh index 8f524cdff76..b15dc7cfdb2 100755 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.sh +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.sh @@ -16,4 +16,8 @@ $CLICKHOUSE_CLIENT -q "SELECT 2;" # Check Totals $CLICKHOUSE_CLIENT -q "SELECT name, count() AS c FROM test_table GROUP BY name WITH TOTALS ORDER BY name FORMAT JSONCompactWithProgress settings max_block_size=2;" | grep -v --text "progress" | sed -E 's/"elapsed":[0-9]+\.[0-9]+/"elapsed":ELAPSED/g' +$CLICKHOUSE_CLIENT -q "SELECT 3;" +# Check exceptions +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d "SELECT throwIf(number = 15), 1::Int64 as a, '\"' from numbers(100) format JSONCompactWithProgress settings output_format_json_quote_64bit_integers=1, max_block_size=10" | grep "exception" | cut -c42-88 + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table;" From 06f0267ed142d76109b2d6d48d3c413bdaf0fc17 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 25 Aug 2024 11:27:43 +0200 Subject: [PATCH 393/844] Do not show statistics if exception happens --- ...JSONCompactWithProgressRowOutputFormat.cpp | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index 0814e5f45d6..cb3398196f4 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -117,18 +117,16 @@ void JSONCompactWithProgressRowOutputFormat::writeProgress() void JSONCompactWithProgressRowOutputFormat::finalizeImpl() { - JSONUtils::writeCompactAdditionalInfo( - row_count, - statistics.rows_before_limit, - statistics.applied_limit, - statistics.watch, - statistics.progress, - settings.write_statistics && exception_message.empty(), - *ostr); - - if (!exception_message.empty()) - { - writeCString("\n", *ostr); + if (exception_message.empty()) { + JSONUtils::writeCompactAdditionalInfo( + row_count, + statistics.rows_before_limit, + statistics.applied_limit, + statistics.watch, + statistics.progress, + settings.write_statistics && exception_message.empty(), + *ostr); + } else { JSONUtils::writeCompactObjectStart(*ostr); JSONUtils::writeException(exception_message, *ostr, settings, 0); JSONUtils::writeCompactObjectEnd(*ostr); From 58aab9b8c789090834dd1db7bb00568db7b66cac Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 25 Aug 2024 11:29:07 +0200 Subject: [PATCH 394/844] Remove wrong bracket in the meta section --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 2 +- .../0_stateless/03174_json_compact_with_progress.reference | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index cb3398196f4..b5cbd6fb232 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -19,7 +19,7 @@ void JSONCompactWithProgressRowOutputFormat::writePrefix() JSONUtils::writeCompactObjectStart(*ostr); JSONUtils::writeCompactMetadata(names, types, settings, *ostr); JSONUtils::writeCompactObjectEnd(*ostr); - writeCString("}\n", *ostr); + writeCString("\n", *ostr); } void JSONCompactWithProgressRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) diff --git a/tests/queries/0_stateless/03174_json_compact_with_progress.reference b/tests/queries/0_stateless/03174_json_compact_with_progress.reference index fe51e484bc1..cdbe7cfcb3e 100644 --- a/tests/queries/0_stateless/03174_json_compact_with_progress.reference +++ b/tests/queries/0_stateless/03174_json_compact_with_progress.reference @@ -1,11 +1,11 @@ 1 -{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}]}} +{"meta": [{"name":"value", "type":"UInt8"}, {"name":"name", "type":"String"}]} {"data":[1, "a"]} {"data":[2, "b"]} {"data":[3, "c"]} {"statistics": {"rows":3, "elapsed":ELAPSED, "rows_read":3, "bytes_read":33}} 2 -{"meta": [{"name":"name", "type":"String"}, {"name":"c", "type":"UInt64"}]}} +{"meta": [{"name":"name", "type":"String"}, {"name":"c", "type":"UInt64"}]} {"data":["a", "1"]} {"data":["b", "1"]} {"data":["c", "1"]} From af3f600b5cca7cadb304e0f69fc281b7015deca9 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 25 Aug 2024 11:35:35 +0200 Subject: [PATCH 395/844] Fix commas in the statistics section --- src/Formats/JSONUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 0918658cf6f..6c2c6fc3700 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -563,17 +563,17 @@ namespace JSONUtils writeCompactObjectStart(out, 0, "statistics"); writeTitle("rows", out, 0, ""); writeIntText(rows, out); - writeFieldCompactDelimiter(out); if (applied_limit) { + writeFieldCompactDelimiter(out); writeTitle("rows_before_limit_at_least", out, 0, ""); writeIntText(rows_before_limit, out); - writeFieldCompactDelimiter(out); } if (write_statistics) { + writeFieldCompactDelimiter(out); writeTitle("elapsed", out, 0, ""); writeText(watch.elapsedSeconds(), out); writeFieldCompactDelimiter(out); From 0e4f2b17cc75a1f09994ad583df69fd067e582f8 Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 25 Aug 2024 11:38:29 +0200 Subject: [PATCH 396/844] Remove unnecessary check --- .../Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index b5cbd6fb232..d1d7355724f 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -124,7 +124,7 @@ void JSONCompactWithProgressRowOutputFormat::finalizeImpl() statistics.applied_limit, statistics.watch, statistics.progress, - settings.write_statistics && exception_message.empty(), + settings.write_statistics, *ostr); } else { JSONUtils::writeCompactObjectStart(*ostr); From 312d32aaf972e01a88fceb2fea60fce424bfc49f Mon Sep 17 00:00:00 2001 From: Alexey Korepanov Date: Sun, 25 Aug 2024 12:14:18 +0200 Subject: [PATCH 397/844] Fix formatting --- .../Impl/JSONCompactWithProgressRowOutputFormat.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp index d1d7355724f..e90864ecdf3 100644 --- a/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactWithProgressRowOutputFormat.cpp @@ -117,7 +117,8 @@ void JSONCompactWithProgressRowOutputFormat::writeProgress() void JSONCompactWithProgressRowOutputFormat::finalizeImpl() { - if (exception_message.empty()) { + if (exception_message.empty()) + { JSONUtils::writeCompactAdditionalInfo( row_count, statistics.rows_before_limit, @@ -126,7 +127,9 @@ void JSONCompactWithProgressRowOutputFormat::finalizeImpl() statistics.progress, settings.write_statistics, *ostr); - } else { + } + else + { JSONUtils::writeCompactObjectStart(*ostr); JSONUtils::writeException(exception_message, *ostr, settings, 0); JSONUtils::writeCompactObjectEnd(*ostr); From 1011f8ef9c2511654937c96b7bea7693a4c9715f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Aug 2024 14:45:41 +0800 Subject: [PATCH 398/844] add uts about orc string encode --- src/Common/CMakeLists.txt | 4 +- .../benchmarks/orc_string_dictionary.cpp | 302 ++++++++++++++++++ 2 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 src/Common/benchmarks/orc_string_dictionary.cpp diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index d4802c28f53..7cc8dd25cd9 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -1,6 +1,6 @@ -if (ENABLE_BENCHMARKS) +#if (ENABLE_BENCHMARKS) add_subdirectory(benchmarks) -endif() +#endif() if (ENABLE_EXAMPLES) add_subdirectory(examples) diff --git a/src/Common/benchmarks/orc_string_dictionary.cpp b/src/Common/benchmarks/orc_string_dictionary.cpp new file mode 100644 index 00000000000..bde7d03e864 --- /dev/null +++ b/src/Common/benchmarks/orc_string_dictionary.cpp @@ -0,0 +1,302 @@ +#include +#include +#include + +class OldSortedStringDictionary +{ +public: + struct DictEntry + { + DictEntry(const char * str, size_t len) : data(str), length(len) { } + const char * data; + size_t length; + }; + + OldSortedStringDictionary() : totalLength(0) { } + + // insert a new string into dictionary, return its insertion order + size_t insert(const char * str, size_t len); + + // reorder input index buffer from insertion order to dictionary order + void reorder(std::vector & idxBuffer) const; + + // get dict entries in insertion order + void getEntriesInInsertionOrder(std::vector &) const; + + size_t size() const; + + // return total length of strings in the dictioanry + uint64_t length() const; + + void clear(); + + // store indexes of insertion order in the dictionary for not-null rows + std::vector idxInDictBuffer; + +private: + struct LessThan + { + bool operator()(const DictEntry & left, const DictEntry & right) const + { + int ret = memcmp(left.data, right.data, std::min(left.length, right.length)); + if (ret != 0) + { + return ret < 0; + } + return left.length < right.length; + } + }; + + std::map dict; + std::vector> data; + uint64_t totalLength; +}; + +// insert a new string into dictionary, return its insertion order +size_t OldSortedStringDictionary::insert(const char * str, size_t len) +{ + auto ret = dict.insert({DictEntry(str, len), dict.size()}); + if (ret.second) + { + // make a copy to internal storage + data.push_back(std::vector(len)); + memcpy(data.back().data(), str, len); + // update dictionary entry to link pointer to internal storage + DictEntry * entry = const_cast(&(ret.first->first)); + entry->data = data.back().data(); + totalLength += len; + } + return ret.first->second; +} + +/** + * Reorder input index buffer from insertion order to dictionary order + * + * We require this function because string values are buffered by indexes + * in their insertion order. Until the entire dictionary is complete can + * we get their sorted indexes in the dictionary in that ORC specification + * demands dictionary should be ordered. Therefore this function transforms + * the indexes from insertion order to dictionary value order for final + * output. + */ +void OldSortedStringDictionary::reorder(std::vector & idxBuffer) const +{ + // iterate the dictionary to get mapping from insertion order to value order + std::vector mapping(dict.size()); + size_t dictIdx = 0; + for (auto it = dict.cbegin(); it != dict.cend(); ++it) + { + mapping[it->second] = dictIdx++; + } + + // do the transformation + for (size_t i = 0; i != idxBuffer.size(); ++i) + { + idxBuffer[i] = static_cast(mapping[static_cast(idxBuffer[i])]); + } +} + +// get dict entries in insertion order +void OldSortedStringDictionary::getEntriesInInsertionOrder(std::vector & entries) const +{ + entries.resize(dict.size()); + for (auto it = dict.cbegin(); it != dict.cend(); ++it) + { + entries[it->second] = &(it->first); + } +} + +// return count of entries +size_t OldSortedStringDictionary::size() const +{ + return dict.size(); +} + +// return total length of strings in the dictioanry +uint64_t OldSortedStringDictionary::length() const +{ + return totalLength; +} + +void OldSortedStringDictionary::clear() +{ + totalLength = 0; + data.clear(); + dict.clear(); +} + + +/** + * Implementation of increasing sorted string dictionary + */ +class NewSortedStringDictionary +{ +public: + struct DictEntry + { + DictEntry(const char * str, size_t len) : data(str), length(len) { } + const char * data; + size_t length; + }; + + struct DictEntryWithIndex + { + DictEntryWithIndex(const char * str, size_t len, size_t index_) : entry(str, len), index(index_) { } + DictEntry entry; + size_t index; + }; + + NewSortedStringDictionary() : totalLength_(0) { } + + // insert a new string into dictionary, return its insertion order + size_t insert(const char * str, size_t len); + + // reorder input index buffer from insertion order to dictionary order + void reorder(std::vector & idxBuffer) const; + + // get dict entries in insertion order + void getEntriesInInsertionOrder(std::vector &) const; + + // return count of entries + size_t size() const; + + // return total length of strings in the dictioanry + uint64_t length() const; + + void clear(); + + // store indexes of insertion order in the dictionary for not-null rows + std::vector idxInDictBuffer; + +private: + struct LessThan + { + bool operator()(const DictEntryWithIndex & l, const DictEntryWithIndex & r) + { + const auto & left = l.entry; + const auto & right = r.entry; + int ret = memcmp(left.data, right.data, std::min(left.length, right.length)); + if (ret != 0) + { + return ret < 0; + } + return left.length < right.length; + } + }; + + mutable std::vector flatDict_; + std::unordered_map keyToIndex; + uint64_t totalLength_; +}; + +// insert a new string into dictionary, return its insertion order +size_t NewSortedStringDictionary::insert(const char * str, size_t len) +{ + size_t index = flatDict_.size(); + auto ret = keyToIndex.emplace(std::string(str, len), index); + if (ret.second) + { + flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index); + totalLength_ += len; + } + return ret.first->second; +} + +/** + * Reorder input index buffer from insertion order to dictionary order + * + * We require this function because string values are buffered by indexes + * in their insertion order. Until the entire dictionary is complete can + * we get their sorted indexes in the dictionary in that ORC specification + * demands dictionary should be ordered. Therefore this function transforms + * the indexes from insertion order to dictionary value order for final + * output. + */ +void NewSortedStringDictionary::reorder(std::vector & idxBuffer) const +{ + // iterate the dictionary to get mapping from insertion order to value order + std::vector mapping(flatDict_.size()); + for (size_t i = 0; i < flatDict_.size(); ++i) + { + mapping[flatDict_[i].index] = i; + } + + // do the transformation + for (size_t i = 0; i != idxBuffer.size(); ++i) + { + idxBuffer[i] = static_cast(mapping[static_cast(idxBuffer[i])]); + } +} + +// get dict entries in insertion order +void NewSortedStringDictionary::getEntriesInInsertionOrder(std::vector & entries) const +{ + std::sort( + flatDict_.begin(), + flatDict_.end(), + [](const DictEntryWithIndex & left, const DictEntryWithIndex & right) { return left.index < right.index; }); + + entries.resize(flatDict_.size()); + for (size_t i = 0; i < flatDict_.size(); ++i) + { + entries[i] = &(flatDict_[i].entry); + } +} + +// return count of entries +size_t NewSortedStringDictionary::size() const +{ + return flatDict_.size(); +} + +// return total length of strings in the dictioanry +uint64_t NewSortedStringDictionary::length() const +{ + return totalLength_; +} + +void NewSortedStringDictionary::clear() +{ + totalLength_ = 0; + keyToIndex.clear(); + flatDict_.clear(); +} + +static std::vector mockStrings() +{ + std::vector res(1000000); + for (auto & s : res) + { + s = "test string dictionary " + std::to_string(rand() % 100); + } + return res; +} + +template +static NO_INLINE std::unique_ptr createAndWriteStringDictionary(const std::vector & strs) +{ + auto dict = std::make_unique(); + for (const auto & str : strs) + { + auto index = dict->insert(str.data(), str.size()); + dict->idxInDictBuffer.push_back(index); + } + dict->reorder(dict->idxInDictBuffer); + + return dict; +} + +template +static void BM_writeStringDictionary(benchmark::State & state) +{ + auto strs = mockStrings(); + for (auto _ : state) + { + auto dict = createAndWriteStringDictionary(strs); + benchmark::DoNotOptimize(dict); + } +} + +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary); + From d6df83d561b7d123d1bc854179a7c5435db7081c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Aug 2024 14:57:51 +0800 Subject: [PATCH 399/844] add uts about orc string encode --- src/Common/benchmarks/CMakeLists.txt | 7 ++++++- .../benchmarks/orc_string_dictionary.cpp | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Common/benchmarks/CMakeLists.txt b/src/Common/benchmarks/CMakeLists.txt index 57ed837db8b..690ced1eb88 100644 --- a/src/Common/benchmarks/CMakeLists.txt +++ b/src/Common/benchmarks/CMakeLists.txt @@ -1,4 +1,4 @@ -clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp) +clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp orc_string_dictionary.cpp) target_link_libraries (integer_hash_tables_and_hashes PRIVATE ch_contrib::gbenchmark_all dbms @@ -7,3 +7,8 @@ target_link_libraries (integer_hash_tables_and_hashes PRIVATE ch_contrib::wyhash ch_contrib::farmhash ch_contrib::xxHash) + +clickhouse_add_executable(orc_string_dictionary orc_string_dictionary.cpp) +target_link_libraries (orc_string_dictionary PRIVATE + ch_contrib::gbenchmark_all + dbms) diff --git a/src/Common/benchmarks/orc_string_dictionary.cpp b/src/Common/benchmarks/orc_string_dictionary.cpp index bde7d03e864..7d4a3b1ab56 100644 --- a/src/Common/benchmarks/orc_string_dictionary.cpp +++ b/src/Common/benchmarks/orc_string_dictionary.cpp @@ -262,12 +262,13 @@ void NewSortedStringDictionary::clear() flatDict_.clear(); } +template static std::vector mockStrings() { std::vector res(1000000); for (auto & s : res) { - s = "test string dictionary " + std::to_string(rand() % 100); + s = "test string dictionary " + std::to_string(rand() % cardinality); } return res; } @@ -286,10 +287,10 @@ static NO_INLINE std::unique_ptr createAndWriteStringDictionary( return dict; } -template +template static void BM_writeStringDictionary(benchmark::State & state) { - auto strs = mockStrings(); + auto strs = mockStrings(); for (auto _ : state) { auto dict = createAndWriteStringDictionary(strs); @@ -297,6 +298,14 @@ static void BM_writeStringDictionary(benchmark::State & state) } } -BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary); -BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 1000); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 1000); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10000); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10000); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100000); +BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100000); From 7aaa0289e157f90441a837544efd485450c032d7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 26 Aug 2024 14:58:57 +0800 Subject: [PATCH 400/844] revert files --- src/Common/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index 7cc8dd25cd9..d4802c28f53 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -1,6 +1,6 @@ -#if (ENABLE_BENCHMARKS) +if (ENABLE_BENCHMARKS) add_subdirectory(benchmarks) -#endif() +endif() if (ENABLE_EXAMPLES) add_subdirectory(examples) From 9339c0593b354dbb54b418425c6cfe1ac37e556c Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 26 Aug 2024 13:37:37 +0200 Subject: [PATCH 401/844] handle timeout in flaky check --- tests/ci/integration_tests_runner.py | 67 ++++++++++++++-------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index deddcb95cf4..a7d5a8c4cf6 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -873,21 +873,45 @@ class ClickhouseIntegrationTestsRunner: def run_impl(self, repo_path, build_path): stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: - return self.run_flaky_check( - repo_path, build_path, should_fail=self.bugfix_validate_check + result_state, status_text, test_result, tests_log_paths = ( + self.run_flaky_check( + repo_path, build_path, should_fail=self.bugfix_validate_check + ) + ) + else: + result_state, status_text, test_result, tests_log_paths = ( + self.run_normal_check(build_path, repo_path) ) - self._install_clickhouse(build_path) + if self.soft_deadline_time < time.time(): + status_text = "Timeout, " + status_text + result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + + if "(memory)" in self.params["context_name"]: + result_state = "success" + + return result_state, status_text, test_result, tests_log_paths + + def run_normal_check(self, build_path, repo_path): + self._install_clickhouse(build_path) logging.info("Pulling images") self._pre_pull_images(repo_path) - logging.info( "Dump iptables before run %s", subprocess.check_output("sudo iptables -nvL", shell=True), ) all_tests = self._get_all_tests(repo_path) - if self.run_by_hash_total != 0: grouped_tests = self.group_test_by_file(all_tests) all_filtered_by_hash_tests = [] @@ -895,7 +919,6 @@ class ClickhouseIntegrationTestsRunner: if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: all_filtered_by_hash_tests += tests_in_group all_tests = all_filtered_by_hash_tests - parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info( "Found %s tests first 3 %s", len(all_tests), " ".join(all_tests[:3]) @@ -927,14 +950,12 @@ class ClickhouseIntegrationTestsRunner: len(not_found_tests), " ".join(not_found_tests[:3]), ) - grouped_tests = self.group_test_by_file(filtered_sequential_tests) i = 0 for par_group in chunks(filtered_parallel_tests, PARALLEL_GROUP_SIZE): grouped_tests[f"parallel{i}"] = par_group i += 1 logging.info("Found %s tests groups", len(grouped_tests)) - counters = { "ERROR": [], "PASSED": [], @@ -945,14 +966,11 @@ class ClickhouseIntegrationTestsRunner: } # type: Dict tests_times = defaultdict(float) tests_log_paths = defaultdict(list) - items_to_run = list(grouped_tests.items()) - logging.info("Total test groups %s", len(items_to_run)) if self.shuffle_test_groups(): logging.info("Shuffling test groups") random.shuffle(items_to_run) - for group, tests in items_to_run: if timeout_expired: print("Timeout expired - break tests execution") @@ -980,7 +998,6 @@ class ClickhouseIntegrationTestsRunner: if len(counters["FAILED"]) + len(counters["ERROR"]) >= 20: logging.info("Collected more than 20 failed/error tests, stopping") break - if counters["FAILED"] or counters["ERROR"]: logging.info( "Overall status failure, because we have tests in FAILED or ERROR state" @@ -989,7 +1006,6 @@ class ClickhouseIntegrationTestsRunner: else: logging.info("Overall success!") result_state = "success" - test_result = [] for state in ( "ERROR", @@ -1009,33 +1025,14 @@ class ClickhouseIntegrationTestsRunner: (c, text_state, f"{tests_times[c]:.2f}", tests_log_paths[c]) for c in counters[state] ] - failed_sum = len(counters["FAILED"]) + len(counters["ERROR"]) status_text = f"fail: {failed_sum}, passed: {len(counters['PASSED'])}" - if self.soft_deadline_time < time.time(): - status_text = "Timeout, " + status_text - result_state = "failure" - - if timeout_expired: - logging.error( - "Job killed by external timeout signal - setting status to failure!" - ) - status_text = "Job timeout expired, " + status_text - result_state = "failure" - # add mock test case to make timeout visible in job report and in ci db - test_result.insert( - 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") - ) - if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" - if "(memory)" in self.params["context_name"]: - result_state = "success" - - return result_state, status_text, test_result, [] + return result_state, status_text, test_result, tests_log_paths def write_results(results_file, status_file, results, status): @@ -1068,7 +1065,9 @@ def run(): logging.info("Clearing dmesg before run") subprocess.check_call("sudo -E dmesg --clear", shell=True) - state, description, test_results, _ = runner.run_impl(repo_path, build_path) + state, description, test_results, _test_log_paths = runner.run_impl( + repo_path, build_path + ) logging.info("Tests finished") if IS_CI: From 5bad631d468a8cddbd1af40dc43c2d5db2931b46 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Mon, 26 Aug 2024 15:26:21 +0000 Subject: [PATCH 402/844] Add config for outout_format_identifers_quoting_style --- src/Core/Settings.h | 2 ++ src/Core/SettingsChangesHistory.cpp | 5 +++++ src/Core/SettingsEnums.cpp | 6 ++++++ src/Core/SettingsEnums.h | 3 +++ src/Interpreters/formatWithPossiblyHidingSecrets.h | 2 +- src/Parsers/IAST.cpp | 5 ++++- src/Parsers/IAST.h | 2 +- 7 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 23dc2a8fdc5..c33562e25ed 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1296,6 +1296,8 @@ class IColumn; M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \ M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.", 0) \ M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, "Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple", 0) \ + \ + M(IdentifierQuotingStyle, output_format_identifier_quoting_style, IdentifierQuotingStyle::Backticks, "Set the quoting style for identifiers", 0) \ // End of FORMAT_FACTORY_SETTINGS diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 5e831c6301c..76ec440aaf2 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,6 +57,11 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { + {"24.12", + { + {"output_format_identifier_quoting_style", 1, 1, "New setting."} + } + }, {"24.12", { } diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 30e60f246f4..0c43b5256d6 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -244,4 +244,10 @@ IMPLEMENT_SETTING_ENUM( GroupArrayActionWhenLimitReached, ErrorCodes::BAD_ARGUMENTS, {{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}}) + +IMPLEMENT_SETTING_ENUM(IdentifierQuotingStyle, ErrorCodes::BAD_ARGUMENTS, + {{"None", IdentifierQuotingStyle::None}, + {"Backticks", IdentifierQuotingStyle::Backticks}, + {"DoubleQuotes", IdentifierQuotingStyle::DoubleQuotes}, + {"BackticksMySQL", IdentifierQuotingStyle::BackticksMySQL}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 5c2d8825e76..d358a2d44a0 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -351,6 +352,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateT DECLARE_SETTING_ENUM(SQLSecurityType) +DECLARE_SETTING_ENUM(IdentifierQuotingStyle) + enum class GroupArrayActionWhenLimitReached : uint8_t { THROW, diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 14e84f1d1a4..8ec40d3dcc3 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -26,7 +26,7 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); return settings.query.formatWithPossiblyHidingSensitiveData( - settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names); + settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names, settings.ctx->getSettingsRef().output_format_identifier_quoting_style); } } diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 5bd2c92c60a..90cdd330ee8 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,12 +165,15 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const +String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names, IdentifierQuotingStyle identifier_quoting_style) const { + WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; settings.print_pretty_type_names = print_pretty_type_names; + settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; + settings.identifier_quoting_style = identifier_quoting_style; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 2293d50b0ec..777265aaa7c 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -278,7 +278,7 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const; + String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names, IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks) const; /** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about From d6ca589ebed7a16f498fbbe4365d9ac07686af08 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Mon, 26 Aug 2024 15:26:21 +0000 Subject: [PATCH 403/844] Treat PROJECTION, DICTIONARY, COLUMN, INDEX names as identifiers when formating --- src/Core/SettingsChangesHistory.cpp | 2 +- src/Parsers/ASTColumnDeclaration.cpp | 3 +-- src/Parsers/ASTDictionaryAttributeDeclaration.cpp | 2 +- src/Parsers/ASTIndexDeclaration.cpp | 2 +- src/Parsers/ASTProjectionDeclaration.cpp | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 76ec440aaf2..46f6590b724 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,7 +59,7 @@ static std::initializer_listformatImpl(s, state, frame); } diff --git a/src/Parsers/ASTProjectionDeclaration.cpp b/src/Parsers/ASTProjectionDeclaration.cpp index 60050986161..af79745a88e 100644 --- a/src/Parsers/ASTProjectionDeclaration.cpp +++ b/src/Parsers/ASTProjectionDeclaration.cpp @@ -17,7 +17,7 @@ ASTPtr ASTProjectionDeclaration::clone() const void ASTProjectionDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - settings.ostr << backQuoteIfNeed(name); + settings.writeIdentifier(name); std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); std::string nl_or_nothing = settings.one_line ? "" : "\n"; settings.ostr << settings.nl_or_ws << indent_str << "(" << nl_or_nothing; From e1d66b9e262466695bff2a30cccec047a953e269 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Mon, 26 Aug 2024 15:26:21 +0000 Subject: [PATCH 404/844] Add output_format_always_quote_identifiers into the settings --- src/Core/ExternalTable.cpp | 12 ++++++++--- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/Interpreters/executeQuery.cpp | 4 ++-- .../formatWithPossiblyHidingSecrets.h | 7 ++++++- src/Parsers/ASTColumnDeclaration.cpp | 3 ++- src/Parsers/IAST.cpp | 20 ++++++++++++------- src/Parsers/IAST.h | 15 ++++++++++---- 8 files changed, 45 insertions(+), 18 deletions(-) diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 4ff0d7092d8..1f8e63db72e 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -17,11 +17,12 @@ #include #include -#include #include #include #include #include +#include +#include "Parsers/IdentifierQuotingStyle.h" namespace DB @@ -85,7 +86,9 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg /// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty. if (column) - structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false)); + structure.emplace_back( + column->name, + column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -102,7 +105,10 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error); for (size_t i = 0; i < type_list_raw->children.size(); ++i) - structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false)); + structure.emplace_back( + "_" + toString(i + 1), + type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData( + 0, true, true, false, false, IdentifierQuotingStyle::Backticks)); } void BaseExternalTable::initSampleBlock() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c33562e25ed..0e58562c745 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1297,6 +1297,7 @@ class IColumn; M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", "Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.", 0) \ M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, "Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple", 0) \ \ + M(Bool, output_format_always_quote_identifiers, false, "Always quote identifiers", 0) \ M(IdentifierQuotingStyle, output_format_identifier_quoting_style, IdentifierQuotingStyle::Backticks, "Set the quoting style for identifiers", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 46f6590b724..ac7100d3706 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list executeQueryImpl( /// Verify that AST formatting is consistent: /// If you format AST, parse it back, and format it again, you get the same string. - String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false); + String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks); /// The query can become more verbose after formatting, so: size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0; @@ -811,7 +811,7 @@ static std::tuple executeQueryImpl( chassert(ast2); - String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false); + String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks); if (formatted1 != formatted2) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index 8ec40d3dcc3..61f689fb821 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -26,7 +26,12 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); return settings.query.formatWithPossiblyHidingSensitiveData( - settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names, settings.ctx->getSettingsRef().output_format_identifier_quoting_style); + settings.max_length, + settings.one_line, + show_secrets, + settings.ctx->getSettingsRef().print_pretty_type_names, + settings.ctx->getSettingsRef().output_format_always_quote_identifiers, + settings.ctx->getSettingsRef().output_format_identifier_quoting_style); } } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 39b1da68a86..7b5ea49b63d 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -66,7 +66,8 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo { frame.need_parens = false; - format_settings.writeIdentifier(name); + /// We have to always quote column names to avoid ambiguity with INDEX and other declarations in CREATE query. + format_settings.writeIdentifier(name, true); if (type) { diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 90cdd330ee8..858630cef2e 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,14 +165,20 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names, IdentifierQuotingStyle identifier_quoting_style) const +String IAST::formatWithPossiblyHidingSensitiveData( + size_t max_length, + bool one_line, + bool show_secrets, + bool print_pretty_type_names, + bool always_quote_identifiers, + IdentifierQuotingStyle identifier_quoting_style) const { WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; settings.print_pretty_type_names = print_pretty_type_names; - settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; + settings.always_quote_identifiers = always_quote_identifiers; settings.identifier_quoting_style = identifier_quoting_style; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); @@ -211,13 +217,13 @@ String IAST::getColumnNameWithoutAlias() const } -void IAST::FormatSettings::writeIdentifier(const String & name) const +void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoting) const { switch (identifier_quoting_style) { case IdentifierQuotingStyle::None: { - if (always_quote_identifiers) + if (force_quoting || always_quote_identifiers) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incompatible arguments: always_quote_identifiers = true && " "identifier_quoting_style == IdentifierQuotingStyle::None"); @@ -226,7 +232,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const } case IdentifierQuotingStyle::Backticks: { - if (always_quote_identifiers) + if (force_quoting || always_quote_identifiers) writeBackQuotedString(name, ostr); else writeProbablyBackQuotedString(name, ostr); @@ -234,7 +240,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const } case IdentifierQuotingStyle::DoubleQuotes: { - if (always_quote_identifiers) + if (force_quoting || always_quote_identifiers) writeDoubleQuotedString(name, ostr); else writeProbablyDoubleQuotedString(name, ostr); @@ -242,7 +248,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const } case IdentifierQuotingStyle::BackticksMySQL: { - if (always_quote_identifiers) + if (force_quoting || always_quote_identifiers) writeBackQuotedStringMySQL(name, ostr); else writeProbablyBackQuotedStringMySQL(name, ostr); diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 777265aaa7c..c8fb93d3bf6 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -237,7 +237,8 @@ public: { } - void writeIdentifier(const String & name) const; + // If `force_quoting` is true, `name` is always quoted regardless of `always_quote_identifiers` + void writeIdentifier(const String & name, bool force_quoting = false) const; }; /// State. For example, a set of nodes can be remembered, which we already walk through. @@ -278,7 +279,13 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names, IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks) const; + String formatWithPossiblyHidingSensitiveData( + size_t max_length, + bool one_line, + bool show_secrets, + bool print_pretty_type_names, + bool always_quote_identifiers, + IdentifierQuotingStyle identifier_quoting_style) const; /** formatForLogging and formatForErrorMessage always hide secrets. This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about @@ -287,12 +294,12 @@ public: */ String formatForLogging(size_t max_length = 0) const { - return formatWithPossiblyHidingSensitiveData(max_length, true, false, false); + return formatWithPossiblyHidingSensitiveData(max_length, true, false, false, false, IdentifierQuotingStyle::Backticks); } String formatForErrorMessage() const { - return formatWithPossiblyHidingSensitiveData(0, true, false, false); + return formatWithPossiblyHidingSensitiveData(0, true, false, false, false, IdentifierQuotingStyle::Backticks); } virtual bool hasSecretParts() const { return childrenHaveSecretParts(); } From 32e995acbb1ac613217190561fbdda976c4386b5 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Mon, 26 Aug 2024 15:26:21 +0000 Subject: [PATCH 405/844] Update settings change history version --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ac7100d3706..982cd0389a9 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,7 +57,7 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.12", + {"24.13", { {"output_format_always_quote_identifiers", false, false, "New setting."}, {"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."} From 8e6de7e767e3db62efab622a7cf05e81da5d7866 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Mon, 26 Aug 2024 15:26:21 +0000 Subject: [PATCH 406/844] 1) Fix: move new setting to 24.9 key. 2) Fix: fource quoting dictionary attribute names --- src/Core/SettingsChangesHistory.cpp | 8 ++------ src/Parsers/ASTDictionaryAttributeDeclaration.cpp | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 982cd0389a9..258065dcfd4 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,12 +57,6 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { - {"24.13", - { - {"output_format_always_quote_identifiers", false, false, "New setting."}, - {"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."} - } - }, {"24.12", { } @@ -81,6 +75,8 @@ static std::initializer_list Date: Mon, 26 Aug 2024 18:43:21 +0200 Subject: [PATCH 407/844] increase timeout for flaky check --- tests/ci/ci_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5453bffd9c6..5de572bc0cb 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -424,6 +424,7 @@ class CI: pr_only=True, # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf # reference_job_name=JobNames.INTEGRATION_TEST_TSAN, + timeout=4*3600 # to be able to process many updated tests ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], From 34d13bd7467610daec7004640e1ab07da8a03f81 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 26 Aug 2024 19:33:04 +0200 Subject: [PATCH 408/844] Update ci_config.py --- tests/ci/ci_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5de572bc0cb..0d5f29d705f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -424,7 +424,7 @@ class CI: pr_only=True, # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf # reference_job_name=JobNames.INTEGRATION_TEST_TSAN, - timeout=4*3600 # to be able to process many updated tests + timeout=4 * 3600 # to be able to process many updated tests ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], From f9f960a5ed65b479257fff70ce2ef7234c61406d Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 26 Aug 2024 22:04:54 +0200 Subject: [PATCH 409/844] Update ci_config.py --- tests/ci/ci_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 0d5f29d705f..e594168ece3 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -424,7 +424,7 @@ class CI: pr_only=True, # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf # reference_job_name=JobNames.INTEGRATION_TEST_TSAN, - timeout=4 * 3600 # to be able to process many updated tests + timeout=4 * 3600, # to be able to process many updated tests ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], From aa4688a98203adb24ad3162a0afebb5e6be7be35 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 27 Aug 2024 12:25:22 +0800 Subject: [PATCH 410/844] fix style --- src/Common/benchmarks/orc_string_dictionary.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/benchmarks/orc_string_dictionary.cpp b/src/Common/benchmarks/orc_string_dictionary.cpp index 7d4a3b1ab56..a542ed2182b 100644 --- a/src/Common/benchmarks/orc_string_dictionary.cpp +++ b/src/Common/benchmarks/orc_string_dictionary.cpp @@ -25,7 +25,7 @@ public: size_t size() const; - // return total length of strings in the dictioanry + // return total length of strings in the dictionary uint64_t length() const; void clear(); @@ -112,7 +112,7 @@ size_t OldSortedStringDictionary::size() const return dict.size(); } -// return total length of strings in the dictioanry +// return total length of strings in the dictionary uint64_t OldSortedStringDictionary::length() const { return totalLength; @@ -160,7 +160,7 @@ public: // return count of entries size_t size() const; - // return total length of strings in the dictioanry + // return total length of strings in the dictionary uint64_t length() const; void clear(); @@ -249,7 +249,7 @@ size_t NewSortedStringDictionary::size() const return flatDict_.size(); } -// return total length of strings in the dictioanry +// return total length of strings in the dictionary uint64_t NewSortedStringDictionary::length() const { return totalLength_; From 69cf54192c3e4a74db645f48afad90f52dd2da1e Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Tue, 27 Aug 2024 09:33:45 +0000 Subject: [PATCH 411/844] 1) Add tests for the identifier quoting style setting. 2) Update uni-test for table overrides --- src/Parsers/ASTColumnDeclaration.cpp | 2 +- .../ASTDictionaryAttributeDeclaration.cpp | 2 +- src/Parsers/IAST.cpp | 38 +- src/Parsers/IAST.h | 6 +- src/Parsers/tests/gtest_Parser.cpp | 2 +- ..._format_identifier_quoting_style.reference | 40 +++ ...output_format_identifier_quoting_style.sql | 328 ++++++++++++++++++ 7 files changed, 408 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03230_output_format_identifier_quoting_style.reference create mode 100644 tests/queries/0_stateless/03230_output_format_identifier_quoting_style.sql diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 7b5ea49b63d..d7728462df3 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -67,7 +67,7 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo frame.need_parens = false; /// We have to always quote column names to avoid ambiguity with INDEX and other declarations in CREATE query. - format_settings.writeIdentifier(name, true); + format_settings.quoteIdentifier(name); if (type) { diff --git a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp index 070cc6f222a..a600987dc45 100644 --- a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp @@ -35,7 +35,7 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin { frame.need_parens = false; - settings.writeIdentifier(name, true); + settings.quoteIdentifier(name); if (type) { diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 858630cef2e..54b11f2888e 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -217,13 +217,13 @@ String IAST::getColumnNameWithoutAlias() const } -void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoting) const +void IAST::FormatSettings::writeIdentifier(const String & name) const { switch (identifier_quoting_style) { case IdentifierQuotingStyle::None: { - if (force_quoting || always_quote_identifiers) + if (always_quote_identifiers) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incompatible arguments: always_quote_identifiers = true && " "identifier_quoting_style == IdentifierQuotingStyle::None"); @@ -232,7 +232,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoti } case IdentifierQuotingStyle::Backticks: { - if (force_quoting || always_quote_identifiers) + if (always_quote_identifiers) writeBackQuotedString(name, ostr); else writeProbablyBackQuotedString(name, ostr); @@ -240,7 +240,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoti } case IdentifierQuotingStyle::DoubleQuotes: { - if (force_quoting || always_quote_identifiers) + if (always_quote_identifiers) writeDoubleQuotedString(name, ostr); else writeProbablyDoubleQuotedString(name, ostr); @@ -248,7 +248,7 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoti } case IdentifierQuotingStyle::BackticksMySQL: { - if (force_quoting || always_quote_identifiers) + if (always_quote_identifiers) writeBackQuotedStringMySQL(name, ostr); else writeProbablyBackQuotedStringMySQL(name, ostr); @@ -257,6 +257,34 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool force_quoti } } + +void IAST::FormatSettings::quoteIdentifier(const String & name) const +{ + switch (identifier_quoting_style) + { + case IdentifierQuotingStyle::None: + { + writeBackQuotedString(name, ostr); + break; + } + case IdentifierQuotingStyle::Backticks: + { + writeBackQuotedString(name, ostr); + break; + } + case IdentifierQuotingStyle::DoubleQuotes: + { + writeDoubleQuotedString(name, ostr); + break; + } + case IdentifierQuotingStyle::BackticksMySQL: + { + writeBackQuotedStringMySQL(name, ostr); + break; + } + } +} + void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const { String indent_str(indent, '-'); diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index c8fb93d3bf6..dfb6a6cbeba 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -237,8 +237,10 @@ public: { } - // If `force_quoting` is true, `name` is always quoted regardless of `always_quote_identifiers` - void writeIdentifier(const String & name, bool force_quoting = false) const; + void writeIdentifier(const String & name) const; + // Quote identifier `name` even when `always_quote_identifiers` is false. + // If `identifier_quoting_style` is `IdentifierQuotingStyle::None`, quote it with `IdentifierQuotingStyle::Backticks` + void quoteIdentifier(const String & name) const; }; /// State. For example, a set of nodes can be remembered, which we already walk through. diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index f0abc68f966..98cd9682c9c 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -243,7 +243,7 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest, }, { "CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))", - "CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n INDEX foo `foo` TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(`_staged` = 1, 'staging', toYYYYMM(`created`))\n)" + "CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n INDEX `foo` `foo` TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(`_staged` = 1, 'staging', toYYYYMM(`created`))\n)" }, { "CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)", diff --git a/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.reference b/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.reference new file mode 100644 index 00000000000..c563617a01c --- /dev/null +++ b/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.reference @@ -0,0 +1,40 @@ +CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid +CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month +CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid +CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month +CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE `default`.`uk_price_paid`\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX `county_index` `county` TYPE set(10) GRANULARITY 1,\n PROJECTION `town_date_projection`\n (\n SELECT \n `town`,\n `date`,\n `price`\n ORDER BY \n `town`,\n `date`\n ),\n PROJECTION `handy_aggs_projection`\n (\n SELECT \n avg(`price`),\n max(`price`),\n sum(`price`)\n GROUP BY `town`\n )\n)\nENGINE = MergeTree\nORDER BY (`postcode1`, `postcode2`, `date`)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW `default`.`prices_by_year_view` TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n `price`,\n `date`,\n `addr1`,\n `addr2`,\n `street`,\n `town`,\n `district`,\n `county`\nFROM `default`.`uk_price_paid` +CREATE TABLE `default`.`uk_prices_aggs_dest`\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `month`\nORDER BY `month`\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW `default`.`uk_prices_aggs_view` TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nAS WITH toStartOfMonth(`date`) AS `month`\nSELECT\n `month`,\n minSimpleState(`price`) AS `min_price`,\n maxSimpleState(`price`) AS `max_price`,\n countState(`price`) AS `volume`,\n avgState(`price`) AS `avg_price`\nFROM `default`.`uk_price_paid`\nGROUP BY `month` +CREATE DICTIONARY `default`.`uk_mortgage_rates_dict`\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY `date`\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid +CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month +CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE "default"."uk_price_paid"\n(\n "price" UInt32,\n "date" Date,\n "postcode1" LowCardinality(String),\n "postcode2" LowCardinality(String),\n "type" Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n "is_new" UInt8,\n "duration" Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "locality" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String),\n INDEX "county_index" "county" TYPE set(10) GRANULARITY 1,\n PROJECTION "town_date_projection"\n (\n SELECT \n "town",\n "date",\n "price"\n ORDER BY \n "town",\n "date"\n ),\n PROJECTION "handy_aggs_projection"\n (\n SELECT \n avg("price"),\n max("price"),\n sum("price")\n GROUP BY "town"\n )\n)\nENGINE = MergeTree\nORDER BY ("postcode1", "postcode2", "date")\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW "default"."prices_by_year_view" TO default.prices_by_year_dest\n(\n "price" UInt32,\n "date" Date,\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String)\n)\nAS SELECT\n "price",\n "date",\n "addr1",\n "addr2",\n "street",\n "town",\n "district",\n "county"\nFROM "default"."uk_price_paid" +CREATE TABLE "default"."uk_prices_aggs_dest"\n(\n "month" Date,\n "min_price" SimpleAggregateFunction("min", UInt32),\n "max_price" SimpleAggregateFunction("max", UInt32),\n "volume" AggregateFunction("count", UInt32),\n "avg_price" AggregateFunction("avg", UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY "month"\nORDER BY "month"\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW "default"."uk_prices_aggs_view" TO default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction("min", UInt32),\n "max_price" SimpleAggregateFunction("max", UInt32),\n "volume" AggregateFunction("count", UInt32),\n "avg_price" AggregateFunction("avg", UInt32)\n)\nAS WITH toStartOfMonth("date") AS "month"\nSELECT\n "month",\n minSimpleState("price") AS "min_price",\n maxSimpleState("price") AS "max_price",\n countState("price") AS "volume",\n avgState("price") AS "avg_price"\nFROM "default"."uk_price_paid"\nGROUP BY "month" +CREATE DICTIONARY "default"."uk_mortgage_rates_dict"\n(\n "date" DateTime64,\n "variable" Decimal32(2),\n "fixed" Decimal32(2),\n "bank" Decimal32(2)\n)\nPRIMARY KEY "date"\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE default.uk_price_paid\n(\n "price" UInt32,\n "date" Date,\n "postcode1" LowCardinality(String),\n "postcode2" LowCardinality(String),\n "type" Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n "is_new" UInt8,\n "duration" Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "locality" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n "price" UInt32,\n "date" Date,\n "addr1" String,\n "addr2" String,\n "street" LowCardinality(String),\n "town" LowCardinality(String),\n "district" LowCardinality(String),\n "county" LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid +CREATE TABLE default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction(min, UInt32),\n "max_price" SimpleAggregateFunction(max, UInt32),\n "volume" AggregateFunction(count, UInt32),\n "avg_price" AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n "month" Date,\n "min_price" SimpleAggregateFunction(min, UInt32),\n "max_price" SimpleAggregateFunction(max, UInt32),\n "volume" AggregateFunction(count, UInt32),\n "avg_price" AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month +CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n "date" DateTime64,\n "variable" Decimal32(2),\n "fixed" Decimal32(2),\n "bank" Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE `default`.`uk_price_paid`\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX `county_index` `county` TYPE set(10) GRANULARITY 1,\n PROJECTION `town_date_projection`\n (\n SELECT \n `town`,\n `date`,\n `price`\n ORDER BY \n `town`,\n `date`\n ),\n PROJECTION `handy_aggs_projection`\n (\n SELECT \n avg(`price`),\n max(`price`),\n sum(`price`)\n GROUP BY `town`\n )\n)\nENGINE = MergeTree\nORDER BY (`postcode1`, `postcode2`, `date`)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW `default`.`prices_by_year_view` TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n `price`,\n `date`,\n `addr1`,\n `addr2`,\n `street`,\n `town`,\n `district`,\n `county`\nFROM `default`.`uk_price_paid` +CREATE TABLE `default`.`uk_prices_aggs_dest`\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `month`\nORDER BY `month`\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW `default`.`uk_prices_aggs_view` TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(`min`, UInt32),\n `max_price` SimpleAggregateFunction(`max`, UInt32),\n `volume` AggregateFunction(`count`, UInt32),\n `avg_price` AggregateFunction(`avg`, UInt32)\n)\nAS WITH toStartOfMonth(`date`) AS `month`\nSELECT\n `month`,\n minSimpleState(`price`) AS `min_price`,\n maxSimpleState(`price`) AS `max_price`,\n countState(`price`) AS `volume`,\n avgState(`price`) AS `avg_price`\nFROM `default`.`uk_price_paid`\nGROUP BY `month` +CREATE DICTIONARY `default`.`uk_mortgage_rates_dict`\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY `date`\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) +CREATE TABLE default.uk_price_paid\n(\n `price` UInt32,\n `date` Date,\n `postcode1` LowCardinality(String),\n `postcode2` LowCardinality(String),\n `type` Enum8(\'other\' = 0, \'terraced\' = 1, \'semi-detached\' = 2, \'detached\' = 3, \'flat\' = 4),\n `is_new` UInt8,\n `duration` Enum8(\'unknown\' = 0, \'freehold\' = 1, \'leasehold\' = 2),\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `locality` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String),\n INDEX county_index county TYPE set(10) GRANULARITY 1,\n PROJECTION town_date_projection\n (\n SELECT \n town,\n date,\n price\n ORDER BY \n town,\n date\n ),\n PROJECTION handy_aggs_projection\n (\n SELECT \n avg(price),\n max(price),\n sum(price)\n GROUP BY town\n )\n)\nENGINE = MergeTree\nORDER BY (postcode1, postcode2, date)\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.prices_by_year_view TO default.prices_by_year_dest\n(\n `price` UInt32,\n `date` Date,\n `addr1` String,\n `addr2` String,\n `street` LowCardinality(String),\n `town` LowCardinality(String),\n `district` LowCardinality(String),\n `county` LowCardinality(String)\n)\nAS SELECT\n price,\n date,\n addr1,\n addr2,\n street,\n town,\n district,\n county\nFROM default.uk_price_paid +CREATE TABLE default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY month\nORDER BY month\nSETTINGS index_granularity = 8192 +CREATE MATERIALIZED VIEW default.uk_prices_aggs_view TO default.uk_prices_aggs_dest\n(\n `month` Date,\n `min_price` SimpleAggregateFunction(min, UInt32),\n `max_price` SimpleAggregateFunction(max, UInt32),\n `volume` AggregateFunction(count, UInt32),\n `avg_price` AggregateFunction(avg, UInt32)\n)\nAS WITH toStartOfMonth(date) AS month\nSELECT\n month,\n minSimpleState(price) AS min_price,\n maxSimpleState(price) AS max_price,\n countState(price) AS volume,\n avgState(price) AS avg_price\nFROM default.uk_price_paid\nGROUP BY month +CREATE DICTIONARY default.uk_mortgage_rates_dict\n(\n `date` DateTime64,\n `variable` Decimal32(2),\n `fixed` Decimal32(2),\n `bank` Decimal32(2)\n)\nPRIMARY KEY date\nSOURCE(CLICKHOUSE(TABLE \'uk_mortgage_rates\'))\nLIFETIME(MIN 0 MAX 2628000000)\nLAYOUT(COMPLEX_KEY_HASHED()) diff --git a/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.sql b/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.sql new file mode 100644 index 00000000000..c500dd4e4c6 --- /dev/null +++ b/tests/queries/0_stateless/03230_output_format_identifier_quoting_style.sql @@ -0,0 +1,328 @@ +DROP DICTIONARY IF EXISTS uk_mortgage_rates_dict; +DROP TABLE IF EXISTS uk_mortgage_rates; +DROP VIEW IF EXISTS uk_prices_aggs_view; +DROP TABLE IF EXISTS uk_prices_aggs_dest; +DROP VIEW IF EXISTS prices_by_year_view; +DROP TABLE IF EXISTS prices_by_year_dest; +DROP TABLE IF EXISTS uk_price_paid; + +-- Create tables, views, dictionaries + +CREATE TABLE uk_price_paid +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + is_new UInt8, + duration Enum('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String), + INDEX county_index county TYPE set(10) GRANULARITY 1, + PROJECTION town_date_projection + ( + SELECT + town, + date, + price + ORDER BY + town, + date + ), + PROJECTION handy_aggs_projection + ( + SELECT + avg(price), + max(price), + sum(price) + GROUP BY town + ) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, date); + +CREATE TABLE prices_by_year_dest ( + price UInt32, + date Date, + addr1 String, + addr2 String, + street LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +PRIMARY KEY (town, date) +PARTITION BY toYear(date); + +CREATE MATERIALIZED VIEW prices_by_year_view +TO prices_by_year_dest +AS + SELECT + price, + date, + addr1, + addr2, + street, + town, + district, + county + FROM uk_price_paid; + +CREATE TABLE uk_prices_aggs_dest ( + month Date, + min_price SimpleAggregateFunction(min, UInt32), + max_price SimpleAggregateFunction(max, UInt32), + volume AggregateFunction(count, UInt32), + avg_price AggregateFunction(avg, UInt32) +) +ENGINE = AggregatingMergeTree +PRIMARY KEY month; + +CREATE MATERIALIZED VIEW uk_prices_aggs_view +TO uk_prices_aggs_dest +AS + WITH + toStartOfMonth(date) AS month + SELECT + month, + minSimpleState(price) AS min_price, + maxSimpleState(price) AS max_price, + countState(price) AS volume, + avgState(price) AS avg_price + FROM uk_price_paid + GROUP BY month; + +CREATE TABLE uk_mortgage_rates ( + date DateTime64, + variable Decimal32(2), + fixed Decimal32(2), + bank Decimal32(2) +) +ENGINE Memory(); + +INSERT INTO uk_mortgage_rates VALUES ('2004-02-29', 5.02, 4.9, 4); +INSERT INTO uk_mortgage_rates VALUES ('2004-03-31', 5.11, 4.91, 4); + +CREATE DICTIONARY uk_mortgage_rates_dict ( + date DateTime64, + variable Decimal32(2), + fixed Decimal32(2), + bank Decimal32(2) +) +PRIMARY KEY date +SOURCE( + CLICKHOUSE(TABLE 'uk_mortgage_rates') +) +LAYOUT(COMPLEX_KEY_HASHED()) +LIFETIME(2628000000); + + +-- Show tables, views, dictionaries with default settings +SHOW CREATE TABLE uk_price_paid; + +SHOW CREATE VIEW prices_by_year_view; + +SHOW CREATE uk_prices_aggs_dest; + +SHOW CREATE VIEW uk_prices_aggs_view; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict; + + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='None' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='None'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='None'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='None'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='None'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='None'; + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='Backticks' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='Backticks'; + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='Backticks' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='Backticks'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='Backticks'; + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='DoubleQuotes' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='DoubleQuotes'; + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='DoubleQuotes' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='DoubleQuotes'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='DoubleQuotes'; + + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=true, output_format_identifier_quoting_style='BackticksMySQL' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=true, + output_format_identifier_quoting_style='BackticksMySQL'; + +-- Show tables, views, dictionaries with output_format_always_quote_identifiers=false, output_format_identifier_quoting_style='BackticksMySQL' +SHOW CREATE TABLE uk_price_paid +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE VIEW prices_by_year_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE uk_prices_aggs_dest +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE VIEW uk_prices_aggs_view +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='BackticksMySQL'; + +SHOW CREATE DICTIONARY uk_mortgage_rates_dict +SETTINGS + output_format_always_quote_identifiers=false, + output_format_identifier_quoting_style='BackticksMySQL'; + +DROP DICTIONARY uk_mortgage_rates_dict; +DROP TABLE uk_mortgage_rates; +DROP VIEW uk_prices_aggs_view; +DROP TABLE uk_prices_aggs_dest; +DROP VIEW prices_by_year_view; +DROP TABLE prices_by_year_dest; +DROP TABLE uk_price_paid; From 1523df6ec310ae7a431b32ef084c50e4377d11ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 27 Aug 2024 10:31:11 +0000 Subject: [PATCH 412/844] Ignore `MODIFY_COLUMN` commands without column type when parsing mutation commands --- src/Storages/MutationCommands.cpp | 5 ++++- ...xed_mutations_and_remove_materialized.reference | 8 ++++++++ ...ith_mixed_mutations_and_remove_materialized.sql | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference create mode 100644 tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f736c863eee..f5ccc80f1d8 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -115,7 +115,10 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.column_name = getIdentifierName(command->column); return res; } - else if (parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN) + /// MODIFY COLUMN x REMOVE MATERIALIZED is a valid alter command, but doesn't have any specified column type, thus no mutation is needed + else if ( + parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN && command->col_decl + && command->col_decl->as().type) { MutationCommand res; res.ast = command->ptr(); diff --git a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference new file mode 100644 index 00000000000..9166a82f472 --- /dev/null +++ b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference @@ -0,0 +1,8 @@ +BEFORE a_r1 x String +BEFORE a_r1 y String MATERIALIZED \'str\' +BEFORE a_r2 x String +BEFORE a_r2 y String MATERIALIZED \'str\' +AFTER a_r1 x String +AFTER a_r1 y String +AFTER a_r2 x String +AFTER a_r2 y String diff --git a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql new file mode 100644 index 00000000000..3c43b9a8521 --- /dev/null +++ b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS a_r1 SYNC; +DROP TABLE IF EXISTS a_r2 SYNC; +CREATE TABLE a_r1 (x String, y String MATERIALIZED 'str') ENGINE = ReplicatedMergeTree('/clickhouse/{database}/a', 'r1') ORDER BY x; +CREATE TABLE a_r2 (x String, y String MATERIALIZED 'str') ENGINE = ReplicatedMergeTree('/clickhouse/{database}/a', 'r2') ORDER BY x; + +INSERT INTO a_r1 SELECT toString(number) FROM numbers(100); +SELECT 'BEFORE', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table LIKE 'a\_r%' ORDER BY table, name; + +ALTER TABLE a_r1 + ADD INDEX IF NOT EXISTS some_index x TYPE set(16) GRANULARITY 1, + MODIFY COLUMN y REMOVE MATERIALIZED +SETTINGS alter_sync = 2, mutations_sync = 2; + +SELECT 'AFTER', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table LIKE 'a\_r%' ORDER BY table, name; From 793b549291db336be2596c2fe7382d7b7943b547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 27 Aug 2024 10:31:22 +0000 Subject: [PATCH 413/844] Fix typo in docs --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7bd36ccd00f..816d1caa632 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3226,7 +3226,7 @@ Default value: `0`. ## lightweight_deletes_sync {#lightweight_deletes_sync} -The same as 'mutation_sync', but controls only execution of lightweight deletes. +The same as [`mutations_sync`](#mutations_sync), but controls only execution of lightweight deletes. Possible values: From c1c4cabe9ffd5de032bc26fbb2ebd19b316e0336 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 27 Aug 2024 12:41:02 +0200 Subject: [PATCH 414/844] Update toReference*Num function documentation --- .../functions/date-time-functions.md | 306 +++++++++++++++++- 1 file changed, 298 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4f5e5a5d716..cb32d1a8d63 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1625,37 +1625,327 @@ If unit `WEEK` was specified, `toStartOfInterval` assumes that weeks start on Mo Converts a date with time to a certain fixed date, while preserving the time. +**Syntax** + +```sql +toTime(date[,timezone]) +``` + +**Arguments** + +- `date` — Date to convert to a time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). +- `timezone` (optional) — Timezone for the returned value. [String](../data-types/string.md). + +**Returned value** + +- DateTime with date equated to `1970-01-02` while preserving the time. [DateTime](../data-types/datetime.md). + +:::note +If the `date` input argument contained sub-second components, +they will be dropped in the returned `DateTime` value with second-accuracy. +::: + +**Example** + +Query: + +```sql +SELECT toTime(toDateTime64('1970-12-10 01:20:30.3000',3)) AS result, toTypeName(result); +``` + +Result: + +```response + +``` + ## toRelativeYearNum -Converts a date, or date with time, to the number of the year, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of years elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeYearNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of years from a fixed reference point in the past. [UInt16](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeYearNum(toDate('2002-12-08')) AS y1, + toRelativeYearNum(toDate('2010-10-26')) AS y2 +``` + +Result: + +```response +┌───y1─┬───y2─┐ +│ 2002 │ 2010 │ +└──────┴──────┘ +``` ## toRelativeQuarterNum -Converts a date, or date with time, to the number of the quarter, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of quarters elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeQuarterNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of quarters from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeQuarterNum(toDate('1993-11-25')) AS q1, + toRelativeQuarterNum(toDate('2005-01-05')) AS q2 +``` + +Result: + +```response +┌───q1─┬───q2─┐ +│ 7975 │ 8020 │ +└──────┴──────┘ +``` ## toRelativeMonthNum -Converts a date, or date with time, to the number of the month, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of months elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeMonthNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of months from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeMonthNum(toDate('2001-04-25')) AS m1, + toRelativeMonthNum(toDate('2009-07-08')) AS m2 +``` + +Result: + +```response +┌────m1─┬────m2─┐ +│ 24016 │ 24115 │ +└───────┴───────┘ +``` ## toRelativeWeekNum -Converts a date, or date with time, to the number of the week, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of weeks elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeWeekNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of weeks from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeWeekNum(toDate('2000-02-29')) AS w1, + toRelativeWeekNum(toDate('2001-01-12')) AS w2 +``` + +Result: + +```response +┌───w1─┬───w2─┐ +│ 1574 │ 1619 │ +└──────┴──────┘ +``` ## toRelativeDayNum -Converts a date, or date with time, to the number of the day, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of days elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeDayNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of days from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeDayNum(toDate('1993-10-05')) AS d1, + toRelativeDayNum(toDate('2000-09-20')) AS d2 +``` + +Result: + +```response +┌───d1─┬────d2─┐ +│ 8678 │ 11220 │ +└──────┴───────┘ +``` ## toRelativeHourNum -Converts a date, or date with time, to the number of the hour, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of hours elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeHourNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of hours from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeHourNum(toDateTime('1993-10-05 05:20:36')) AS h1, + toRelativeHourNum(toDateTime('2000-09-20 14:11:29')) AS h2 +``` + +Result: + +```response +┌─────h1─┬─────h2─┐ +│ 208276 │ 269292 │ +└────────┴────────┘ +``` ## toRelativeMinuteNum -Converts a date, or date with time, to the number of the minute, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of minutes elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeMinuteNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of minutes from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeMinuteNum(toDateTime('1993-10-05 05:20:36')) AS m1, + toRelativeMinuteNum(toDateTime('2000-09-20 14:11:29')) AS m2 +``` + +Result: + +```response +┌───────m1─┬───────m2─┐ +│ 12496580 │ 16157531 │ +└──────────┴──────────┘ +``` ## toRelativeSecondNum -Converts a date, or date with time, to the number of the second, starting from a certain fixed point in the past. +Converts a date, or date with time, to the number of the seconds elapsed since a certain fixed point in the past. + +**Syntax** + +```sql +toRelativeSecondNum(date) +``` + +**Arguments** + +- `date` — Date or date with time. [Date](../data-types/date.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). + +**Returned value** + +- The number of seconds from a fixed reference point in the past. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + toRelativeSecondNum(toDateTime('1993-10-05 05:20:36')) AS s1, + toRelativeSecondNum(toDateTime('2000-09-20 14:11:29')) AS s2 +``` + +Result: + +```response +┌────────s1─┬────────s2─┐ +│ 749794836 │ 969451889 │ +└───────────┴───────────┘ +``` ## toISOYear From 29664d12fc7eedb977147872bdebced05ecfd058 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 27 Aug 2024 12:44:28 +0200 Subject: [PATCH 415/844] update example of toTime --- docs/en/sql-reference/functions/date-time-functions.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index cb32d1a8d63..46d36dbe92f 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1656,7 +1656,9 @@ SELECT toTime(toDateTime64('1970-12-10 01:20:30.3000',3)) AS result, toTypeName( Result: ```response - +┌──────────────result─┬─toTypeName(result)─┐ +│ 1970-01-02 01:20:30 │ DateTime │ +└─────────────────────┴────────────────────┘ ``` ## toRelativeYearNum From c44cc71310f982bd9d4101ee02001f5590971f17 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:32:05 +0000 Subject: [PATCH 416/844] forgot several files --- tests/integration/helpers/postgres_utility.py | 14 +++++++------- .../test.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index 468c3b3bb63..4bb68284314 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -245,9 +245,9 @@ class PostgresManager: ): postgres_database = self.database_or_default(postgres_database) self.created_materialized_postgres_db_list.add(materialized_database) - self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") + self.instance.query(f"DROP DATABASE IF EXISTS `{materialized_database}`") - create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', '{user}', '{password}')" + create_query = f"CREATE DATABASE `{materialized_database}` ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', '{user}', '{password}')" if len(settings) > 0: create_query += " SETTINGS " for i in range(len(settings)): @@ -259,7 +259,7 @@ class PostgresManager: assert materialized_database in self.instance.query("SHOW DATABASES") def drop_materialized_db(self, materialized_database="test_database"): - self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC") + self.instance.query(f"DROP DATABASE IF EXISTS `{materialized_database}` SYNC") if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) @@ -329,11 +329,11 @@ def assert_nested_table_is_created( table = schema_name + "." + table_name print(f"Checking table {table} exists in {materialized_database}") - database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}") + database_tables = instance.query(f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'") while table not in database_tables: time.sleep(0.2) - database_tables = instance.query(f"SHOW TABLES FROM {materialized_database}") + database_tables = instance.query(f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'") assert table in database_tables @@ -366,9 +366,9 @@ def check_tables_are_synchronized( table_path = "" if len(schema_name) == 0: - table_path = f"{materialized_database}.{table_name}" + table_path = f"`{materialized_database}`.`{table_name}`" else: - table_path = f"{materialized_database}.`{schema_name}.{table_name}`" + table_path = f"`{materialized_database}`.`{schema_name}.{table_name}`" print(f"Checking table is synchronized: {table_path}") result_query = f"select * from {table_path} order by {order_by};" diff --git a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py index 5599786026f..280985b55b3 100644 --- a/tests/integration/test_async_insert_adaptive_busy_timeout/test.py +++ b/tests/integration/test_async_insert_adaptive_busy_timeout/test.py @@ -356,7 +356,7 @@ def test_change_queries_frequency(): max_values_size=1000, array_size_range=[10, 50], ) - + node.query("SYSTEM FLUSH LOGS") select_log_query = f"SELECT countIf(timeout_milliseconds - {min_ms} < 25) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)" res = node.query(select_log_query) assert int(res) >= 5 @@ -364,12 +364,12 @@ def test_change_queries_frequency(): _insert_queries_in_parallel( table_name, settings, - thread_num=20, - tasks=2000, + thread_num=10, + tasks=1000, max_values_size=1000, array_size_range=[10, 15], ) - + node.query("SYSTEM FLUSH LOGS") select_log_query = f"SELECT countIf({max_ms} - timeout_milliseconds < 100) FROM (SELECT timeout_milliseconds FROM system.asynchronous_insert_log ORDER BY event_time DESC LIMIT 10)" res = node.query(select_log_query) assert int(res) >= 5 From 6febfc78b3418c27d2c5c12787da554ee3a13fb3 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 27 Aug 2024 14:52:46 +0200 Subject: [PATCH 417/844] updates docs for DateTime64OrZero/Null/Default --- .../functions/type-conversion-functions.md | 131 +++++++++++++++++- 1 file changed, 129 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index edd04580f27..625ed4462d7 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -3857,7 +3857,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](../data-types/datetime64.md) data type. +Converts an input value to a value of type [DateTime64](../data-types/datetime64.md). **Syntax** @@ -3869,7 +3869,7 @@ toDateTime64(expr, scale, [timezone]) - `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. -- `timezone` - Time zone of the specified datetime64 object. +- `timezone` (optional) - Time zone of the specified datetime64 object. **Returned value** @@ -3928,10 +3928,137 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDateTime64OrZero +Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md) but returns the min value of [DateTime64](../data-types/datetime64.md) if an invalid argument is received. + +**Syntax** + +``` sql +toDateTime64OrZero(expr, scale, [timezone]) +``` + +**Arguments** + +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). +- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. +- `timezone` (optional) - Time zone of the specified DateTime64 object. + +**Returned value** + +- A calendar date and time of day, with sub-second precision, otherwise the minimum value of `DateTime64`: `1970-01-01 01:00:00.000`. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +```sql +SELECT toDateTime64OrZero('2008-10-12 00:00:00 00:30:30', 3) AS invalid_arg +``` + +Result: + +```response +┌─────────────invalid_arg─┐ +│ 1970-01-01 01:00:00.000 │ +└─────────────────────────┘ +``` + +**See also** + +- [toDateTime64](#todatetime64). +- [toDateTime64OrNull](#todatetime64ornull). +- [toDateTime64OrDefault](#todatetime64ordefault). + ## toDateTime64OrNull +Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md) but returns `NULL` if an invalid argument is received. + +**Syntax** + +``` sql +toDateTime64OrNull(expr, scale, [timezone]) +``` + +**Arguments** + +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). +- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. +- `timezone` (optional) - Time zone of the specified DateTime64 object. + +**Returned value** + +- A calendar date and time of day, with sub-second precision, otherwise `NULL`. [DateTime64](../data-types/datetime64.md)/[NULL](../data-types/nullable.md). + +**Example** + +Query: + +```sql +SELECT + toDateTime64OrNull('1976-10-18 00:00:00.30', 3) AS valid_arg, + toDateTime64OrNull('1976-10-18 00:00:00 30', 3) AS invalid_arg +``` + +Result: + +```response +┌───────────────valid_arg─┬─invalid_arg─┐ +│ 1976-10-18 00:00:00.300 │ ᴺᵁᴸᴸ │ +└─────────────────────────┴─────────────┘ +``` + +**See also** + +- [toDateTime64](#todatetime64). +- [toDateTime64OrZero](#todatetime64orzero). +- [toDateTime64OrDefault](#todatetime64ordefault). + ## toDateTime64OrDefault +Like [toDateTime64](#todatetime64), this function converts an input value to a value of type [DateTime64](../data-types/datetime64.md), +but returns either the default value of [DateTime64](../data-types/datetime64.md) +or the provided default if an invalid argument is received. + +**Syntax** + +``` sql +toDateTime64OrNull(expr, scale, [timezone, default]) +``` + +**Arguments** + +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). +- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. +- `timezone` (optional) - Time zone of the specified DateTime64 object. +- `default` (optional) - Default value to return if an invalid argument is received. [DateTime64](../data-types/datetime64.md). + +**Returned value** + +- A calendar date and time of day, with sub-second precision, otherwise the minimum value of `DateTime64` or the `default` value if provided. [DateTime64](../data-types/datetime64.md). + +**Example** + +Query: + +```sql +SELECT + toDateTime64OrDefault('1976-10-18 00:00:00 30', 3) AS invalid_arg, + toDateTime64OrDefault('1976-10-18 00:00:00 30', 3, 'UTC', toDateTime64('2001-01-01 00:00:00.00',3)) AS invalid_arg_with_default +``` + +Result: + +```response +┌─────────────invalid_arg─┬─invalid_arg_with_default─┐ +│ 1970-01-01 01:00:00.000 │ 2000-12-31 23:00:00.000 │ +└─────────────────────────┴──────────────────────────┘ +``` + +**See also** + +- [toDateTime64](#todatetime64). +- [toDateTime64OrZero](#todatetime64orzero). +- [toDateTime64OrNull](#todatetime64ornull). + ## toDecimal32 Converts an input value to a value of type [`Decimal(9, S)`](../data-types/decimal.md) with scale of `S`. Throws an exception in case of an error. From cc701c92b4d870a4a2028cccbc46c2cfda8c405d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 27 Aug 2024 15:05:05 +0200 Subject: [PATCH 418/844] Add plan_step_name and plan_step_description columns to system.processors_profile_log --- src/Interpreters/ProcessorsProfileLog.cpp | 4 ++++ src/Interpreters/ProcessorsProfileLog.h | 2 ++ src/Processors/IProcessor.cpp | 12 ++++++++++++ src/Processors/IProcessor.h | 10 +++++----- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 7dec2a3163a..8a646b5d0e7 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -30,6 +30,8 @@ ColumnsDescription ProcessorProfileLogElement::getColumnsDescription() {"id", std::make_shared(), "ID of processor."}, {"parent_ids", std::make_shared(std::make_shared()), "Parent processors IDs."}, {"plan_step", std::make_shared(), "ID of the query plan step which created this processor. The value is zero if the processor was not added from any step."}, + {"plan_step_name", std::make_shared(), "Name of the query plan step which created this processor. The value is empty if the processor was not added from any step."}, + {"plan_step_description", std::make_shared(), "Description of the query plan step which created this processor. The value is empty if the processor was not added from any step."}, {"plan_group", std::make_shared(), "Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result."}, {"initial_query_id", std::make_shared(), "ID of the initial query (for distributed query execution)."}, @@ -64,6 +66,8 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const } columns[i++]->insert(plan_step); + columns[i++]->insert(plan_step_name); + columns[i++]->insert(plan_step_description); columns[i++]->insert(plan_group); columns[i++]->insertData(initial_query_id.data(), initial_query_id.size()); columns[i++]->insertData(query_id.data(), query_id.size()); diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index abece2604f2..fbf52f45f56 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -19,6 +19,8 @@ struct ProcessorProfileLogElement UInt64 plan_step{}; UInt64 plan_group{}; + String plan_step_name; + String plan_step_description; String initial_query_id; String query_id; diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index fc595a7b565..d9bd5074c09 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -9,6 +10,17 @@ namespace DB { +void IProcessor::setQueryPlanStep(IQueryPlanStep * step, size_t group) +{ + query_plan_step = step; + query_plan_step_group = group; + if (step) + { + plan_step_name = step->getName(); + plan_step_description = step->getStepDescription(); + } +} + void IProcessor::cancel() noexcept { diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 02b8a3daa28..fd75eb530aa 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -311,14 +311,12 @@ public: constexpr static size_t NO_STREAM = std::numeric_limits::max(); /// Step of QueryPlan from which processor was created. - void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0) - { - query_plan_step = step; - query_plan_step_group = group; - } + void setQueryPlanStep(IQueryPlanStep * step, size_t group = 0); IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; } size_t getQueryPlanStepGroup() const { return query_plan_step_group; } + const String & getPlanStepName() const { return plan_step_name; } + const String & getPlanStepDescription() const { return plan_step_description; } uint64_t getElapsedNs() const { return elapsed_ns; } uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; } @@ -410,6 +408,8 @@ private: IQueryPlanStep * query_plan_step = nullptr; size_t query_plan_step_group = 0; + String plan_step_name; + String plan_step_description; }; From 2a68ad8704d62b179a123a8fe3355d58450f2b99 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 27 Aug 2024 15:12:01 +0200 Subject: [PATCH 419/844] Fill plan_step_name and plan_step_description --- src/Interpreters/executeQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index decc16a3704..d8ceae77d13 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -471,6 +471,8 @@ void logQueryFinish( processor_elem.parent_ids = std::move(parents); processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); + processor_elem.plan_step_name = processor->getPlanStepName(); + processor_elem.plan_step_description = processor->getPlanStepDescription(); processor_elem.plan_group = processor->getQueryPlanStepGroup(); processor_elem.processor_name = processor->getName(); From b8d5a82f975e4da9be8dff890b2d0caf3832a1b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 27 Aug 2024 14:31:37 +0000 Subject: [PATCH 420/844] Fix test --- ..._mutations_and_remove_materialized.reference | 12 ++++-------- ..._mixed_mutations_and_remove_materialized.sql | 17 ++++++++--------- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference index 9166a82f472..66cf66367c1 100644 --- a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference +++ b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.reference @@ -1,8 +1,4 @@ -BEFORE a_r1 x String -BEFORE a_r1 y String MATERIALIZED \'str\' -BEFORE a_r2 x String -BEFORE a_r2 y String MATERIALIZED \'str\' -AFTER a_r1 x String -AFTER a_r1 y String -AFTER a_r2 x String -AFTER a_r2 y String +BEFORE a x String +BEFORE a y String MATERIALIZED \'str\' +AFTER a x String +AFTER a y String diff --git a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql index 3c43b9a8521..d8ac3280792 100644 --- a/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql +++ b/tests/queries/0_stateless/03230_alter_with_mixed_mutations_and_remove_materialized.sql @@ -1,14 +1,13 @@ -DROP TABLE IF EXISTS a_r1 SYNC; -DROP TABLE IF EXISTS a_r2 SYNC; -CREATE TABLE a_r1 (x String, y String MATERIALIZED 'str') ENGINE = ReplicatedMergeTree('/clickhouse/{database}/a', 'r1') ORDER BY x; -CREATE TABLE a_r2 (x String, y String MATERIALIZED 'str') ENGINE = ReplicatedMergeTree('/clickhouse/{database}/a', 'r2') ORDER BY x; +DROP TABLE IF EXISTS a SYNC; +CREATE TABLE a (x String, y String MATERIALIZED 'str') ENGINE = ReplicatedMergeTree('/clickhouse/{database}/a', 'r1') ORDER BY x; -INSERT INTO a_r1 SELECT toString(number) FROM numbers(100); -SELECT 'BEFORE', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table LIKE 'a\_r%' ORDER BY table, name; +INSERT INTO a SELECT toString(number) FROM numbers(100); +SELECT 'BEFORE', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table = 'a' ORDER BY table, name; -ALTER TABLE a_r1 - ADD INDEX IF NOT EXISTS some_index x TYPE set(16) GRANULARITY 1, +-- DROP INDEX is important to make the mutation not a pure metadata mutation +ALTER TABLE a + DROP INDEX IF EXISTS some_index, MODIFY COLUMN y REMOVE MATERIALIZED SETTINGS alter_sync = 2, mutations_sync = 2; -SELECT 'AFTER', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table LIKE 'a\_r%' ORDER BY table, name; +SELECT 'AFTER', table, name, type, default_kind, default_expression FROM system.columns WHERE database = currentDatabase() AND table = 'a' ORDER BY table, name; From 9ae0d5161349bd89a9a185a8f60aaab0e9357f34 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Tue, 27 Aug 2024 15:02:02 +0000 Subject: [PATCH 421/844] Reformat the code, pass variable names in comments when calling --- src/Core/ExternalTable.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 1f8e63db72e..7fe23f844b6 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include #include -#include "Parsers/IdentifierQuotingStyle.h" namespace DB @@ -88,7 +88,13 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg if (column) structure.emplace_back( column->name, - column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false, false, IdentifierQuotingStyle::Backticks)); + column->type->formatWithPossiblyHidingSensitiveData( + /*max_length=*/0, + /*one_line=*/true, + /*show_secrets=*/true, + /*print_pretty_type_names=*/false, + /*always_quote_identifiers=*/false, + /*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -108,7 +114,12 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen structure.emplace_back( "_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData( - 0, true, true, false, false, IdentifierQuotingStyle::Backticks)); + /*max_length=*/0, + /*one_line=*/true, + /*show_secrets=*/true, + /*print_pretty_type_names=*/false, + /*always_quote_identifiers=*/false, + /*identifier_quoting_style=*/IdentifierQuotingStyle::Backticks)); } void BaseExternalTable::initSampleBlock() From a65d175a8137232996ab3adb8124e87e2157d3ba Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 10:40:04 -0300 Subject: [PATCH 422/844] change parsing logic a bit --- .../Access/InterpreterCreateUserQuery.cpp | 23 +++--- src/Parsers/Access/ParserCreateUserQuery.cpp | 77 ++++++++----------- ..._multiple_authentication_methods.reference | 32 ++++---- .../03174_multiple_authentication_methods.sh | 21 +++-- 4 files changed, 68 insertions(+), 85 deletions(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 467de1e025c..e0d99b64dfb 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -67,17 +67,8 @@ namespace user.authentication_methods.emplace_back(); } - bool has_no_password_authentication_method = std::find_if( - user.authentication_methods.begin(), - user.authentication_methods.end(), - [](const AuthenticationData & authentication_method) - { - return authentication_method.getType() == AuthenticationType::NO_PASSWORD; - }) != user.authentication_methods.end(); - // 1. an IDENTIFIED WITH will drop existing authentication methods in favor of new ones. - // 2. if the user contains an auth method of type NO_PASSWORD and another one is being added, NO_PASSWORD must be dropped - if (replace_authentication_methods || (has_no_password_authentication_method && !authentication_methods.empty())) + if (replace_authentication_methods) { user.authentication_methods.clear(); } @@ -109,6 +100,18 @@ namespace user.authentication_methods.emplace_back(authentication_method); } + bool has_no_password_authentication_method = std::find_if(user.authentication_methods.begin(), + user.authentication_methods.end(), + [](const AuthenticationData & authentication_data) + { + return authentication_data.getType() == AuthenticationType::NO_PASSWORD; + }) != user.authentication_methods.end(); + + if (has_no_password_authentication_method && user.authentication_methods.size() > 1) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods."); + } + if (!query.alter) { for (const auto & authentication_method : user.authentication_methods) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index abf01182ec8..593026b19ad 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -53,7 +53,8 @@ namespace Expected & expected, std::shared_ptr & auth_data, bool is_type_specifier_mandatory, - bool is_type_specifier_allowed) + bool is_type_specifier_allowed, + bool should_parse_no_password) { return IParserBase::wrapParseImpl(pos, [&] { @@ -67,7 +68,7 @@ namespace bool expect_public_ssh_key = false; bool expect_http_auth_server = false; - for (auto check_type : collections::range(AuthenticationType::MAX)) + auto parse_non_password_based_type = [&](auto check_type) { if (ParserKeyword{AuthenticationTypeInfo::get(check_type).keyword}.ignore(pos, expected)) { @@ -86,7 +87,20 @@ namespace else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; - break; + return true; + } + + return false; + }; + + { + const auto first_authentication_type_element_to_check + = should_parse_no_password ? AuthenticationType::NO_PASSWORD : AuthenticationType::PLAINTEXT_PASSWORD; + + for (auto check_type : collections::range(first_authentication_type_element_to_check, AuthenticationType::MAX)) + { + if (parse_non_password_based_type(check_type)) + break; } } @@ -219,7 +233,11 @@ namespace } - bool parseIdentifiedWith(IParserBase::Pos & pos, Expected & expected, std::vector> & authentication_methods) + bool parseIdentifiedWith( + IParserBase::Pos & pos, + Expected & expected, + std::vector> & authentication_methods, + bool should_parse_no_password) { return IParserBase::wrapParseImpl(pos, [&] { @@ -232,7 +250,7 @@ namespace std::shared_ptr ast_authentication_data; - if (!parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory, is_type_specifier_mandatory)) + if (!parseAuthenticationData(pos, expected, ast_authentication_data, is_type_specifier_mandatory, is_type_specifier_mandatory, should_parse_no_password)) { return false; } @@ -248,7 +266,7 @@ namespace { std::shared_ptr ast_authentication_data; - if (!parseAuthenticationData(aux_pos, expected, ast_authentication_data, false, true)) + if (!parseAuthenticationData(aux_pos, expected, ast_authentication_data, false, true, should_parse_no_password)) { break; } @@ -273,7 +291,7 @@ namespace return true; } - return parseIdentifiedWith(pos, expected, authentication_methods); + return parseIdentifiedWith(pos, expected, authentication_methods, true); }); } @@ -480,7 +498,7 @@ namespace return false; } - return parseIdentifiedWith(pos, expected, auth_data); + return parseIdentifiedWith(pos, expected, auth_data, false); }); } @@ -551,29 +569,19 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - if (auth_data.empty()) + if (auth_data.empty() && !reset_authentication_methods_to_new) { parsed_identified_with = parseIdentifiedOrNotIdentified(pos, expected, auth_data); - if (!parsed_identified_with) + if (!parsed_identified_with && alter) { parsed_add_identified_with = parseAddIdentifiedWith(pos, expected, auth_data); - - if (parsed_add_identified_with && !alter) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Create user query is not allowed to have ADD IDENTIFIED, remove the ADD keyword."); - } } } - if (!reset_authentication_methods_to_new) + if (!reset_authentication_methods_to_new && alter && auth_data.empty()) { reset_authentication_methods_to_new = parseResetAuthenticationMethods(pos, expected); - - if (reset_authentication_methods_to_new && !alter) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "RESET AUTHENTICATION METHODS TO NEW can only be used on ALTER statement"); - } } if (!valid_until) @@ -640,31 +648,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec break; } - bool has_no_password_authentication_method = std::find_if( - auth_data.begin(), - auth_data.end(), - [](const std::shared_ptr & ast_authentication_data) - { - return ast_authentication_data->type == AuthenticationType::NO_PASSWORD; - }) != auth_data.end(); - - if (has_no_password_authentication_method && auth_data.size() > 1) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods."); - } - - if (has_no_password_authentication_method && parsed_add_identified_with) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods. " - "Use 'ALTER USER xyz IDENTIFIED WITH no_password' to replace existing authentication methods"); - - } - - if (reset_authentication_methods_to_new && !auth_data.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses"); - } - if (!alter && !hosts) { String common_host_pattern; @@ -700,7 +683,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->storage_name = std::move(storage_name); query->reset_authentication_methods_to_new = reset_authentication_methods_to_new; query->add_identified_with = parsed_add_identified_with; - query->replace_authentication_methods = parsed_identified_with || has_no_password_authentication_method; + query->replace_authentication_methods = parsed_identified_with; for (const auto & authentication_method : query->authentication_methods) { diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 429b04f3907..c7dbf67d784 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -28,13 +28,12 @@ Syntax error CREATE Multiple identified with, not allowed Syntax error Create user with no identification -Add identified with -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password -Try to provide no_password mixed with other authentication methods, should not be allowed +Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types BAD_ARGUMENTS +Try to add no_password mixed with other authentication methods, should not be allowed +SYNTAX_ERROR Adding no_password, should fail -BAD_ARGUMENTS -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed @@ -46,17 +45,18 @@ CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcryp Use WITH without providing authentication type, should fail Syntax error Create user with ADD identification, should fail, add is not allowed for create query -BAD_ARGUMENTS +SYNTAX_ERROR Trailing comma should result in syntax error SYNTAX_ERROR First auth method can't specify type if WITH keyword is not present SYNTAX_ERROR RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -BAD_ARGUMENTS +SYNTAX_ERROR ADD NOT IDENTIFIED should result in syntax error SYNTAX_ERROR RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -BAD_ARGUMENTS +SYNTAX_ERROR +On cluster tests localhost 9000 0 0 0 localhost 9000 0 0 0 Basic authentication after user creation @@ -98,14 +98,12 @@ Syntax error localhost 9000 0 0 0 Create user with no identification localhost 9000 0 0 0 -Add identified with -localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password -Try to provide no_password mixed with other authentication methods, should not be allowed +Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types BAD_ARGUMENTS +Try to add no_password mixed with other authentication methods, should not be allowed +SYNTAX_ERROR Adding no_password, should fail -BAD_ARGUMENTS -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password +SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed localhost 9000 0 0 0 CREATE USER u01_03174 IDENTIFIED WITH no_password @@ -123,14 +121,14 @@ localhost 9000 0 0 0 Use WITH without providing authentication type, should fail Syntax error Create user with ADD identification, should fail, add is not allowed for create query -BAD_ARGUMENTS +SYNTAX_ERROR Trailing comma should result in syntax error SYNTAX_ERROR First auth method can't specify type if WITH keyword is not present SYNTAX_ERROR RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -BAD_ARGUMENTS +SYNTAX_ERROR ADD NOT IDENTIFIED should result in syntax error SYNTAX_ERROR RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -BAD_ARGUMENTS +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 310187c62fc..bfb7775dbb0 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -102,17 +102,14 @@ function test echo "Create user with no identification" ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1" - echo "Add identified with" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" + echo "Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" - ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" - - echo "Try to provide no_password mixed with other authentication methods, should not be allowed" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + echo "Try to add no_password mixed with other authentication methods, should not be allowed" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "Adding no_password, should fail" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" - ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "Replacing existing authentication methods in favor of no_password, should succeed" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH no_password" @@ -139,7 +136,7 @@ function test ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" echo "Create user with ADD identification, should fail, add is not allowed for create query" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "Trailing comma should result in syntax error" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" @@ -148,13 +145,13 @@ function test ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "ADD NOT IDENTIFIED should result in syntax error" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" echo "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" @@ -162,4 +159,6 @@ function test test "" +echo "On cluster tests" + test "ON CLUSTER test_shard_localhost" From aed953bf943eb57feef7ae5a0f2ebf9336543f88 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 27 Aug 2024 16:00:33 +0000 Subject: [PATCH 423/844] black --- tests/integration/helpers/postgres_utility.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index 4bb68284314..c61c535bd62 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -329,11 +329,15 @@ def assert_nested_table_is_created( table = schema_name + "." + table_name print(f"Checking table {table} exists in {materialized_database}") - database_tables = instance.query(f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'") + database_tables = instance.query( + f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'" + ) while table not in database_tables: time.sleep(0.2) - database_tables = instance.query(f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'") + database_tables = instance.query( + f"SHOW TABLES FROM `{materialized_database}` WHERE name = '{table}'" + ) assert table in database_tables From a0ab22e031f6866826fe6cd85a00c4eed8ee9359 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 13:01:10 -0300 Subject: [PATCH 424/844] style --- src/Parsers/Access/ParserCreateUserQuery.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 593026b19ad..8bfc84a28a6 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -25,11 +25,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - namespace { bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, std::optional & new_name) From 0b29aef1a07983579bc454e0941a43c7954827e9 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 15:25:41 -0300 Subject: [PATCH 425/844] remove extra dot from ex message --- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index e0d99b64dfb..86d052ab370 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -109,7 +109,7 @@ namespace if (has_no_password_authentication_method && user.authentication_methods.size() > 1) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods."); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication method 'no_password' cannot co-exist with other authentication methods"); } if (!query.alter) From c298b20ba9e187a1ef9a5cbdacf121876cae334d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 16:17:35 -0300 Subject: [PATCH 426/844] extract some tests into sql as an attempt to make them run faster for flaky check.. --- ..._multiple_authentication_methods.reference | 66 ----------------- .../03174_multiple_authentication_methods.sh | 58 --------------- ...ltiple_authentication_methods_no_login.sql | 70 +++++++++++++++++++ 3 files changed, 70 insertions(+), 124 deletions(-) create mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index c7dbf67d784..cc8628c1ac0 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -23,39 +23,10 @@ Only the latest should work, below should fail AUTHENTICATION_FAILED Should work 1 -Multiple identified with, not allowed -Syntax error -CREATE Multiple identified with, not allowed -Syntax error -Create user with no identification -Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types -BAD_ARGUMENTS -Try to add no_password mixed with other authentication methods, should not be allowed -SYNTAX_ERROR -Adding no_password, should fail -SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 -Create user with mix both implicit and explicit auth type, starting with with -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -Create user with mix both implicit and explicit auth type, starting with by -CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password -Use WITH without providing authentication type, should fail -Syntax error -Create user with ADD identification, should fail, add is not allowed for create query -SYNTAX_ERROR -Trailing comma should result in syntax error -SYNTAX_ERROR -First auth method can't specify type if WITH keyword is not present -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -SYNTAX_ERROR -ADD NOT IDENTIFIED should result in syntax error -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -SYNTAX_ERROR On cluster tests localhost 9000 0 0 0 localhost 9000 0 0 0 @@ -90,45 +61,8 @@ Only the latest should work, below should fail AUTHENTICATION_FAILED Should work 1 -Multiple identified with, not allowed -Syntax error -localhost 9000 0 0 0 -CREATE Multiple identified with, not allowed -Syntax error -localhost 9000 0 0 0 -Create user with no identification -localhost 9000 0 0 0 -Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types -BAD_ARGUMENTS -Try to add no_password mixed with other authentication methods, should not be allowed -SYNTAX_ERROR -Adding no_password, should fail -SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed localhost 9000 0 0 0 CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 -localhost 9000 0 0 0 -Create user with mix both implicit and explicit auth type, starting with with -localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 -Create user with mix both implicit and explicit auth type, starting with by -localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 -Use WITH without providing authentication type, should fail -Syntax error -Create user with ADD identification, should fail, add is not allowed for create query -SYNTAX_ERROR -Trailing comma should result in syntax error -SYNTAX_ERROR -First auth method can't specify type if WITH keyword is not present -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -SYNTAX_ERROR -ADD NOT IDENTIFIED should result in syntax error -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index bfb7775dbb0..d775034bd59 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -89,28 +89,6 @@ function test echo "Should work" test_login_pwd ${user} '6' - echo "Multiple identified with, not allowed" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" - - ${CLICKHOUSE_CLIENT} --query "DROP USER ${user} $1" - - echo "CREATE Multiple identified with, not allowed" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" - - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" - - echo "Create user with no identification" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1" - - echo "Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" - - echo "Try to add no_password mixed with other authentication methods, should not be allowed" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "Adding no_password, should fail" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - echo "Replacing existing authentication methods in favor of no_password, should succeed" ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH no_password" ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" @@ -118,43 +96,7 @@ function test echo "Trying to auth with no pwd, should succeed" test_login_no_pwd ${user} - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" - - echo "Create user with mix both implicit and explicit auth type, starting with with" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" - ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" - - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" - - echo "Create user with mix both implicit and explicit auth type, starting with by" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" - ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" - - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" - - echo "Use WITH without providing authentication type, should fail" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" - - echo "Create user with ADD identification, should fail, add is not allowed for create query" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "Trailing comma should result in syntax error" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "First auth method can't specify type if WITH keyword is not present" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "ADD NOT IDENTIFIED should result in syntax error" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - - echo "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" - } test "" diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql new file mode 100644 index 00000000000..b21b90700b6 --- /dev/null +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql @@ -0,0 +1,70 @@ +DROP USER IF EXISTS u_03174_no_login; + +CREATE USER u_03174_no_login; + +-- multiple identified with, not allowed +ALTER USER u_03174_no_login IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'; -- { clientError SYNTAX_ERROR } + +-- CREATE Multiple identified with, not allowed +CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'; -- { clientError SYNTAX_ERROR } + +DROP USER u_03174_no_login; + +-- Create user with no identification +CREATE USER u_03174_no_login; + +-- Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types +ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '7'; -- { serverError BAD_ARGUMENTS } + +-- Try to add no_password mixed with other authentication methods, should not be allowed +ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '8', no_password; -- { clientError SYNTAX_ERROR } + +-- Adding no_password, should fail +ALTER USER u_03174_no_login ADD IDENTIFIED WITH no_password; -- { clientError SYNTAX_ERROR } + +DROP USER IF EXISTS u_03174_no_login; + +-- Create user with mix both implicit and explicit auth type, starting with with +CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_no_login; + +DROP USER IF EXISTS u_03174_no_login; + +-- Create user with mix both implicit and explicit auth type, starting with with. On cluster +CREATE USER u_03174_no_login ON CLUSTER test_shard_localhost IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_no_login; + +DROP USER IF EXISTS u_03174_no_login; + +-- Create user with mix both implicit and explicit auth type, starting with by +CREATE USER u_03174_no_login IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_no_login; + +DROP USER IF EXISTS u_03174_no_login; + +-- Create user with mix both implicit and explicit auth type, starting with by. On cluster +CREATE USER u_03174_no_login ON CLUSTER test_shard_localhost IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_no_login; + +DROP USER IF EXISTS u_03174_no_login; + +-- Use WITH without providing authentication type, should fail +CREATE USER u_03174_no_login IDENTIFIED WITH BY '1'; -- { clientError SYNTAX_ERROR } + +-- Create user with ADD identification, should fail, add is not allowed for create query +CREATE USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '1'; -- { clientError SYNTAX_ERROR } + +-- Trailing comma should result in syntax error +ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '1',; -- { clientError SYNTAX_ERROR } + +-- First auth method can't specify type if WITH keyword is not present +CREATE USER u_03174_no_login IDENTIFIED plaintext_password by '1'; -- { clientError SYNTAX_ERROR } + +-- RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement +CREATE USER u_03174_no_login RESET AUTHENTICATION METHODS TO NEW; -- { clientError SYNTAX_ERROR } + +-- ADD NOT IDENTIFIED should result in syntax error +ALTER USER u_03174_no_login ADD NOT IDENTIFIED; -- { clientError SYNTAX_ERROR } + +-- RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses +ALTER USER u_03174_no_login IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW; -- { clientError SYNTAX_ERROR } From 50b3d3172cc2c0ed09883bcaa13a6824b8875b30 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 16:48:10 -0300 Subject: [PATCH 427/844] add no fast test --- ...03174_multiple_authentication_methods_no_login.reference | 6 ++++++ .../03174_multiple_authentication_methods_no_login.sql | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference new file mode 100644 index 00000000000..f451567b666 --- /dev/null +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference @@ -0,0 +1,6 @@ +CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +CREATE USER u_03174_no_login IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +CREATE USER u_03174_no_login IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql index b21b90700b6..a79c13b94be 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + DROP USER IF EXISTS u_03174_no_login; CREATE USER u_03174_no_login; From 41a4a97ca3c47bc4420b9247572f937569b98e6f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 27 Aug 2024 19:57:59 -0300 Subject: [PATCH 428/844] no parallel --- .../03174_multiple_authentication_methods_no_login.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql index a79c13b94be..5a158b339ec 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel DROP USER IF EXISTS u_03174_no_login; From e0b9c0f14f6a7e817fddc6eedc5f6bd69b6d9f83 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Wed, 28 Aug 2024 03:47:15 +0000 Subject: [PATCH 429/844] Remove settings update in ASTTableOverride::formatImpl --- src/Parsers/ASTTableOverrides.cpp | 4 +- src/Parsers/tests/gtest_Parser.cpp | 98 +++++++++++++++--------------- 2 files changed, 50 insertions(+), 52 deletions(-) diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index ccb485f6c69..8352e68b156 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -22,10 +22,8 @@ ASTPtr ASTTableOverride::clone() const return res; } -void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState & state, FormatStateStacked frame) const +void ASTTableOverride::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - FormatSettings settings = settings_; - settings.always_quote_identifiers = true; String nl_or_nothing = settings.one_line ? "" : "\n"; String nl_or_ws = settings.one_line ? " " : "\n"; String hl_keyword = settings.hilite ? hilite_keyword : ""; diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 98cd9682c9c..1b50d4480d7 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -225,55 +225,55 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw')", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')" - }, - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n PARTITION BY toYYYYMM(`created`)\n)" - }, - { - "CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())", - "CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE `tbl`\n(\n\n),\nTABLE OVERRIDE `a`\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE `b`\n(\n PARTITION BY rand()\n)" - }, - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(`created`)\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))", - "CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n INDEX `foo` `foo` TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(`_staged` = 1, 'staging', toYYYYMM(`created`))\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)", - "CREATE DATABASE db\nTABLE OVERRIDE `t1`\n(\n TTL `inserted` + toIntervalMonth(1)\n),\nTABLE OVERRIDE `t2`\n(\n TTL `inserted` + toIntervalMonth(2)\n)" - }, - { - "CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE `tab3`\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n),\nTABLE OVERRIDE `tab5`\n(\n PARTITION BY `c3`\n TTL `c3` + toIntervalMinute(10)\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))", - "CREATE DATABASE db\nTABLE OVERRIDE `tbl`\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(`created`)\n)" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)\nCOMMENT 'db comment'" - } -}))); + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw')", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')" + }, + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())", + "CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE tbl\n(\n\n),\nTABLE OVERRIDE a\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE b\n(\n PARTITION BY rand()\n)" + }, + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))", + "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n INDEX foo foo TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created))\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)", + "CREATE DATABASE db\nTABLE OVERRIDE t1\n(\n TTL inserted + toIntervalMonth(1)\n),\nTABLE OVERRIDE t2\n(\n TTL inserted + toIntervalMonth(2)\n)" + }, + { + "CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE tab3\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n),\nTABLE OVERRIDE tab5\n(\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))", + "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)\nCOMMENT 'db comment'" + } + }))); INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, ::testing::Combine( From 2c0ddd10a0c711a1b65895f171b2e570eccef3ba Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:48 +0200 Subject: [PATCH 430/844] Fix build --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b4ba5202aa7..54852a37318 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -71,7 +71,7 @@ static std::initializer_list Date: Wed, 28 Aug 2024 11:52:51 +0000 Subject: [PATCH 431/844] Fix test_role & test_keeper_s3_snapshot --- tests/integration/test_keeper_s3_snapshot/test.py | 2 +- tests/integration/test_role/test.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py index 84ffc964621..1e766cb974b 100644 --- a/tests/integration/test_keeper_s3_snapshot/test.py +++ b/tests/integration/test_keeper_s3_snapshot/test.py @@ -92,7 +92,7 @@ def test_s3_upload(started_cluster): # Keeper sends snapshots asynchornously, hence we need to retry. @retry(AssertionError, tries=10, delay=2) def _check_snapshots(): - assert set(get_saved_snapshots()) == set( + assert set(get_saved_snapshots()) >= set( [ "snapshot_50.bin.zstd", "snapshot_100.bin.zstd", diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 225cab975ff..b746af56083 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -629,5 +629,6 @@ def test_roles_cache(): check() instance.query("DROP USER " + ", ".join(users)) - instance.query("DROP ROLE " + ", ".join(roles)) + if roles: + instance.query("DROP ROLE " + ", ".join(roles)) instance.query("DROP TABLE tbl") From ad08db39e5309602736eefd1dbb535a223993c1f Mon Sep 17 00:00:00 2001 From: imddba Date: Wed, 28 Aug 2024 20:05:06 +0800 Subject: [PATCH 432/844] Add CKibana as third-party GUI --- docs/en/interfaces/third-party/gui.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 8d9dce983bc..16d9b66e17f 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -233,6 +233,16 @@ Features: - Useful tools: Zookeeper data exploration, query EXPLAIN, kill queries, etc. - Visualization metric charts: queries and resource usage, number of merges/mutation, merge performance, query performance, etc. +### CKibana {#ckibana} + +[CKibana](https://github.com/TongchengOpenSource/ckibana) is a lightweight service that allows you to effortlessly search, explore, and visualize ClickHouse data using the native Kibana UI. + +Features: + +- Translates chart requests from the native Kibana UI into ClickHouse query syntax. +- Supports advanced features such as sampling and caching to enhance query performance. +- Minimizes the learning cost for users after migrating from ElasticSearch to ClickHouse. + ## Commercial {#commercial} ### DataGrip {#datagrip} From ae8d90f6b81a79bb08f9314e61fd24732ba51a30 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 09:05:25 -0300 Subject: [PATCH 433/844] no replicated database --- .../03174_multiple_authentication_methods_no_login.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql index 5a158b339ec..124241bf6fd 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest, no-parallel +-- Tags: no-fasttest, no-parallel, no-replicated-database +-- Tag no-replicated-database: https://s3.amazonaws.com/clickhouse-test-reports/65277/43e9a7ba4bbf7f20145531b384a31304895b55bc/stateless_tests__release__old_analyzer__s3__databasereplicated__[1_2].html and https://github.com/ClickHouse/ClickHouse/blob/011c694117845500c82f9563c65930429979982f/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh#L4 DROP USER IF EXISTS u_03174_no_login; From b29e00b83830e238a45b64a42bf135d9c3614b54 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 09:17:04 -0300 Subject: [PATCH 434/844] add space --- src/Interpreters/Access/InterpreterCreateUserQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 86d052ab370..81600b2b6eb 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -90,7 +90,7 @@ namespace if (number_of_authentication_methods > max_number_of_authentication_methods) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user." + "User can not be created/updated because it exceeds the allowed quantity of authentication methods per user. " "Check the `max_authentication_methods_per_user` setting"); } } From 2c6563ee691a7c1a0cee61af815ba037393957d2 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Wed, 28 Aug 2024 12:37:28 +0000 Subject: [PATCH 435/844] Fix test in tests/integration/test_materialized_mysql_database/materialized_with_ddl.py --- .../test_materialized_mysql_database/materialized_with_ddl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 86000799ae4..9a99f0c9aa8 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -2353,7 +2353,7 @@ def table_overrides(clickhouse_node, mysql_node, service_name): ) check_query(clickhouse_node, "SELECT count() FROM table_overrides.t1", "1001\n") show_db = clickhouse_node.query("SHOW CREATE DATABASE table_overrides") - assert "TABLE OVERRIDE `t1`\\n(\\n\\n)" in show_db, show_db + assert "TABLE OVERRIDE t1\\n(\\n\\n)" in show_db, show_db clickhouse_node.query("DROP DATABASE IF EXISTS table_overrides") mysql_node.query("DROP DATABASE IF EXISTS table_overrides") From ae582120aee9065d547c49c1bd13e76d7afacc84 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Aug 2024 20:56:33 +0800 Subject: [PATCH 436/844] change as request --- src/Core/SettingsChangesHistory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b9c18c88652..803f19b894a 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -71,6 +71,7 @@ static std::initializer_list Date: Wed, 28 Aug 2024 13:05:43 +0000 Subject: [PATCH 437/844] Prevent specifying properties in `MODIFY COLUMN` queries when using `REMOVE`/`RESET SETTING`/`MODIFY SETTING` --- src/Parsers/ParserAlterQuery.cpp | 34 +++- ...rties_before_remove_modify_reset.reference | 13 ++ ..._properties_before_remove_modify_reset.sql | 169 ++++++++++++++++++ 3 files changed, 212 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.reference create mode 100644 tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 73fd563faf6..3920f09918a 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -1,6 +1,8 @@ -#include -#include #include + +#include +#include +#include #include #include #include @@ -9,14 +11,19 @@ #include #include #include -#include -#include +#include #include +#include namespace DB { +namespace ErrorCodes +{ +extern const int SYNTAX_ERROR; +} + bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto command = std::make_shared(); @@ -725,8 +732,23 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_modify_col_decl.parse(pos, command_col_decl, expected)) return false; + auto check_no_type = [&](const std::string_view keyword) + { + if (!command_col_decl) + return; + const auto & column_decl = command_col_decl->as(); + + if (!column_decl.children.empty() || column_decl.null_modifier.has_value() || !column_decl.default_specifier.empty() + || column_decl.ephemeral_default || column_decl.primary_key_specifier) + { + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot specify column properties before '{}'", keyword); + } + }; + if (s_remove.ignore(pos, expected)) { + check_no_type(s_remove.getName()); + if (s_default.ignore(pos, expected)) command->remove_property = toStringView(Keyword::DEFAULT); else if (s_materialized.ignore(pos, expected)) @@ -746,11 +768,15 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } else if (s_modify_setting.ignore(pos, expected)) { + check_no_type(s_modify_setting.getName()); + if (!parser_settings.parse(pos, command_settings_changes, expected)) return false; } else if (s_reset_setting.ignore(pos, expected)) { + check_no_type(s_reset_setting.getName()); + if (!parser_reset_setting.parse(pos, command_settings_resets, expected)) return false; } diff --git a/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.reference b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.reference new file mode 100644 index 00000000000..60c67ceac92 --- /dev/null +++ b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.reference @@ -0,0 +1,13 @@ +REMOVE +The same, but with type +MODIFY SETTING +The same, but with type +RESET SETTING +The same, but with type +All the above, but on server side +REMOVE +The same, but with type +MODIFY SETTING +The same, but with type +RESET SETTING +The same, but with type diff --git a/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql new file mode 100644 index 00000000000..13ad11bb139 --- /dev/null +++ b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql @@ -0,0 +1,169 @@ +DROP TABLE IF EXISTS a SYNC; +CREATE TABLE a (x Int64, y Int64 MATERIALIZED 1 SETTINGS (max_compress_block_size = 30000)) ENGINE = MergeTree ORDER BY x; + + +SELECT 'REMOVE'; +ALTER TABLE a MODIFY COLUMN y Int64 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y NOT NULL REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y DEFAULT 2 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COMMENT 5 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y TTL toDate('2025-01-01') + toIntervalDay(x) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COLLATE binary REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y SETTINGS (max_compress_block_size = 20000) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y PRIMARY KEY REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate('2025-01-01') + toIntervalDay(x) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (max_compress_block_size = 20000) REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } + +SELECT 'MODIFY SETTING'; +ALTER TABLE a MODIFY COLUMN y Int64 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y NOT NULL MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COMMENT 5 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y TTL toDate('2025-01-01') + toIntervalDay(x) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COLLATE binary MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate('2025-01-01') + toIntervalDay(x) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (some_setting = 2) MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } + +SELECT 'RESET SETTING'; +ALTER TABLE a MODIFY COLUMN y Int64 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y NOT NULL RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y DEFAULT 2 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COMMENT 5 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y TTL toDate('2025-01-01') + toIntervalDay(x) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y COLLATE binary RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y PRIMARY KEY RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate('2025-01-01') + toIntervalDay(x) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (some_setting = 2) RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } +ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } + + + +SELECT 'All the above, but on server side'; + +SELECT 'REMOVE'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COMMENT 5 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y TTL toDate(\'2025-01-01\') + toIntervalDay(x) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COLLATE binary REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (max_compress_block_size = 20000) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate(\'2025-01-01\') + toIntervalDay(x) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (max_compress_block_size = 20000) REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } + +SELECT 'MODIFY SETTING'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COMMENT 5 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y TTL toDate(\'2025-01-01\') + toIntervalDay(x) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COLLATE binary MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate(\'2025-01-01\') + toIntervalDay(x) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (some_setting = 2) MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } + +SELECT 'RESET SETTING'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COMMENT 5 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y CODEC(ZSTD) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y STATISTICS(tdigest) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y TTL toDate(\'2025-01-01\') + toIntervalDay(x) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y COLLATE binary RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } + +SELECT 'The same, but with type'; +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COMMENT 5 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 CODEC(ZSTD) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 STATISTICS(tdigest) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 TTL toDate(\'2025-01-01\') + toIntervalDay(x) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 COLLATE binary RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 SETTINGS (some_setting = 2) RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } +SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } From a0fa693f0b56390a8be4b18aef03a9808fcd63db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Aug 2024 13:20:58 +0000 Subject: [PATCH 438/844] Add safety assertion --- src/Parsers/ParserAlterQuery.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 3920f09918a..90e0d0cade0 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -736,6 +736,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { if (!command_col_decl) return; + const auto & column_decl = command_col_decl->as(); if (!column_decl.children.empty() || column_decl.null_modifier.has_value() || !column_decl.default_specifier.empty() @@ -791,6 +792,11 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } } command->type = ASTAlterCommand::MODIFY_COLUMN; + + /// Make sure that type is not populated when REMOVE/MODIFY SETTING/RESET SETTING is used, because we wouldn't modify the type, which can be confusing + chassert( + nullptr == command_col_decl->as().type + || (command->remove_property.empty() && nullptr == command_settings_changes && nullptr == command_settings_resets)); } else if (s_modify_order_by.ignore(pos, expected)) { From 0d463b839e2e0c89e61754fc53aa904ae6728e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Aug 2024 13:29:41 +0000 Subject: [PATCH 439/844] Remove unused parser --- src/Parsers/ParserAlterQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 90e0d0cade0..54caf574e03 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -129,7 +129,6 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; ParserStringAndSubstitution parser_string_and_substituion; - ParserIdentifier parser_remove_property; ParserCompoundColumnDeclaration parser_col_decl; ParserIndexDeclaration parser_idx_decl; ParserStatisticsDeclaration parser_stat_decl; From f76e6ecdaf8c2bb005e3b2712b64db4670120c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Aug 2024 13:30:25 +0000 Subject: [PATCH 440/844] Make check more specific --- src/Storages/MutationCommands.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f5ccc80f1d8..8276e9de232 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes { extern const int UNKNOWN_MUTATION_COMMAND; extern const int MULTIPLE_ASSIGNMENTS_TO_COLUMN; + extern const int LOGICAL_ERROR; } @@ -115,10 +116,10 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.column_name = getIdentifierName(command->column); return res; } - /// MODIFY COLUMN x REMOVE MATERIALIZED is a valid alter command, but doesn't have any specified column type, thus no mutation is needed + /// MODIFY COLUMN x REMOVE MATERIALIZED/RESET SETTING/MODIFY SETTING is a valid alter command, but doesn't have any specified column type, + /// thus no mutation is needed else if ( - parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN && command->col_decl - && command->col_decl->as().type) + parse_alter_commands && command->type == ASTAlterCommand::MODIFY_COLUMN && command->remove_property.empty() && nullptr == command->settings_changes && nullptr == command->settings_resets) { MutationCommand res; res.ast = command->ptr(); From f109c141b0cffd9c27c935774a88a16ad071bac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 28 Aug 2024 13:30:41 +0000 Subject: [PATCH 441/844] Add safety check --- src/Storages/MutationCommands.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 8276e9de232..75440aeac59 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -125,6 +125,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.ast = command->ptr(); res.type = MutationCommand::Type::READ_COLUMN; const auto & ast_col_decl = command->col_decl->as(); + if (nullptr == ast_col_decl.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MODIFY COLUMN mutation command doesn't specify type: {}", serializeAST(*command)); res.column_name = ast_col_decl.name; res.data_type = DataTypeFactory::instance().get(ast_col_decl.type); return res; From 5b3ca6b2b916b63d12ddb402c0726d0c2e29c1b1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Aug 2024 22:14:43 +0800 Subject: [PATCH 442/844] allow unsligned arrays in arrayZip --- src/Functions/array/arrayZip.cpp | 114 +++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 29 deletions(-) diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index 6c6fff5926b..39c04264c84 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -1,7 +1,8 @@ -#include #include -#include +#include +#include #include +#include #include #include #include @@ -12,23 +13,22 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int SIZES_OF_ARRAYS_DONT_MATCH; +extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; +extern const int ILLEGAL_COLUMN; } /// arrayZip(['a', 'b', 'c'], ['d', 'e', 'f']) = [('a', 'd'), ('b', 'e'), ('c', 'f')] +/// arrayZipUnaligned(['a', 'b', 'c'], ['d', 'e']) = [('a', 'd'), ('b', 'e'), ('c', null)] +template class FunctionArrayZip : public IFunction { public: - static constexpr auto name = "arrayZip"; + static constexpr auto name = allow_unaligned ? "arrayZipUnaligned" : "arrayZip"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override - { - return name; - } + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -39,8 +39,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, - "Function {} needs at least one argument; passed {}." , getName(), arguments.size()); + throw Exception( + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, + "Function {} needs at least one argument; passed {}.", + getName(), + arguments.size()); DataTypes arguments_types; for (size_t index = 0; index < arguments.size(); ++index) @@ -48,16 +51,24 @@ public: const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", - toString(index + 1), getName(), arguments[0].type->getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} of function {} must be array. Found {} instead.", + toString(index + 1), + getName(), + arguments[0].type->getName()); - arguments_types.emplace_back(array_type->getNestedType()); + auto nested_type = array_type->getNestedType(); + if constexpr (allow_unaligned) + nested_type = makeNullable(nested_type); + arguments_types.emplace_back(nested_type); } return std::make_shared(std::make_shared(arguments_types)); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override { size_t num_arguments = arguments.size(); @@ -68,12 +79,19 @@ public: { /// Constant columns cannot be inside tuple. It's only possible to have constant tuple as a whole. ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); - const ColumnArray * column_array = checkAndGetColumn(holder.get()); - if (!column_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. Found column {} instead.", - i + 1, getName(), holder->getName()); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument {} of function {} must be array. Found column {} instead.", + i + 1, + getName(), + holder->getName()); + + tuple_columns[i] = column_array->getDataPtr(); + + if constexpr (allow_unaligned) + tuple_columns[i] = makeNullable(tuple_columns[i]); if (i == 0) { @@ -81,23 +99,61 @@ public: } else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) { - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "The argument 1 and argument {} of function {} have different array sizes", - i + 1, getName()); + if constexpr (allow_unaligned) + return executeUnaligned(static_cast(*first_array_column), *column_array, input_rows_count); + else + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "The argument 1 and argument {} of function {} have different array sizes", + i + 1, + getName()); } - - tuple_columns[i] = column_array->getDataPtr(); } return ColumnArray::create( - ColumnTuple::create(tuple_columns), static_cast(*first_array_column).getOffsetsPtr()); + ColumnTuple::create(std::move(tuple_columns)), static_cast(*first_array_column).getOffsetsPtr()); + } + +private: + ColumnPtr + executeUnaligned(const ColumnArray & first_array_colmn, const ColumnArray & second_array_column, size_t input_rows_count) const + { + const auto & first_data = first_array_colmn.getDataPtr(); + const auto & second_data = second_array_column.getDataPtr(); + const auto & nullable_first_data = makeNullable(first_data); + const auto & nullable_second_data = makeNullable(second_data); + auto res_first_data = nullable_first_data->cloneEmpty(); + auto res_second_data = nullable_second_data->cloneEmpty(); + auto res_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count); + auto & res_offsets = assert_cast(*res_offsets_column).getData(); + + const auto & first_offsets = first_array_colmn.getOffsets(); + const auto & second_offsets = second_array_column.getOffsets(); + for (size_t i = 0; i < input_rows_count; ++i) + { + size_t first_size = first_offsets[i] - first_offsets[i - 1]; + size_t second_size = second_offsets[i] - second_offsets[i - 1]; + + res_first_data->insertRangeFrom(*nullable_first_data, first_offsets[i - 1], first_size); + res_second_data->insertRangeFrom(*nullable_second_data, second_offsets[i - 1], second_size); + + if (first_size < second_size) + res_first_data->insertManyDefaults(second_size - first_size); + else if (first_size > second_size) + res_second_data->insertManyDefaults(first_size - second_size); + + res_offsets[i] = std::max(first_size, second_size); + } + + Columns tuple_columns{std::move(res_first_data), std::move(res_second_data)}; + return ColumnArray::create(ColumnTuple::create(std::move(tuple_columns)), std::move(res_offsets_column)); } }; REGISTER_FUNCTION(ArrayZip) { - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } - From a1c9cc471d5b8afbc21a4aa6f8d44791c896eb10 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:51:45 +0200 Subject: [PATCH 443/844] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- .../AggregateFunctionDistinctJSONPaths.cpp | 20 +++++++++---------- src/Columns/ColumnDynamic.cpp | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp index 6100bd57515..98996aac2f7 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp @@ -49,8 +49,8 @@ struct AggregateFunctionDistinctJSONPathsData /// Iterate over paths in shared data in this row. const auto [shared_data_paths, _] = column.getSharedDataPathsAndValues(); const auto & shared_data_offsets = column.getSharedDataOffsets(); - size_t start = shared_data_offsets[static_cast(row_num) - 1]; - size_t end = shared_data_offsets[static_cast(row_num)]; + const size_t start = shared_data_offsets[static_cast(row_num) - 1]; + const size_t end = shared_data_offsets[static_cast(row_num)]; for (size_t i = start; i != end; ++i) data.insert(shared_data_paths->getDataAt(i).toString()); } @@ -137,8 +137,8 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData /// Iterate over paths om shared data in this row and decode the data types. const auto [shared_data_paths, shared_data_values] = column.getSharedDataPathsAndValues(); const auto & shared_data_offsets = column.getSharedDataOffsets(); - size_t start = shared_data_offsets[static_cast(row_num) - 1]; - size_t end = shared_data_offsets[static_cast(row_num)]; + const size_t start = shared_data_offsets[static_cast(row_num) - 1]; + const size_t end = shared_data_offsets[static_cast(row_num)]; for (size_t i = start; i != end; ++i) { auto path = shared_data_paths->getDataAt(i).toString(); @@ -146,8 +146,8 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData ReadBufferFromMemory buf(value.data, value.size); auto type = decodeDataType(buf); /// We should not have Nulls here but let's check just in case. - if (!isNothing(type)) - data[path].insert(type->getName()); + chassert(!isNothingType(type)); + data[path].insert(type->getName()); } } @@ -172,8 +172,8 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData ReadBufferFromMemory buf(value.data, value.size); auto type = decodeDataType(buf); /// We should not have Nulls here but let's check just in case. - if (!isNothing(type)) - data[path].insert(type->getName()); + chassert(!isNothingType(type)); + data[path].insert(type->getName()); } } @@ -200,7 +200,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData size_t paths_size, types_size; readVarUInt(paths_size, buf); if (paths_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for paths (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, paths_size); data.reserve(paths_size); String path, type; @@ -209,7 +209,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData readStringBinary(path, buf); readVarUInt(types_size, buf); if (types_size > DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size for types (maximum: {}): {}", DISTINCT_JSON_PATHS_MAX_ARRAY_SIZE, types_size); data[path].reserve(types_size); for (size_t j = 0; j != types_size; ++j) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index b5378e983c6..eb85c6b0d08 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -983,7 +983,7 @@ ColumnPtr ColumnDynamic::compress() const String ColumnDynamic::getTypeNameAt(size_t row_num) const { const auto & variant_col = getVariantColumn(); - size_t discr = variant_col.globalDiscriminatorAt(row_num); + const size_t discr = variant_col.globalDiscriminatorAt(row_num); if (discr == ColumnVariant::NULL_DISCRIMINATOR) return ""; From 0aba986372504cbdc6eb28283f51a706ffdb7b2e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Aug 2024 15:06:05 +0000 Subject: [PATCH 444/844] Address review comments --- .../reference/distinctjsonpaths.md | 41 +++++++++++++++++++ .../AggregateFunctionDistinctDynamicTypes.cpp | 2 +- .../AggregateFunctionDistinctJSONPaths.cpp | 8 ++-- src/Columns/ColumnDynamic.cpp | 2 +- src/Columns/ColumnDynamic.h | 2 +- 5 files changed, 48 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md b/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md index f916734ca44..d88b2eb024b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md +++ b/docs/en/sql-reference/aggregate-functions/reference/distinctjsonpaths.md @@ -82,3 +82,44 @@ Result: │ {'a':['Int64'],'b':['Array(Nullable(Int64))','String'],'c.d.e':['Date'],'c.d.f':['Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))']} │ └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + +**Note** + +If JSON declaration contains paths with specified types, these paths will be always included in the result of `distinctJSONPaths/distinctJSONPathsAndTypes` functions even if input data didn't have values for these paths. + +```sql +DROP TABLE IF EXISTS test_json; +CREATE TABLE test_json(json JSON(a UInt32)) ENGINE = Memory; +INSERT INTO test_json VALUES ('{"b" : "Hello"}'), ('{"b" : "World", "c" : [1, 2, 3]}'); +``` + +```sql +SELECT json FROM test_json; +``` + +```text +┌─json──────────────────────────────────┐ +│ {"a":0,"b":"Hello"} │ +│ {"a":0,"b":"World","c":["1","2","3"]} │ +└───────────────────────────────────────┘ +``` + +```sql +SELECT distinctJSONPaths(json) FROM test_json; +``` + +```text +┌─distinctJSONPaths(json)─┐ +│ ['a','b','c'] │ +└─────────────────────────┘ +``` + +```sql +SELECT distinctJSONPathsAndTypes(json) FROM test_json; +``` + +```text +┌─distinctJSONPathsAndTypes(json)────────────────────────────────┐ +│ {'a':['UInt32'],'b':['String'],'c':['Array(Nullable(Int64))']} │ +└────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp index 17e32b20a99..57f7aecd316 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctDynamicTypes.cpp @@ -106,7 +106,7 @@ public: /// In this case we can avoid iterating over all rows because we can get all types /// in Dynamic column in a more efficient way. else - assert_cast(*columns[0]).getAllTypeNames(data(place).data); + assert_cast(*columns[0]).getAllTypeNamesInto(data(place).data); } void addManyDefaults( diff --git a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp index 98996aac2f7..4e60e6fe60b 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinctJSONPaths.cpp @@ -146,7 +146,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData ReadBufferFromMemory buf(value.data, value.size); auto type = decodeDataType(buf); /// We should not have Nulls here but let's check just in case. - chassert(!isNothingType(type)); + chassert(!isNothing(type)); data[path].insert(type->getName()); } } @@ -160,7 +160,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData /// Add dynamic path only if it has at least one non-null value. /// getNumberOfDefaultRows for Dynamic column is O(1). if (dynamic_column->getNumberOfDefaultRows() != dynamic_column->size()) - dynamic_column->getAllTypeNames(data[path]); + dynamic_column->getAllTypeNamesInto(data[path]); } /// Iterate over all paths in shared data and decode the data types. @@ -172,7 +172,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData ReadBufferFromMemory buf(value.data, value.size); auto type = decodeDataType(buf); /// We should not have Nulls here but let's check just in case. - chassert(!isNothingType(type)); + chassert(!isNothing(type)); data[path].insert(type->getName()); } } @@ -255,7 +255,7 @@ struct AggregateFunctionDistinctJSONPathsAndTypesData } }; -/// Calculates the list of distinct data types in Dynamic column. +/// Calculates the list of distinct paths or pairs (path, type) in JSON column. template class AggregateFunctionDistinctJSONPathsAndTypes final : public IAggregateFunctionDataHelper> { diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index eb85c6b0d08..269c8455e2f 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -997,7 +997,7 @@ String ColumnDynamic::getTypeNameAt(size_t row_num) const return variant_info.variant_names[discr]; } -void ColumnDynamic::getAllTypeNames(std::unordered_set & names) const +void ColumnDynamic::getAllTypeNamesInto(std::unordered_set & names) const { auto shared_variant_discr = getSharedVariantDiscriminator(); for (size_t i = 0; i != variant_info.variant_names.size(); ++i) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index c06c31bb8c9..5789b80a2de 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -431,7 +431,7 @@ public: const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); } String getTypeNameAt(size_t row_num) const; - void getAllTypeNames(std::unordered_set & names) const; + void getAllTypeNamesInto(std::unordered_set & names) const; private: void createVariantInfo(const DataTypePtr & variant_type); From 9f7f6c6f931c800fc8126bf0b94b59137638226d Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 12:54:22 -0300 Subject: [PATCH 445/844] use curl client for create/drop/alter queries to make test run faster --- .../03174_multiple_authentication_methods.sh | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index d775034bd59..97c20fe6e16 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -33,14 +33,14 @@ function test { user="u01_03174" - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user} $1" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" - ${CLICKHOUSE_CLIENT} --query "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password BY '1'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password BY '1'" echo "Basic authentication after user creation" test_login_pwd ${user} '1' - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password BY '2'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password BY '2'" echo "Changed password, old password should not work" test_login_pwd_expect_error ${user} '1' @@ -48,7 +48,7 @@ function test echo "New password should work" test_login_pwd ${user} '2' - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password BY '3', plaintext_password BY '4'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password BY '3', plaintext_password BY '4'" echo "Two new passwords were added, should both work" test_login_pwd ${user} '3' @@ -57,7 +57,7 @@ function test ssh_pub_key="AAAAC3NzaC1lZDI1NTE5AAAAIBzqa3duS0ce6QYkzUgko9W0Ux7i7d3xPoseFrwnhY4Y" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH ssh_key BY KEY '${ssh_pub_key}' TYPE 'ssh-ed25519'" echo ${ssh_key} > ssh_key @@ -65,7 +65,7 @@ function test ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" echo "Altering credentials and keeping only bcrypt_password" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH bcrypt_password BY '5'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH bcrypt_password BY '5'" echo "Asserting SSH does not work anymore" ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' @@ -74,14 +74,14 @@ function test test_login_pwd ${user} '5' echo "Adding new bcrypt_password" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 ADD IDENTIFIED WITH bcrypt_password BY '6'" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH bcrypt_password BY '6'" echo "Both current authentication methods should work" test_login_pwd ${user} '5' test_login_pwd ${user} '6' echo "Reset authentication methods to new" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" echo "Only the latest should work, below should fail" test_login_pwd_expect_error ${user} '5' @@ -90,13 +90,13 @@ function test test_login_pwd ${user} '6' echo "Replacing existing authentication methods in favor of no_password, should succeed" - ${CLICKHOUSE_CLIENT} --query "ALTER USER ${user} $1 IDENTIFIED WITH no_password" - ${CLICKHOUSE_CLIENT} --query "SHOW CREATE USER ${user}" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH no_password" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" echo "Trying to auth with no pwd, should succeed" test_login_no_pwd ${user} - ${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS ${user}" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user}" } test "" From 82eae9f09f58959c2d9727dce66092132168de66 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:18:57 +0000 Subject: [PATCH 446/844] tune --- tests/ci/integration_tests_runner.py | 2 +- tests/integration/test_async_load_databases/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index a7d5a8c4cf6..96e1ddc9bff 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -33,7 +33,7 @@ CLICKHOUSE_BINARY_PATH = "usr/bin/clickhouse" CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-odbc-bridge" CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH = "usr/bin/clickhouse-library-bridge" -FLAKY_TRIES_COUNT = 5 # run whole pytest several times +FLAKY_TRIES_COUNT = 3 # run whole pytest several times FLAKY_REPEAT_COUNT = 5 # runs test case in single module several times MAX_TIME_SECONDS = 3600 diff --git a/tests/integration/test_async_load_databases/test.py b/tests/integration/test_async_load_databases/test.py index f36cff76ea2..94aba46c713 100644 --- a/tests/integration/test_async_load_databases/test.py +++ b/tests/integration/test_async_load_databases/test.py @@ -182,4 +182,4 @@ def test_multiple_tables(started_cluster): for i in order: assert query(f"select count() from test.table_{i}") == "100\n" for i in range(tables_count): - query(f"drop table test.table_{i}") + query(f"drop table test.table_{i} sync") From 313b6b533f3ab2059ad07fdbdf4b32d396f19e09 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 13:54:31 -0300 Subject: [PATCH 447/844] further optimize test by using http client instead of clickhouse-client --- .../0_stateless/03174_multiple_authentication_methods.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 97c20fe6e16..41c2f89bc77 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -16,12 +16,12 @@ ssh_key="-----BEGIN OPENSSH PRIVATE KEY----- function test_login_no_pwd { - ${CLICKHOUSE_CLIENT} --user $1 --query "select 1" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=$1" -d "select 1" } function test_login_pwd { - ${CLICKHOUSE_CLIENT} --user $1 --password $2 --query "select 1" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=$1&password=$2" -d "select 1" } function test_login_pwd_expect_error From 7879915493723ba1e1cd46603577f490afa41bbb Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 14:01:24 -0300 Subject: [PATCH 448/844] merge test fils now that they are faster --- ..._multiple_authentication_methods.reference | 67 +++++++++++++++++ .../03174_multiple_authentication_methods.sh | 60 ++++++++++++++- ..._authentication_methods_no_login.reference | 6 -- ...ltiple_authentication_methods_no_login.sql | 73 ------------------- 4 files changed, 126 insertions(+), 80 deletions(-) delete mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference delete mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index cc8628c1ac0..759097b3da2 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -23,10 +23,39 @@ Only the latest should work, below should fail AUTHENTICATION_FAILED Should work 1 +Multiple identified with, not allowed +Syntax error +CREATE Multiple identified with, not allowed +Syntax error +Create user with no identification +Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types +BAD_ARGUMENTS +Try to add no_password mixed with other authentication methods, should not be allowed +SYNTAX_ERROR +Adding no_password, should fail +SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 +Create user with mix both implicit and explicit auth type, starting with with +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +Create user with mix both implicit and explicit auth type, starting with by +CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password +Use WITH without providing authentication type, should fail +Syntax error +Create user with ADD identification, should fail, add is not allowed for create query +SYNTAX_ERROR +Trailing comma should result in syntax error +SYNTAX_ERROR +First auth method can't specify type if WITH keyword is not present +SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement +SYNTAX_ERROR +ADD NOT IDENTIFIED should result in syntax error +SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses +SYNTAX_ERROR On cluster tests localhost 9000 0 0 0 localhost 9000 0 0 0 @@ -61,8 +90,46 @@ Only the latest should work, below should fail AUTHENTICATION_FAILED Should work 1 +Multiple identified with, not allowed +Syntax error +localhost 9000 0 0 0 +CREATE Multiple identified with, not allowed +Syntax error +localhost 9000 0 0 0 +Create user with no identification +localhost 9000 0 0 0 +Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types +BAD_ARGUMENTS +BAD_ARGUMENTS +Try to add no_password mixed with other authentication methods, should not be allowed +SYNTAX_ERROR +Adding no_password, should fail +SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed localhost 9000 0 0 0 CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 +localhost 9000 0 0 0 +Create user with mix both implicit and explicit auth type, starting with with +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +Create user with mix both implicit and explicit auth type, starting with by +localhost 9000 0 0 0 +CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password +localhost 9000 0 0 0 +Use WITH without providing authentication type, should fail +Syntax error +Create user with ADD identification, should fail, add is not allowed for create query +SYNTAX_ERROR +Trailing comma should result in syntax error +SYNTAX_ERROR +First auth method can't specify type if WITH keyword is not present +SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement +SYNTAX_ERROR +ADD NOT IDENTIFIED should result in syntax error +SYNTAX_ERROR +RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses +SYNTAX_ERROR diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 41c2f89bc77..a7969e55a13 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -89,6 +89,28 @@ function test echo "Should work" test_login_pwd ${user} '6' + echo "Multiple identified with, not allowed" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER ${user} $1" + + echo "CREATE Multiple identified with, not allowed" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" + + echo "Create user with no identification" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1" + + echo "Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + + echo "Try to add no_password mixed with other authentication methods, should not be allowed" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "Adding no_password, should fail" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + echo "Replacing existing authentication methods in favor of no_password, should succeed" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH no_password" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" @@ -96,11 +118,47 @@ function test echo "Trying to auth with no pwd, should succeed" test_login_no_pwd ${user} + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" + + echo "Create user with mix both implicit and explicit auth type, starting with with" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" + + echo "Create user with mix both implicit and explicit auth type, starting with by" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" + + echo "Use WITH without providing authentication type, should fail" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" + + echo "Create user with ADD identification, should fail, add is not allowed for create query" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "Trailing comma should result in syntax error" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "First auth method can't specify type if WITH keyword is not present" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "ADD NOT IDENTIFIED should result in syntax error" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + + echo "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user}" + } test "" echo "On cluster tests" -test "ON CLUSTER test_shard_localhost" +test "ON CLUSTER test_shard_localhost" \ No newline at end of file diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference deleted file mode 100644 index f451567b666..00000000000 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.reference +++ /dev/null @@ -1,6 +0,0 @@ -CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 -CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -CREATE USER u_03174_no_login IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 -CREATE USER u_03174_no_login IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql deleted file mode 100644 index 124241bf6fd..00000000000 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods_no_login.sql +++ /dev/null @@ -1,73 +0,0 @@ --- Tags: no-fasttest, no-parallel, no-replicated-database --- Tag no-replicated-database: https://s3.amazonaws.com/clickhouse-test-reports/65277/43e9a7ba4bbf7f20145531b384a31304895b55bc/stateless_tests__release__old_analyzer__s3__databasereplicated__[1_2].html and https://github.com/ClickHouse/ClickHouse/blob/011c694117845500c82f9563c65930429979982f/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh#L4 - -DROP USER IF EXISTS u_03174_no_login; - -CREATE USER u_03174_no_login; - --- multiple identified with, not allowed -ALTER USER u_03174_no_login IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'; -- { clientError SYNTAX_ERROR } - --- CREATE Multiple identified with, not allowed -CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'; -- { clientError SYNTAX_ERROR } - -DROP USER u_03174_no_login; - --- Create user with no identification -CREATE USER u_03174_no_login; - --- Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types -ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '7'; -- { serverError BAD_ARGUMENTS } - --- Try to add no_password mixed with other authentication methods, should not be allowed -ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '8', no_password; -- { clientError SYNTAX_ERROR } - --- Adding no_password, should fail -ALTER USER u_03174_no_login ADD IDENTIFIED WITH no_password; -- { clientError SYNTAX_ERROR } - -DROP USER IF EXISTS u_03174_no_login; - --- Create user with mix both implicit and explicit auth type, starting with with -CREATE USER u_03174_no_login IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4'; -SHOW CREATE USER u_03174_no_login; - -DROP USER IF EXISTS u_03174_no_login; - --- Create user with mix both implicit and explicit auth type, starting with with. On cluster -CREATE USER u_03174_no_login ON CLUSTER test_shard_localhost IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4'; -SHOW CREATE USER u_03174_no_login; - -DROP USER IF EXISTS u_03174_no_login; - --- Create user with mix both implicit and explicit auth type, starting with by -CREATE USER u_03174_no_login IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4'; -SHOW CREATE USER u_03174_no_login; - -DROP USER IF EXISTS u_03174_no_login; - --- Create user with mix both implicit and explicit auth type, starting with by. On cluster -CREATE USER u_03174_no_login ON CLUSTER test_shard_localhost IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4'; -SHOW CREATE USER u_03174_no_login; - -DROP USER IF EXISTS u_03174_no_login; - --- Use WITH without providing authentication type, should fail -CREATE USER u_03174_no_login IDENTIFIED WITH BY '1'; -- { clientError SYNTAX_ERROR } - --- Create user with ADD identification, should fail, add is not allowed for create query -CREATE USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '1'; -- { clientError SYNTAX_ERROR } - --- Trailing comma should result in syntax error -ALTER USER u_03174_no_login ADD IDENTIFIED WITH plaintext_password by '1',; -- { clientError SYNTAX_ERROR } - --- First auth method can't specify type if WITH keyword is not present -CREATE USER u_03174_no_login IDENTIFIED plaintext_password by '1'; -- { clientError SYNTAX_ERROR } - --- RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -CREATE USER u_03174_no_login RESET AUTHENTICATION METHODS TO NEW; -- { clientError SYNTAX_ERROR } - --- ADD NOT IDENTIFIED should result in syntax error -ALTER USER u_03174_no_login ADD NOT IDENTIFIED; -- { clientError SYNTAX_ERROR } - --- RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -ALTER USER u_03174_no_login IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW; -- { clientError SYNTAX_ERROR } From b2795f06dc65d0bb9668f982e6f5db28d886b430 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 14:02:09 -0300 Subject: [PATCH 449/844] add extra line --- .../0_stateless/03174_multiple_authentication_methods.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index a7969e55a13..715b99a4334 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -161,4 +161,4 @@ test "" echo "On cluster tests" -test "ON CLUSTER test_shard_localhost" \ No newline at end of file +test "ON CLUSTER test_shard_localhost" From a690539935c380040afc49fffa16342a97381228 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 18:03:27 -0300 Subject: [PATCH 450/844] fix test --- .../0_stateless/03174_multiple_authentication_methods.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index 759097b3da2..c7dbf67d784 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -100,7 +100,6 @@ Create user with no identification localhost 9000 0 0 0 Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types BAD_ARGUMENTS -BAD_ARGUMENTS Try to add no_password mixed with other authentication methods, should not be allowed SYNTAX_ERROR Adding no_password, should fail From cb6d142947fa4fcd614f298b244c10c79aff0cd0 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 28 Aug 2024 18:09:30 -0300 Subject: [PATCH 451/844] remove non clustered tests to make it faster :D --- ..._multiple_authentication_methods.reference | 59 ------------------- .../03174_multiple_authentication_methods.sh | 4 -- 2 files changed, 63 deletions(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index c7dbf67d784..d503b78a2bf 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -1,62 +1,3 @@ -Basic authentication after user creation -1 -Changed password, old password should not work -AUTHENTICATION_FAILED -New password should work -1 -Two new passwords were added, should both work -1 -1 -Authenticating with ssh key -1 -Altering credentials and keeping only bcrypt_password -Asserting SSH does not work anymore -AUTHENTICATION_FAILED -Asserting bcrypt_password works -1 -Adding new bcrypt_password -Both current authentication methods should work -1 -1 -Reset authentication methods to new -Only the latest should work, below should fail -AUTHENTICATION_FAILED -Should work -1 -Multiple identified with, not allowed -Syntax error -CREATE Multiple identified with, not allowed -Syntax error -Create user with no identification -Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types -BAD_ARGUMENTS -Try to add no_password mixed with other authentication methods, should not be allowed -SYNTAX_ERROR -Adding no_password, should fail -SYNTAX_ERROR -Replacing existing authentication methods in favor of no_password, should succeed -CREATE USER u01_03174 IDENTIFIED WITH no_password -Trying to auth with no pwd, should succeed -1 -Create user with mix both implicit and explicit auth type, starting with with -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -Create user with mix both implicit and explicit auth type, starting with by -CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password -Use WITH without providing authentication type, should fail -Syntax error -Create user with ADD identification, should fail, add is not allowed for create query -SYNTAX_ERROR -Trailing comma should result in syntax error -SYNTAX_ERROR -First auth method can't specify type if WITH keyword is not present -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement -SYNTAX_ERROR -ADD NOT IDENTIFIED should result in syntax error -SYNTAX_ERROR -RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses -SYNTAX_ERROR -On cluster tests localhost 9000 0 0 0 localhost 9000 0 0 0 Basic authentication after user creation diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index 715b99a4334..d0563885892 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -157,8 +157,4 @@ function test } -test "" - -echo "On cluster tests" - test "ON CLUSTER test_shard_localhost" From 57ba0f0b32ac8d6a092ce7a56b825265fa34dbc1 Mon Sep 17 00:00:00 2001 From: Tuan Pham Anh Date: Thu, 29 Aug 2024 00:31:33 +0000 Subject: [PATCH 452/844] format gtest_Parser.cpp --- src/Parsers/tests/gtest_Parser.cpp | 98 +++++++++++++++--------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 1b50d4480d7..47f7a54389b 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -225,55 +225,55 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), ::testing::ValuesIn(std::initializer_list{ - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw')", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')" - }, - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n PARTITION BY toYYYYMM(created)\n)" - }, - { - "CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())", - "CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE tbl\n(\n\n),\nTABLE OVERRIDE a\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE b\n(\n PARTITION BY rand()\n)" - }, - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(created)\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))", - "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n INDEX foo foo TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created))\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)", - "CREATE DATABASE db\nTABLE OVERRIDE t1\n(\n TTL inserted + toIntervalMonth(1)\n),\nTABLE OVERRIDE t2\n(\n TTL inserted + toIntervalMonth(2)\n)" - }, - { - "CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)", - "CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE tab3\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n),\nTABLE OVERRIDE tab5\n(\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n)" - }, - { - "CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))", - "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(created)\n)" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)" - }, - { - "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))", - "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)\nCOMMENT 'db comment'" - } - }))); + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw')", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')" + }, + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl`\n(PARTITION BY toYYYYMM(created))", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db ENGINE=Foo TABLE OVERRIDE `tbl` (), TABLE OVERRIDE a (COLUMNS (_created DateTime MATERIALIZED now())), TABLE OVERRIDE b (PARTITION BY rand())", + "CREATE DATABASE db\nENGINE = Foo\nTABLE OVERRIDE tbl\n(\n\n),\nTABLE OVERRIDE a\n(\n COLUMNS\n (\n `_created` DateTime MATERIALIZED now()\n )\n),\nTABLE OVERRIDE b\n(\n PARTITION BY rand()\n)" + }, + { + "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE tbl (COLUMNS (id UUID) PARTITION BY toYYYYMM(created))", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('addr:port', 'db', 'user', 'pw')\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `id` UUID\n )\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE tbl (COLUMNS (INDEX foo foo TYPE minmax GRANULARITY 1) PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created)))", + "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n INDEX foo foo TYPE minmax GRANULARITY 1\n )\n PARTITION BY if(_staged = 1, 'staging', toYYYYMM(created))\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE t1 (TTL inserted + INTERVAL 1 MONTH DELETE), TABLE OVERRIDE t2 (TTL `inserted` + INTERVAL 2 MONTH DELETE)", + "CREATE DATABASE db\nTABLE OVERRIDE t1\n(\n TTL inserted + toIntervalMonth(1)\n),\nTABLE OVERRIDE t2\n(\n TTL inserted + toIntervalMonth(2)\n)" + }, + { + "CREATE DATABASE db ENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw') SETTINGS allows_query_when_mysql_lost = 1 TABLE OVERRIDE tab3 (COLUMNS (_staged UInt8 MATERIALIZED 1) PARTITION BY (c3) TTL c3 + INTERVAL 10 minute), TABLE OVERRIDE tab5 (PARTITION BY (c3) TTL c3 + INTERVAL 10 minute)", + "CREATE DATABASE db\nENGINE = MaterializeMySQL('127.0.0.1:3306', 'db', 'root', 'pw')\nSETTINGS allows_query_when_mysql_lost = 1\nTABLE OVERRIDE tab3\n(\n COLUMNS\n (\n `_staged` UInt8 MATERIALIZED 1\n )\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n),\nTABLE OVERRIDE tab5\n(\n PARTITION BY c3\n TTL c3 + toIntervalMinute(10)\n)" + }, + { + "CREATE DATABASE db TABLE OVERRIDE tbl (PARTITION BY toYYYYMM(created) COLUMNS (created DateTime CODEC(Delta)))", + "CREATE DATABASE db\nTABLE OVERRIDE tbl\n(\n COLUMNS\n (\n `created` DateTime CODEC(Delta)\n )\n PARTITION BY toYYYYMM(created)\n)" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 TABLE OVERRIDE a (ORDER BY (id, version))", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)" + }, + { + "CREATE DATABASE db ENGINE = Foo() SETTINGS a = 1, b = 2 COMMENT 'db comment' TABLE OVERRIDE a (ORDER BY (id, version))", + "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE a\n(\n ORDER BY (id, version)\n)\nCOMMENT 'db comment'" + } +}))); INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest, ::testing::Combine( From 7d56c8bd43c70f396d4a1e0aa0c8887ca4ee94b9 Mon Sep 17 00:00:00 2001 From: "baolin.hbl" Date: Tue, 20 Aug 2024 03:28:26 +0000 Subject: [PATCH 453/844] Avoid detached covered-by-broken part duplicates Problem: When a broken part is found during the startup, it will clone the parts which are covered by the broken part, to the detached directory (with the 'covered-by-broken' prefix). A part may be covered by multiple merged parts, which will result in multiple clones, which leads to path conflicts and further attempts to clone to the try-n directory. If n exceeds 9, the clone is abandoned and the table is marked as read-only. pull#41981 tried fixed the problem, but the fix is incomplete. The metadata_version.txt file is deleted during covered-by-broken clone. As a result, looksLikeBrokenDetachedPartHasTheSameContent finds differences during part comparison. Fix: covered-by-broken retain metadata_version.txt file when cloning --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 + .../test_covered_by_broken_exists/__init__.py | 0 .../test_covered_by_broken_exists/test.py | 103 ++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 tests/integration/test_covered_by_broken_exists/__init__.py create mode 100644 tests/integration/test_covered_by_broken_exists/test.py diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 195aa4fdc10..22bb188f74a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2063,6 +2063,7 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, + .keep_metadata_version = prefix == "covered-by-broken", .make_source_readonly = true, .external_transaction = disk_transaction }; diff --git a/tests/integration/test_covered_by_broken_exists/__init__.py b/tests/integration/test_covered_by_broken_exists/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_covered_by_broken_exists/test.py b/tests/integration/test_covered_by_broken_exists/test.py new file mode 100644 index 00000000000..b6d1f55f133 --- /dev/null +++ b/tests/integration/test_covered_by_broken_exists/test.py @@ -0,0 +1,103 @@ +import pytest +import logging +import time +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", stay_alive=True, with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True) + +instance = node1 +q = node1.query + +path_to_data = "/var/lib/clickhouse/" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def wait_merged_part(table, part_name, retries=100): + q("OPTIMIZE TABLE {} FINAL".format(table)) + for i in range(retries): + result = q( + "SELECT name FROM system.parts where table='{}' AND name='{}'".format( + table, part_name + ) + ) + if result: + return True + time.sleep(0.5) + else: + return False + + +def test_make_clone_covered_by_broken_detached_dir_exists(started_cluster): + q("DROP TABLE IF EXISTS test_make_clone_cvbdde") + + q( + "CREATE TABLE test_make_clone_cvbdde(n int, m String) ENGINE=ReplicatedMergeTree('/test_make_clone_cvbdde', '1') ORDER BY n SETTINGS old_parts_lifetime=3600, min_age_to_force_merge_seconds=1, min_age_to_force_merge_on_partition_only=0" + ) + path = path_to_data + "data/default/test_make_clone_cvbdde/" + + q("INSERT INTO test_make_clone_cvbdde VALUES (0, 'hbl')") + + q("INSERT INTO test_make_clone_cvbdde VALUES (1, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_1_1")): + assert False, "Part all_0_1_1 doesn't appeared in system.parts" + + q("INSERT INTO test_make_clone_cvbdde VALUES (2, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_2_2")): + assert False, "Part all_0_2_2 doesn't appeared in system.parts" + + q("INSERT INTO test_make_clone_cvbdde VALUES (3, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_3_3")): + assert False, "Part all_0_3_3 doesn't appeared in system.parts" + + res = str(instance.exec_in_container(["ls", path]).strip().split("\n")) + + # broke the merged parts + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_1_1/data.bin"), + ] + ) + + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_2_2/data.bin"), + ] + ) + + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_3_3/data.bin"), + ] + ) + + instance.restart_clickhouse(kill=True) + + assert [ + "broken-on-start_all_0_1_1", + "broken-on-start_all_0_2_2", + "broken-on-start_all_0_3_3", + "covered-by-broken_all_0_0_0", + "covered-by-broken_all_1_1_0", + "covered-by-broken_all_2_2_0", + "covered-by-broken_all_3_3_0", + ] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) From d88aa3952d34eeecbca363a1a66df111c5b3e587 Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 29 Aug 2024 10:56:26 +0000 Subject: [PATCH 454/844] Disable alter table add vector similarity index if setting does not enabled --- src/Storages/AlterCommands.cpp | 10 ++++++++++ src/Storages/AlterCommands.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 5 +++++ ...rbid_add_vector_similarity_index.reference | 0 ...231_forbid_add_vector_similarity_index.sql | 19 +++++++++++++++++++ 5 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.reference create mode 100644 tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index d92d8b59f6e..e0563f9f1c6 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1142,6 +1142,16 @@ bool AlterCommands::hasFullTextIndex(const StorageInMemoryMetadata & metadata) return false; } +bool AlterCommands::hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata) +{ + for (const auto & index : metadata.secondary_indices) + { + if (index.type == "vector_similarity") + return true; + } + return false; +} + void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (!prepared) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index a91bac10214..b2f0f9a6abd 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -237,6 +237,8 @@ public: /// Check if commands have any full-text index static bool hasFullTextIndex(const StorageInMemoryMetadata & metadata); + + static bool hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata); }; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 94f6d196b99..8b12330c1a4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3230,6 +3230,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_full_text_index')"); + if (AlterCommands::hasVectorSimilarityIndex(new_metadata) && !settings.allow_experimental_vector_similarity_index) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental vector_similarity index feature is not enabled (turn on setting 'allow_experimental_vector_similarity_index')"); + for (const auto & disk : getDisks()) if (!disk->supportsHardLinks() && !commands.isSettingsAlter() && !commands.isCommentAlter()) throw Exception( diff --git a/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.reference b/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql b/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql new file mode 100644 index 00000000000..a1e362e7bd1 --- /dev/null +++ b/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS test_embedding; + +CREATE TABLE test_embedding +( + id UInt32, + embedding Array(Float32), +) +ENGINE = MergeTree +ORDER BY tuple(); + +SET allow_experimental_vector_similarity_index = 0; + +alter table test_embedding add INDEX idx embedding TYPE vector_similarity('hnsw', 'cosineDistance'); -- { serverError SUPPORT_IS_DISABLED } + +SET allow_experimental_vector_similarity_index = 1; + +alter table test_embedding add INDEX idx embedding TYPE vector_similarity('hnsw', 'cosineDistance'); + +DROP TABLE test_embedding; From d533c24321a8a1c8d7a937bbce6756e91a2230b8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:14:43 +0200 Subject: [PATCH 455/844] Reduce test size --- .../0_stateless/03227_distinct_dynamic_types_json_paths.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql index 6930f5a3d44..3af911e27fa 100644 --- a/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql +++ b/tests/queries/0_stateless/03227_distinct_dynamic_types_json_paths.sql @@ -4,10 +4,11 @@ set allow_experimental_dynamic_type = 1; set allow_experimental_json_type = 1; set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; +set max_block_size = 10000; drop table if exists test_json_dynamic_aggregate_functions; create table test_json_dynamic_aggregate_functions (json JSON(a1 String, max_dynamic_paths=2, max_dynamic_types=2)) engine=Memory; -insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toBool(number % 2)))) from numbers(200000); +insert into test_json_dynamic_aggregate_functions select toJSONString(map('a' || number % 13, multiIf(number % 5 == 0, NULL, number % 5 == 1, number::UInt32, number % 5 == 2, 'str_' || number, number % 5 == 3, range(number % 5), toBool(number % 2)))) from numbers(100000); select arrayJoin(distinctJSONPaths(json)) from test_json_dynamic_aggregate_functions; select arrayJoin(distinctJSONPathsAndTypes(json)) from test_json_dynamic_aggregate_functions; select arrayJoin(distinctDynamicTypes(json.a2)) from test_json_dynamic_aggregate_functions; From b6a6d9315217a783f61671e9a0c0ef997e1cd7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 29 Aug 2024 11:28:56 +0000 Subject: [PATCH 456/844] Remove completely invalid queries from test --- ...lter_no_properties_before_remove_modify_reset.sql | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql index 13ad11bb139..0b98c605ccf 100644 --- a/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql +++ b/tests/queries/0_stateless/03231_alter_no_properties_before_remove_modify_reset.sql @@ -4,7 +4,6 @@ CREATE TABLE a (x Int64, y Int64 MATERIALIZED 1 SETTINGS (max_compress_block_siz SELECT 'REMOVE'; ALTER TABLE a MODIFY COLUMN y Int64 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } -ALTER TABLE a MODIFY COLUMN y NOT NULL REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y DEFAULT 2 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } @@ -17,7 +16,6 @@ ALTER TABLE a MODIFY COLUMN y SETTINGS (max_compress_block_size = 20000) REMOVE ALTER TABLE a MODIFY COLUMN y PRIMARY KEY REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } SELECT 'The same, but with type'; -ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 REMOVE MATERIALIZED; -- { clientError SYNTAX_ERROR } @@ -31,7 +29,6 @@ ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY REMOVE MATERIALIZED; -- { client SELECT 'MODIFY SETTING'; ALTER TABLE a MODIFY COLUMN y Int64 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } -ALTER TABLE a MODIFY COLUMN y NOT NULL MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } @@ -44,7 +41,6 @@ ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) MODIFY SETTING max_com ALTER TABLE a MODIFY COLUMN y PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } SELECT 'The same, but with type'; -ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000; -- { clientError SYNTAX_ERROR } @@ -58,7 +54,6 @@ ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY MODIFY SETTING max_compress_bloc SELECT 'RESET SETTING'; ALTER TABLE a MODIFY COLUMN y Int64 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } -ALTER TABLE a MODIFY COLUMN y NOT NULL RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y DEFAULT 2 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } @@ -71,7 +66,6 @@ ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) RESET SETTING max_comp ALTER TABLE a MODIFY COLUMN y PRIMARY KEY RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } SELECT 'The same, but with type'; -ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 RESET SETTING max_compress_block_size; -- { clientError SYNTAX_ERROR } @@ -89,7 +83,6 @@ SELECT 'All the above, but on server side'; SELECT 'REMOVE'; SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } @@ -102,7 +95,6 @@ SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (max_compress_block_s SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT 'The same, but with type'; -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 REMOVE MATERIALIZED'); -- { serverError SYNTAX_ERROR } @@ -116,7 +108,6 @@ SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY REMOVE MATER SELECT 'MODIFY SETTING'; SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } @@ -129,7 +120,6 @@ SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) MO SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT 'The same, but with type'; -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 MODIFY SETTING max_compress_block_size = 20000'); -- { serverError SYNTAX_ERROR } @@ -143,7 +133,6 @@ SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 PRIMARY KEY MODIFY SETTI SELECT 'RESET SETTING'; SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y NOT NULL RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y DEFAULT 2 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y MATERIALIZED 3 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y EPHEMERAL 4 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } @@ -156,7 +145,6 @@ SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y SETTINGS (some_setting = 2) RE SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y PRIMARY KEY RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT 'The same, but with type'; -SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 NOT NULL RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 DEFAULT 2 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 MATERIALIZED 3 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } SELECT formatQuery('ALTER TABLE a MODIFY COLUMN y Int64 EPHEMERAL 4 RESET SETTING max_compress_block_size'); -- { serverError SYNTAX_ERROR } From 0400dcb03eefdb6604d04f5ed3c70c179032f84d Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 29 Aug 2024 12:34:58 +0000 Subject: [PATCH 457/844] no-fastest --- .../0_stateless/03231_forbid_add_vector_similarity_index.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql b/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql index a1e362e7bd1..e91d7c71eac 100644 --- a/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql +++ b/tests/queries/0_stateless/03231_forbid_add_vector_similarity_index.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + DROP TABLE IF EXISTS test_embedding; CREATE TABLE test_embedding From 7000f214abbb1937423e8282f08bba8431249901 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 29 Aug 2024 09:47:18 -0300 Subject: [PATCH 458/844] add head -n 1 to make sure only on occurrence of error code is grepped --- .../03174_multiple_authentication_methods.sh | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index d0563885892..cd6cb0807f3 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -26,7 +26,7 @@ function test_login_pwd function test_login_pwd_expect_error { - test_login_pwd "$1" "$2" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' + test_login_pwd "$1" "$2" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' | head -n 1 } function test @@ -68,7 +68,7 @@ function test ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH bcrypt_password BY '5'" echo "Asserting SSH does not work anymore" - ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' + ${CLICKHOUSE_CLIENT} --user ${user} --ssh-key-file 'ssh_key' --ssh-key-passphrase "" --query "SELECT 1" 2>&1 | grep -m1 -o 'AUTHENTICATION_FAILED' | head -n 1 echo "Asserting bcrypt_password works" test_login_pwd ${user} '5' @@ -90,12 +90,12 @@ function test test_login_pwd ${user} '6' echo "Multiple identified with, not allowed" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" | head -n 1 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER ${user} $1" echo "CREATE Multiple identified with, not allowed" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '7', IDENTIFIED WITH plaintext_password by '8'" 2>&1 | grep -m1 -o "Syntax error" | head -n 1 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" @@ -103,13 +103,13 @@ function test ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1" echo "Add identified with, should not be allowed because user is currently identified with no_password and it can not co-exist with other auth types" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '7'" 2>&1 | grep -m1 -o "BAD_ARGUMENTS" | head -n 1 echo "Try to add no_password mixed with other authentication methods, should not be allowed" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '8', no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "Adding no_password, should fail" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH no_password" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "Replacing existing authentication methods in favor of no_password, should succeed" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH no_password" @@ -133,25 +133,25 @@ function test ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" echo "Use WITH without providing authentication type, should fail" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" | head -n 1 echo "Create user with ADD identification, should fail, add is not allowed for create query" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "Trailing comma should result in syntax error" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD IDENTIFIED WITH plaintext_password by '1'," 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "First auth method can't specify type if WITH keyword is not present" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED plaintext_password by '1'" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "RESET AUTHENTICATION METHODS TO NEW can only be used on alter statement" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "ADD NOT IDENTIFIED should result in syntax error" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 ADD NOT IDENTIFIED" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 echo "RESET AUTHENTICATION METHODS TO NEW cannot be used along with [ADD] IDENTIFIED clauses" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH plaintext_password by '1' RESET AUTHENTICATION METHODS TO NEW" 2>&1 | grep -m1 -o "SYNTAX_ERROR" | head -n 1 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user}" From edf4e09fb2bc1d8a0a058941f6cb7f13d063c62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Thu, 29 Aug 2024 18:46:06 +0300 Subject: [PATCH 459/844] Remove stale moving parts without zookeeper --- src/Common/FailPoint.cpp | 1 + .../MergeTree/MergeTreePartsMover.cpp | 16 ++- .../__init__.py | 0 .../test_remove_stale_moving_parts/config.xml | 46 +++++++ .../test_remove_stale_moving_parts/test.py | 126 ++++++++++++++++++ 5 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_remove_stale_moving_parts/__init__.py create mode 100644 tests/integration/test_remove_stale_moving_parts/config.xml create mode 100644 tests/integration/test_remove_stale_moving_parts/test.py diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index b2fcbc77c56..bc4f2cb43d2 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -63,6 +63,7 @@ static struct InitFiu REGULAR(keepermap_fail_drop_data) \ REGULAR(lazy_pipe_fds_fail_close) \ PAUSEABLE(infinite_sleep) \ + PAUSEABLE(stop_moving_part_before_swap_with_active) \ namespace FailPoints diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 9223d6fd5b1..8daa48a2339 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,11 @@ namespace ErrorCodes extern const int DIRECTORY_ALREADY_EXISTS; } +namespace FailPoints +{ + extern const char stop_moving_part_before_swap_with_active[]; +} + namespace { @@ -272,7 +278,13 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me cloned_part.part = std::move(builder).withPartFormatFromDisk().build(); LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath()); - cloned_part.part->is_temp = data->allowRemoveStaleMovingParts(); + if (data->allowRemoveStaleMovingParts()) + { + cloned_part.part->is_temp = data->allowRemoveStaleMovingParts(); + /// Setting it in case connection to zookeeper is lost while moving + /// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor + cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; + } cloned_part.part->loadColumnsChecksumsIndexes(true, true); cloned_part.part->loadVersionMetadata(); cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime(); @@ -282,6 +294,8 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const { + /// Used to get some stuck parts in the moving directory by stopping moves while pause is active + FailPointInjection::pauseFailPoint(FailPoints::stop_moving_part_before_swap_with_active); if (moves_blocker.isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); diff --git a/tests/integration/test_remove_stale_moving_parts/__init__.py b/tests/integration/test_remove_stale_moving_parts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_remove_stale_moving_parts/config.xml b/tests/integration/test_remove_stale_moving_parts/config.xml new file mode 100644 index 00000000000..968e07fae51 --- /dev/null +++ b/tests/integration/test_remove_stale_moving_parts/config.xml @@ -0,0 +1,46 @@ + + + + + + ch1 + 9000 + + + + + + 01 + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + + + + default + False + + + s3 + False + + + 0.0 + + + + + + true + s3 + + true + diff --git a/tests/integration/test_remove_stale_moving_parts/test.py b/tests/integration/test_remove_stale_moving_parts/test.py new file mode 100644 index 00000000000..dcbcf38d25a --- /dev/null +++ b/tests/integration/test_remove_stale_moving_parts/test.py @@ -0,0 +1,126 @@ +from pathlib import Path +import time +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +ch1 = cluster.add_instance( + "ch1", + main_configs=[ + "config.xml", + ], + macros={"replica": "node1"}, + with_zookeeper=True, + with_minio=True, +) + +DATABASE_NAME = "stale_moving_parts" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def q(node, query): + return node.query(database=DATABASE_NAME, sql=query) + + +# .../disks/s3/store/ +def get_table_path(node, table): + return ( + node.query( + sql=f"SELECT data_paths FROM system.tables WHERE table = '{table}' and database = '{DATABASE_NAME}' LIMIT 1" + ) + .strip('"\n[]') + .split(",")[1] + .strip("'") + ) + + +def exec(node, cmd, path): + return node.exec_in_container( + [ + "bash", + "-c", + f"{cmd} {path}", + ] + ) + + +def stop_zookeeper(node): + node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"]) + timeout = time.time() + 60 + while node.get_process_pid("zookeeper") != None: + if time.time() > timeout: + raise Exception("Failed to stop ZooKeeper in 60 secs") + time.sleep(0.2) + + +def wait_part_is_stuck(node, table_moving_path, moving_part): + num_tries = 5 + while q(node, "SELECT part_name FROM system.moves").strip() != moving_part: + if num_tries == 0: + raise Exception("Part has not started to move") + num_tries -= 1 + time.sleep(1) + num_tries = 5 + while exec(node, "ls", table_moving_path).strip() != moving_part: + if num_tries == 0: + raise Exception("Part is not stuck in the moving directory") + num_tries -= 1 + time.sleep(1) + + +def wait_zookeeper_node_to_start(zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = cluster.get_kazoo_client(instance) + conn.get_children("/") + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + + +def test_remove_stale_moving_parts_without_zookeeper(started_cluster): + ch1.query(f"CREATE DATABASE IF NOT EXISTS {DATABASE_NAME}") + + q( + ch1, + "CREATE TABLE test_remove ON CLUSTER cluster ( id UInt32 ) ENGINE ReplicatedMergeTree() ORDER BY id;", + ) + + table_moving_path = Path(get_table_path(ch1, "test_remove")) / "moving" + + q(ch1, "SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active") + q(ch1, "INSERT INTO test_remove SELECT number FROM numbers(100);") + moving_part = "all_0_0_0" + move_response = ch1.get_query_request( + sql=f"ALTER TABLE test_remove MOVE PART '{moving_part}' TO DISK 's3'", + database=DATABASE_NAME, + ) + + wait_part_is_stuck(ch1, table_moving_path, moving_part) + + cluster.stop_zookeeper_nodes(["zoo1", "zoo2", "zoo3"]) + # Stop moves in case table is not read-only yet + q(ch1, "SYSTEM STOP MOVES") + q(ch1, "SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active") + + assert "Cancelled moving parts" in move_response.get_error() + assert exec(ch1, "ls", table_moving_path).strip() == "" + + cluster.start_zookeeper_nodes(["zoo1", "zoo2", "zoo3"]) + wait_zookeeper_node_to_start(["zoo1", "zoo2", "zoo3"]) + q(ch1, "SYSTEM START MOVES") + + q(ch1, f"DROP TABLE test_remove") From b2c4b771d8f1fc83d4096128231130dc93e9fd79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9A=D0=B8=D1=80=D0=B8=D0=BB=D0=BB=20=D0=93=D0=B0=D1=80?= =?UTF-8?q?=D0=B1=D0=B0=D1=80?= Date: Thu, 29 Aug 2024 19:33:04 +0300 Subject: [PATCH 460/844] Minor fixes --- src/Storages/MergeTree/MergeTreePartsMover.cpp | 3 ++- tests/integration/test_remove_stale_moving_parts/test.py | 9 --------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 8daa48a2339..11a55fe2420 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -278,9 +278,10 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me cloned_part.part = std::move(builder).withPartFormatFromDisk().build(); LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath()); + cloned_part.part->is_temp = false; if (data->allowRemoveStaleMovingParts()) { - cloned_part.part->is_temp = data->allowRemoveStaleMovingParts(); + cloned_part.part->is_temp = true; /// Setting it in case connection to zookeeper is lost while moving /// Otherwise part might be stuck in the moving directory due to the KEEPER_EXCEPTION in part's destructor cloned_part.part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; diff --git a/tests/integration/test_remove_stale_moving_parts/test.py b/tests/integration/test_remove_stale_moving_parts/test.py index dcbcf38d25a..f7cb4e5817e 100644 --- a/tests/integration/test_remove_stale_moving_parts/test.py +++ b/tests/integration/test_remove_stale_moving_parts/test.py @@ -53,15 +53,6 @@ def exec(node, cmd, path): ) -def stop_zookeeper(node): - node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"]) - timeout = time.time() + 60 - while node.get_process_pid("zookeeper") != None: - if time.time() > timeout: - raise Exception("Failed to stop ZooKeeper in 60 secs") - time.sleep(0.2) - - def wait_part_is_stuck(node, table_moving_path, moving_part): num_tries = 5 while q(node, "SELECT part_name FROM system.moves").strip() != moving_part: From df16831cc8cd9e1af95116d73309a594fb42b9e5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 29 Aug 2024 18:56:37 +0200 Subject: [PATCH 461/844] Update tests/integration/test_covered_by_broken_exists/test.py --- tests/integration/test_covered_by_broken_exists/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_covered_by_broken_exists/test.py b/tests/integration/test_covered_by_broken_exists/test.py index b6d1f55f133..caa091fdd2d 100644 --- a/tests/integration/test_covered_by_broken_exists/test.py +++ b/tests/integration/test_covered_by_broken_exists/test.py @@ -40,7 +40,7 @@ def wait_merged_part(table, part_name, retries=100): def test_make_clone_covered_by_broken_detached_dir_exists(started_cluster): - q("DROP TABLE IF EXISTS test_make_clone_cvbdde") + q("DROP TABLE IF EXISTS test_make_clone_cvbdde SYNC") q( "CREATE TABLE test_make_clone_cvbdde(n int, m String) ENGINE=ReplicatedMergeTree('/test_make_clone_cvbdde', '1') ORDER BY n SETTINGS old_parts_lifetime=3600, min_age_to_force_merge_seconds=1, min_age_to_force_merge_on_partition_only=0" From 25f31f914904e197797ef9917ff97c6447846203 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Aug 2024 18:15:05 +0000 Subject: [PATCH 462/844] Fix conversion for Dynamic, add more tests --- src/Functions/FunctionsConversion.cpp | 62 ++- ..._variant_dynamic_cast_or_default.reference | 484 ++++++++++++++++++ .../03212_variant_dynamic_cast_or_default.sql | 108 ++++ 3 files changed, 644 insertions(+), 10 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 96e2a3291b3..271daa99d0c 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4143,12 +4143,15 @@ private: /// requires quite a lot of work. By now let's simply use try/catch. /// First, check that we can create a wrapper. WrapperType wrapper = prepareUnpackDictionaries(from, to); - /// Second, check if we can perform a conversion on empty columns. - ColumnsWithTypeAndName column_from = {{from->createColumn(), from, "" }}; - wrapper(column_from, to, nullptr, 0); + /// Second, check if we can perform a conversion on column with default value. + /// (we cannot just check empty column as we do some checks only during iteration over rows). + auto test_col = from->createColumn(); + test_col->insertDefault(); + ColumnsWithTypeAndName column_from = {{test_col->getPtr(), from, "" }}; + wrapper(column_from, to, nullptr, 1); return wrapper; } - catch (...) + catch (const Exception &) { return {}; } @@ -4393,10 +4396,27 @@ private: casted_variant_columns.reserve(variant_types.size()); for (size_t i = 0; i != variant_types.size(); ++i) { + /// Skip shared variant, it will be processed later. + if (i == column_dynamic.getSharedVariantDiscriminator()) + { + casted_variant_columns.push_back(nullptr); + continue; + } + const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i); ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}}; - auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type); - casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + WrapperType variant_wrapper; + if (cast_type == CastType::accurateOrNull) + /// Create wrapper only if we support conversion from variant to the resulting type. + variant_wrapper = createWrapperIfCanConvert(variant_types[i], result_type); + else + variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type); + + ColumnPtr casted_variant; + /// Check if we have wrapper for this variant. + if (variant_wrapper) + casted_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size()); + casted_variant_columns.push_back(casted_variant); } /// Second, collect all variants stored in shared variant and cast them to result type. @@ -4452,8 +4472,18 @@ private: for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i) { ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}}; - auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type); - casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size())); + WrapperType variant_wrapper; + if (cast_type == CastType::accurateOrNull) + /// Create wrapper only if we support conversion from variant to the resulting type. + variant_wrapper = createWrapperIfCanConvert(variant_types_from_shared_variant[i], result_type); + else + variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type); + + ColumnPtr casted_variant; + /// Check if we have wrapper for this variant. + if (variant_wrapper) + casted_variant = variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size()); + casted_shared_variant_columns.push_back(casted_variant); } /// Construct result column from all casted variants. @@ -4463,11 +4493,23 @@ private: { auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]); if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { res->insertDefault(); + } else if (global_discr == shared_variant_discr) - res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]); + { + if (casted_shared_variant_columns[shared_variant_indexes[i]]) + res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]); + else + res->insertDefault(); + } else - res->insertFrom(*casted_variant_columns[global_discr], offsets[i]); + { + if (casted_variant_columns[global_discr]) + res->insertFrom(*casted_variant_columns[global_discr], offsets[i]); + else + res->insertDefault(); + } } return res; diff --git a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference index 8b1a342181c..fd16d020019 100644 --- a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference +++ b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.reference @@ -30,3 +30,487 @@ 5 5 \N str_6 \N [0,1,2,3,4,5,6] +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +0 +1 +2 +3 +126 +127 +254 +255 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +3232235521 +4294967294 +4294967295 +-9223372036854775808 +-9223372036854775807 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +3232235521 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +-170141183460469231731687303715884105728 +-170141183460469231731687303715884105727 +-9223372036854775808 +-9223372036854775807 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +170141183460469231731687303715884105726 +170141183460469231731687303715884105727 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +3232235521 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +170141183460469231731687303715884105726 +170141183460469231731687303715884105727 +296245801836096677496328508227807879401 +340282366920938463463374607431768211454 +340282366920938463463374607431768211455 +-57896044618658097711785492504343953926634992332820282019728792003956564819968 +-57896044618658097711785492504343953926634992332820282019728792003956564819967 +-170141183460469231731687303715884105728 +-170141183460469231731687303715884105727 +-9223372036854775808 +-9223372036854775807 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +170141183460469231731687303715884105726 +170141183460469231731687303715884105727 +340282366920938463463374607431768211454 +340282366920938463463374607431768211455 +57896044618658097711785492504343953926634992332820282019728792003956564819966 +57896044618658097711785492504343953926634992332820282019728792003956564819967 +0 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +3232235521 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +170141183460469231731687303715884105726 +170141183460469231731687303715884105727 +340282366920938463463374607431768211454 +340282366920938463463374607431768211455 +57896044618658097711785492504343953926634992332820282019728792003956564819966 +57896044618658097711785492504343953926634992332820282019728792003956564819967 +115792089237316195423570985008687907853269984665640564039457584007913129639934 +115792089237316195423570985008687907853269984665640564039457584007913129639935 +-inf +-3.4028233e38 +-1.7014118e38 +-9223372000000000000 +-2147483600 +-32768 +-32767 +-128 +-127 +-1 +-1.1754942e-38 +-1e-45 +0 +1e-45 +1.1754942e-38 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +3.4028233e38 +inf +nan +-inf +-1.7976931348623157e308 +-5.78960446186581e76 +-3.40282347e38 +-3.4028232635611926e38 +-1.7014118346046923e38 +-9223372036854776000 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +-1.1754943499999998e-38 +-1.1754942106924411e-38 +-1.401298464324817e-45 +-1.3999999999999999e-45 +-2.2250738585072014e-308 +0 +2.2250738585072014e-308 +1.3999999999999999e-45 +1.401298464324817e-45 +1.1754942106924411e-38 +1.1754943499999998e-38 +1 +2 +3 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +3.4028232635611926e38 +3.40282347e38 +1.7976931348623157e308 +inf +nan +-32768 +-32767 +-128 +-127 +-1 +0 +1 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +-9223372036854775808 +-9223372036854775807 +-18446744073709551.616 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +-340282347000000000977176926486249829565.415 +-9223372036854775808 +-9223372036854775807 +-18446744073709551.616 +-2147483648 +-2147483647 +-32768 +-32767 +-128 +-127 +-1 +0 +1 +126 +127 +254 +255 +32766 +32767 +65534 +65535 +2147483646 +2147483647 +4294967294 +4294967295 +9223372036854775806 +9223372036854775807 +18446744073709551614 +18446744073709551615 +340282347000000000977176926486249829565.415 +1970-01-01 +1970-01-02 +1970-01-03 +1970-01-04 +1970-05-07 +1970-05-08 +1970-09-12 +1970-09-13 +2038-01-19 +2059-09-17 +2059-09-18 +2106-02-07 +2149-06-05 +2149-06-06 +2299-12-31 +2299-12-31 +1900-01-01 +1969-08-26 +1969-08-27 +1969-12-30 +1969-12-31 +1970-01-01 +1970-01-02 +1970-01-03 +1970-01-04 +1970-05-07 +1970-05-08 +1970-09-12 +1970-09-13 +2038-01-19 +2059-09-17 +2059-09-18 +2106-02-07 +2149-06-05 +2149-06-06 +2299-12-31 +1970-01-01 00:00:00 +1970-01-01 00:00:01 +1970-01-01 00:00:02 +1970-01-01 00:00:03 +1970-01-01 00:02:06 +1970-01-01 00:02:07 +1970-01-01 00:04:14 +1970-01-01 00:04:15 +1970-01-01 09:06:06 +1970-01-01 09:06:07 +1970-01-01 18:12:14 +1970-01-01 18:12:15 +2038-01-19 03:14:06 +2038-01-19 03:14:07 +2106-02-07 06:28:14 +2106-02-07 06:28:15 +0.0.0.0 +192.168.0.1 +:: +::1 +::ffff:192.168.0.1 +00000000-0000-0000-0000-000000000000 +dededdb6-7835-4ce4-8d11-b5de6f2820e9 diff --git a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql index 1e71e36780c..f227bbdac77 100644 --- a/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql +++ b/tests/queries/0_stateless/03212_variant_dynamic_cast_or_default.sql @@ -1,9 +1,117 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; +set allow_suspicious_low_cardinality_types = 1; +set session_timezone = 'UTC'; select accurateCastOrDefault(variant, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number)) as variant from numbers(8); select accurateCastOrNull(variant, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number)) as variant from numbers(8); select accurateCastOrDefault(dynamic, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number))::Dynamic as dynamic from numbers(8); select accurateCastOrNull(dynamic, 'UInt32'), multiIf(number % 4 == 0, NULL, number % 4 == 1, number, number % 4 == 2, 'str_' || toString(number), range(number))::Dynamic as dynamic from numbers(8); + +drop table if exists t; +create table t (d Dynamic) engine=MergeTree order by tuple(); + +-- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-32768::Int16), (-32767::Int16), (-1::Int16), (0::Int16), (1::Int16), (32766::Int16), (32767::Int16); +INSERT INTO t VALUES (-2147483648::Int32), (-2147483647::Int32), (-1::Int32), (0::Int32), (1::Int32), (2147483646::Int32), (2147483647::Int32); +INSERT INTO t VALUES (-9223372036854775808::Int64), (-9223372036854775807::Int64), (-1::Int64), (0::Int64), (1::Int64), (9223372036854775806::Int64), (9223372036854775807::Int64); +INSERT INTO t VALUES (-170141183460469231731687303715884105728::Int128), (-170141183460469231731687303715884105727::Int128), (-1::Int128), (0::Int128), (1::Int128), (170141183460469231731687303715884105726::Int128), (170141183460469231731687303715884105727::Int128); +INSERT INTO t VALUES (-57896044618658097711785492504343953926634992332820282019728792003956564819968::Int256), (-57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256), (-1::Int256), (0::Int256), (1::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819966::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256); + +INSERT INTO t VALUES (0::UInt8), (1::UInt8), (254::UInt8), (255::UInt8); +INSERT INTO t VALUES (0::UInt16), (1::UInt16), (65534::UInt16), (65535::UInt16); +INSERT INTO t VALUES (0::UInt32), (1::UInt32), (4294967294::UInt32), (4294967295::UInt32); +INSERT INTO t VALUES (0::UInt64), (1::UInt64), (18446744073709551614::UInt64), (18446744073709551615::UInt64); +INSERT INTO t VALUES (0::UInt128), (1::UInt128), (340282366920938463463374607431768211454::UInt128), (340282366920938463463374607431768211455::UInt128); +INSERT INTO t VALUES (0::UInt256), (1::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639934::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639935::UInt256); + +-- Floating-point numbers: floats(Float32 and Float64) values +INSERT INTO t VALUES (1.17549435e-38::Float32), (3.40282347e+38::Float32), (-3.40282347e+38::Float32), (-1.17549435e-38::Float32), (1.4e-45::Float32), (-1.4e-45::Float32); +INSERT INTO t VALUES (inf::Float32), (-inf::Float32), (nan::Float32); +INSERT INTO t VALUES (inf::FLOAT(12)), (-inf::FLOAT(12)), (nan::FLOAT(12)); +INSERT INTO t VALUES (inf::FLOAT(15,22)), (-inf::FLOAT(15,22)), (nan::FLOAT(15,22)); + +INSERT INTO t VALUES (1.17549435e-38::Float64), (3.40282347e+38::Float64), (-3.40282347e+38::Float64), (-1.17549435e-38::Float64), (1.4e-45::Float64), (-1.4e-45::Float64); +INSERT INTO t VALUES (2.2250738585072014e-308::Float64), (1.7976931348623157e+308::Float64), (-1.7976931348623157e+308::Float64), (-2.2250738585072014e-308::Float64); +INSERT INTO t VALUES (inf::Float64), (-inf::Float64), (nan::Float64); +INSERT INTO t VALUES (inf::DOUBLE(12)), (-inf::DOUBLE(12)), (nan::DOUBLE(12)); +INSERT INTO t VALUES (inf::DOUBLE(15,22)), (-inf::DOUBLE(15,22)), (nan::DOUBLE(15,22)); + +-- Strings: String and FixedString +INSERT INTO t VALUES ('string'::String), ('1'::FixedString(1)), ('1'::FixedString(2)), ('1'::FixedString(10)); --(''::String), + +-- Boolean +INSERT INTO t VALUES ('1'::Bool), (0::Bool); + +-- UUID +INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); +INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); + +-- LowCardinality +INSERT INTO t VALUES ('1'::LowCardinality(String)), ('1'::LowCardinality(String)), (0::LowCardinality(UInt16)); + +-- Arrays +INSERT INTO t VALUES ([]::Array(Dynamic)), ([[]]::Array(Array(Dynamic))), ([[[]]]::Array(Array(Array(Dynamic)))); + +-- Tuple +INSERT INTO t VALUES (()::Tuple(Dynamic)), ((())::Tuple(Tuple(Dynamic))), (((()))::Tuple(Tuple(Tuple(Dynamic)))); + +-- Map. +INSERT INTO t VALUES (map(11::Dynamic, 'v1'::Dynamic, '22'::Dynamic, 1::Dynamic)); + +-- SimpleAggregateFunction +INSERT INTO t VALUES ([1,2]::SimpleAggregateFunction(anyLast, Array(Int16))); + +-- IPs +INSERT INTO t VALUES (toIPv4('192.168.0.1')), (toIPv6('::1')); + +-- Geo +INSERT INTO t VALUES ((1.23, 4.56)::Point), (([(1.23, 4.56)::Point, (2.34, 5.67)::Point])::Ring); +INSERT INTO t VALUES ([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]::MultiPolygon); + +-- Interval +INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' year); + +-- Nested +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); + +optimize table t final; + +select distinct toInt8OrDefault(d) as res from t order by res; +select distinct toUInt8OrDefault(d) as res from t order by res; +select distinct toInt16OrDefault(d) as res from t order by res; +select distinct toUInt16OrDefault(d) as res from t order by res; +select distinct toInt32OrDefault(d) as res from t order by res; +select distinct toUInt32OrDefault(d) as res from t order by res; +select distinct toInt64OrDefault(d) as res from t order by res; +select distinct toUInt64OrDefault(d) as res from t order by res; +select distinct toInt128OrDefault(d) as res from t order by res; +select distinct toUInt128OrDefault(d) as res from t order by res; +select distinct toInt256OrDefault(d) as res from t order by res; +select distinct toUInt256OrDefault(d) as res from t order by res; + +select distinct toFloat32OrDefault(d) as res from t order by res; +select distinct toFloat64OrDefault(d) as res from t order by res; + +select distinct toDecimal32OrDefault(d, 3) as res from t order by res; +select distinct toDecimal64OrDefault(d, 3) as res from t order by res; +select distinct toDecimal128OrDefault(d, 3) as res from t order by res; +select distinct toDecimal256OrDefault(d, 3) as res from t order by res; + +select distinct toDateOrDefault(d) as res from t order by res; +select distinct toDate32OrDefault(d) as res from t order by res; +select distinct toDateTimeOrDefault(d) as res from t order by res; + +select distinct toIPv4OrDefault(d) as res from t order by res; +select distinct toIPv6OrDefault(d) as res from t order by res; + +select distinct toUUIDOrDefault(d) as res from t order by res; + +drop table t; + From 79c01e717f56132c1af6503b93fc7094dcd47637 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Thu, 29 Aug 2024 23:15:39 +0400 Subject: [PATCH 463/844] rm comment, add docs --- docs/en/sql-reference/statements/alter/partition.md | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 1bb7817364a..11926b2aa08 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -351,7 +351,7 @@ ALTER TABLE mt DELETE IN PARTITION ID '2' WHERE p = 2; You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: - As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. -- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`. +- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH/ATTACH FROM. For example, `ALTER TABLE visits ATTACH PARTITION ALL`. - As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. - In the [ALTER ATTACH PART](#attach-partitionpart) and [DROP DETACHED PART](#drop-detached-partitionpart) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4915bf9f366..0688b05e2ad 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8240,7 +8240,6 @@ std::unique_ptr StorageReplicatedMergeTree::rep part_checksums.emplace_back(hash_hex); } - //ReplicatedMergeTreeLogEntryData entry; auto entry = std::make_unique(); { auto src_table_id = src_data.getStorageID(); From 6ad8e5558a99b43e3452c057346b9c44e8e27517 Mon Sep 17 00:00:00 2001 From: flynn Date: Fri, 30 Aug 2024 07:25:25 +0000 Subject: [PATCH 464/844] Fix typo --- src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp index ae183d74782..58892d0dbf2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp @@ -195,7 +195,7 @@ void MergeTreeIndexGranuleVectorSimilarity::serializeBinary(WriteBuffer & ostr) LOG_TRACE(logger, "Start writing vector similarity index"); if (empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty minmax index {}", backQuote(index_name)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty vector similarity index {}", backQuote(index_name)); writeIntBinary(FILE_FORMAT_VERSION, ostr); From c5b92413cac091c4e28acb5160b89fcabc37a853 Mon Sep 17 00:00:00 2001 From: flynn Date: Fri, 30 Aug 2024 08:28:31 +0000 Subject: [PATCH 465/844] Fix vector similarity index does not work for cosineDistance --- src/Storages/MergeTree/VectorSimilarityCondition.cpp | 2 ++ src/Storages/MergeTree/VectorSimilarityCondition.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/VectorSimilarityCondition.cpp b/src/Storages/MergeTree/VectorSimilarityCondition.cpp index c8f33857640..251cdde65ab 100644 --- a/src/Storages/MergeTree/VectorSimilarityCondition.cpp +++ b/src/Storages/MergeTree/VectorSimilarityCondition.cpp @@ -44,6 +44,8 @@ VectorSimilarityCondition::Info::DistanceFunction stringToDistanceFunction(std:: { if (distance_function == "L2Distance") return VectorSimilarityCondition::Info::DistanceFunction::L2; + else if (distance_function == "cosineDistance") + return VectorSimilarityCondition::Info::DistanceFunction::Cosine; else return VectorSimilarityCondition::Info::DistanceFunction::Unknown; } diff --git a/src/Storages/MergeTree/VectorSimilarityCondition.h b/src/Storages/MergeTree/VectorSimilarityCondition.h index 2380f8f46b0..e2946222f49 100644 --- a/src/Storages/MergeTree/VectorSimilarityCondition.h +++ b/src/Storages/MergeTree/VectorSimilarityCondition.h @@ -57,7 +57,8 @@ public: enum class DistanceFunction : uint8_t { Unknown, - L2 + L2, + Cosine }; std::vector reference_vector; From af7adfe4b2f8e9b3b7354314a91d19a19b7955f9 Mon Sep 17 00:00:00 2001 From: flynn Date: Fri, 30 Aug 2024 09:03:23 +0000 Subject: [PATCH 466/844] Remove unused code --- src/Storages/MergeTree/VectorSimilarityCondition.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/VectorSimilarityCondition.h b/src/Storages/MergeTree/VectorSimilarityCondition.h index e2946222f49..83ae1e19bfb 100644 --- a/src/Storages/MergeTree/VectorSimilarityCondition.h +++ b/src/Storages/MergeTree/VectorSimilarityCondition.h @@ -143,6 +143,7 @@ private: void traverseOrderByAST(const ASTPtr & node, RPN & rpn); /// Returns true and stores ANNExpr if the query has valid WHERE section + /// TODO NOT implemented, WHERE does not supported. static bool matchRPNWhere(RPN & rpn, Info & info); /// Returns true and stores ANNExpr if the query has valid ORDERBY section @@ -151,9 +152,6 @@ private: /// Returns true and stores Length if we have valid LIMIT clause in query static bool matchRPNLimit(RPNElement & rpn, UInt64 & limit); - /// Matches dist function, reference vector, column name - static bool matchMainParts(RPN::iterator & iter, const RPN::iterator & end, Info & info); - /// Gets float or int from AST node static float getFloatOrIntLiteralOrPanic(const RPN::iterator& iter); From 7c766e7458f3c75c72483b32d5a1adebb7653d93 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 30 Aug 2024 11:41:44 -0300 Subject: [PATCH 467/844] split tests again and randomize user name to be able to parallel --- ...74_multiple_authentication_methods.reference | 9 --------- .../03174_multiple_authentication_methods.sh | 17 ++--------------- ...authentication_methods_show_create.reference | 2 ++ ...tiple_authentication_methods_show_create.sql | 13 +++++++++++++ 4 files changed, 17 insertions(+), 24 deletions(-) create mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.reference create mode 100644 tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.sql diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference index d503b78a2bf..297ef667995 100644 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.reference +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.reference @@ -47,18 +47,9 @@ Adding no_password, should fail SYNTAX_ERROR Replacing existing authentication methods in favor of no_password, should succeed localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH no_password Trying to auth with no pwd, should succeed 1 localhost 9000 0 0 0 -Create user with mix both implicit and explicit auth type, starting with with -localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 -Create user with mix both implicit and explicit auth type, starting with by -localhost 9000 0 0 0 -CREATE USER u01_03174 IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password -localhost 9000 0 0 0 Use WITH without providing authentication type, should fail Syntax error Create user with ADD identification, should fail, add is not allowed for create query diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh index cd6cb0807f3..552e5cc20eb 100755 --- a/tests/queries/0_stateless/03174_multiple_authentication_methods.sh +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database # Tag no-replicated-database: https://s3.amazonaws.com/clickhouse-test-reports/65277/43e9a7ba4bbf7f20145531b384a31304895b55bc/stateless_tests__release__old_analyzer__s3__databasereplicated__[1_2].html and https://github.com/ClickHouse/ClickHouse/blob/011c694117845500c82f9563c65930429979982f/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh#L4 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -31,7 +31,7 @@ function test_login_pwd_expect_error function test { - user="u01_03174" + user="u01_03174$RANDOM" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" @@ -113,25 +113,12 @@ function test echo "Replacing existing authentication methods in favor of no_password, should succeed" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER USER ${user} $1 IDENTIFIED WITH no_password" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" echo "Trying to auth with no pwd, should succeed" test_login_no_pwd ${user} ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" - echo "Create user with mix both implicit and explicit auth type, starting with with" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4';" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" - - echo "Create user with mix both implicit and explicit auth type, starting with by" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4';" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SHOW CREATE USER ${user}" - - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP USER IF EXISTS ${user} $1" - echo "Use WITH without providing authentication type, should fail" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE USER ${user} $1 IDENTIFIED WITH BY '1';" 2>&1 | grep -m1 -o "Syntax error" | head -n 1 diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.reference b/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.reference new file mode 100644 index 00000000000..f5da15ccfaa --- /dev/null +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.reference @@ -0,0 +1,2 @@ +CREATE USER u_03174_multiple_auth_show_create IDENTIFIED WITH plaintext_password, sha256_password, bcrypt_password, sha256_password +CREATE USER u_03174_multiple_auth_show_create IDENTIFIED WITH sha256_password, plaintext_password, bcrypt_password, sha256_password diff --git a/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.sql b/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.sql new file mode 100644 index 00000000000..f9e9e72e5c0 --- /dev/null +++ b/tests/queries/0_stateless/03174_multiple_authentication_methods_show_create.sql @@ -0,0 +1,13 @@ +-- Tags: no-fasttest, no-parallel + +-- Create user with mix both implicit and explicit auth type, starting with with +CREATE USER u_03174_multiple_auth_show_create IDENTIFIED WITH plaintext_password by '1', by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_multiple_auth_show_create; + +DROP USER IF EXISTS u_03174_multiple_auth_show_create; + +-- Create user with mix both implicit and explicit auth type, starting with by +CREATE USER u_03174_multiple_auth_show_create IDENTIFIED by '1', plaintext_password by '2', bcrypt_password by '3', by '4'; +SHOW CREATE USER u_03174_multiple_auth_show_create; + +DROP USER IF EXISTS u_03174_multiple_auth_show_create; From cf87893758b0172f19e54b8bc7fd962bf258c96e Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 30 Aug 2024 15:28:53 +0000 Subject: [PATCH 468/844] Don't infer Bool type from String in CSV when input_format_csv_try_infer_numbers_from_strings=1 --- src/Formats/EscapingRuleUtils.cpp | 8 ++++++-- .../03231_csv_dont_infer_bool_from_string.reference | 4 ++++ .../0_stateless/03231_csv_dont_infer_bool_from_string.sql | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.reference create mode 100644 tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.sql diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 5429d8b7e0d..50a46d2334d 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -302,8 +302,12 @@ DataTypePtr tryInferDataTypeByEscapingRule(const String & field, const FormatSet /// Try to determine the type of value inside quotes auto type = tryInferDataTypeForSingleField(data, format_settings); - /// If we couldn't infer any type or it's a number and csv.try_infer_numbers_from_strings = 0, we determine it as a string. - if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type))) + /// Return String type if one of the following conditions apply + /// - we couldn't infer any type + /// - it's a number and csv.try_infer_numbers_from_strings = 0 + /// - it's a tuple and try_infer_strings_from_quoted_tuples = 0 + /// - it's a Bool type (we don't allow reading bool values from strings) + if (!type || (format_settings.csv.try_infer_strings_from_quoted_tuples && isTuple(type)) || (!format_settings.csv.try_infer_numbers_from_strings && isNumber(type)) || isBool(type)) return std::make_shared(); return type; diff --git a/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.reference b/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.reference new file mode 100644 index 00000000000..d23e2d2cbf3 --- /dev/null +++ b/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.reference @@ -0,0 +1,4 @@ +c1 Nullable(Int64) +c2 Nullable(Float64) +c3 Nullable(String) +42 42.42 True diff --git a/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.sql b/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.sql new file mode 100644 index 00000000000..e3cf77249eb --- /dev/null +++ b/tests/queries/0_stateless/03231_csv_dont_infer_bool_from_string.sql @@ -0,0 +1,4 @@ +set input_format_csv_try_infer_numbers_from_strings = 1; +desc format(CSV, '"42","42.42","True"'); +select * from format(CSV, '"42","42.42","True"'); + From 3c29f27dd2a1cc8d4649860a9b5021f8784e467b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 30 Aug 2024 20:04:43 +0000 Subject: [PATCH 469/844] Fix 03228_pr_subquery_view_order_by --- .../QueryPlan/Optimizations/liftUpUnion.cpp | 1 + .../Optimizations/optimizeReadInOrder.cpp | 23 +++++++++++++++++++ .../03228_pr_subquery_view_order_by.sql | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp index c48551732c9..43cf166002e 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp @@ -50,6 +50,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) expr_node.step = std::make_unique( expr_node.children.front()->step->getOutputStream(), expression->getExpression().clone()); + expr_node.step->setStepDescription(expression->getStepDescription()); } /// - Expression - Something diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 5df7d7b4e82..2eac1896066 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -899,6 +900,18 @@ AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPl return {}; } +static bool readingFromParallelReplicas(const QueryPlan::Node * node) +{ + IQueryPlanStep * step = node->step.get(); + while (!node->children.empty()) + { + step = node->children.front()->step.get(); + node = node->children.front(); + } + + return typeid_cast(step); +} + void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { if (node.children.size() != 1) @@ -924,6 +937,16 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) std::vector infos; infos.reserve(node.children.size()); + for (const auto * child : union_node->children) + { + /// in case of parallel replicas + /// avoid applying read-in-order optimization for local replica + /// since it will lead to different parallel replicas modes + /// between local and remote nodes + if (readingFromParallelReplicas(child)) + return; + } + for (auto * child : union_node->children) { infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update)); diff --git a/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql index b85392e0521..804a97f737f 100644 --- a/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql +++ b/tests/queries/0_stateless/03228_pr_subquery_view_order_by.sql @@ -12,7 +12,7 @@ FROM ) ORDER BY number DESC LIMIT 20 -SETTINGS cluster_for_parallel_replicas = 'parallel_replicas', allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree = 1, parallel_replicas_local_plan = 1, query_plan_lift_up_union = 0; +SETTINGS cluster_for_parallel_replicas = 'parallel_replicas', allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, parallel_replicas_for_non_replicated_merge_tree = 1, parallel_replicas_local_plan = 1; DROP TABLE view1; DROP TABLE table1; From 81b8f8594c044b1de6e1b18877e1d9b80401f617 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 30 Aug 2024 21:23:07 +0000 Subject: [PATCH 470/844] Fix tests --- ...imize_distributed_group_by_sharding_key.reference | 4 ++-- .../02496_remove_redundant_sorting.reference | 12 ++++++------ ...02496_remove_redundant_sorting_analyzer.reference | 12 ++++++------ ...2500_remove_redundant_distinct_analyzer.reference | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index e786532f25a..a807bf7096e 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -21,7 +21,7 @@ Expression (Projection) Union Expression ((Before LIMIT BY + (Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))))) ReadFromSystemNumbers - Expression + Expression (Before LIMIT BY) ReadFromRemote (Read from remote replica) explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized Union @@ -96,7 +96,7 @@ Expression (Project names) LimitBy Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))))))))) ReadFromSystemNumbers - Expression + Expression (Before LIMIT BY) ReadFromRemote (Read from remote replica) explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized Union diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4d004f2f78f..7824fd8cba9 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -395,9 +395,9 @@ Expression ((Projection + Before ORDER BY)) Union Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 @@ -427,9 +427,9 @@ Expression ((Projection + Before ORDER BY)) Union Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) -- execute Nullable(Float64) 9007199254740994 @@ -459,9 +459,9 @@ Expression ((Projection + Before ORDER BY)) Union Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) - Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index dd5ac7bf706..3c68d14fdf2 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -394,9 +394,9 @@ Expression ((Project names + Projection)) Union Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 @@ -426,9 +426,9 @@ Expression ((Project names + Projection)) Union Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) -- execute Nullable(Float64) 9007199254740994 @@ -458,9 +458,9 @@ Expression ((Project names + Projection)) Union Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) - Expression (( + ( + ( + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Conversion before UNION + (Project names + (Projection + Change column names to column identifiers))))))) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index b79f6310166..27b01cf1158 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -54,7 +54,7 @@ Expression (Project names) Distinct (Preliminary DISTINCT) Expression ((Projection + Change column names to column identifiers)) ReadFromSystemNumbers - Expression (( + ( + Project names))) + Expression ((Projection + (Change column names to column identifiers + Project names))) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) Expression ((Projection + Change column names to column identifiers)) @@ -542,7 +542,7 @@ Expression (Project names) Distinct (Preliminary DISTINCT) Expression ((Projection + Change column names to column identifiers)) ReadFromSystemNumbers - Expression (( + ( + Project names))) + Expression ((Projection + (Change column names to column identifiers + Project names))) Distinct (DISTINCT) Distinct (Preliminary DISTINCT) Expression ((Projection + Change column names to column identifiers)) From bfa27d6650484063b0f8c9b340f5a85b3df2c27d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 31 Aug 2024 06:11:50 +0000 Subject: [PATCH 471/844] Fix 02500_remove_redundant_distinct --- .../0_stateless/02500_remove_redundant_distinct.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index d7623cd5541..9bb8f4a4017 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -53,7 +53,7 @@ Expression (Projection) Distinct (Preliminary DISTINCT) Expression (Before ORDER BY) ReadFromSystemNumbers - Expression (( + Projection)) + Expression ((Before ORDER BY + Projection)) Distinct Distinct (Preliminary DISTINCT) Expression (Before ORDER BY) @@ -536,7 +536,7 @@ Expression (Projection) Distinct (Preliminary DISTINCT) Expression (Before ORDER BY) ReadFromSystemNumbers - Expression (( + Projection)) + Expression ((Before ORDER BY + Projection)) Distinct Distinct (Preliminary DISTINCT) Expression (Before ORDER BY) From 6fd7656aebd0d747ddd06a069139cb935031492e Mon Sep 17 00:00:00 2001 From: m4xxx1m Date: Sat, 31 Aug 2024 18:22:03 +0300 Subject: [PATCH 472/844] Added Poco::Util::AbstractConfiguration::getHost method --- .../include/Poco/Util/AbstractConfiguration.h | 40 +++++ base/poco/Util/src/AbstractConfiguration.cpp | 153 ++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/base/poco/Util/include/Poco/Util/AbstractConfiguration.h b/base/poco/Util/include/Poco/Util/AbstractConfiguration.h index 926ac3ba8a9..7f58df905c0 100644 --- a/base/poco/Util/include/Poco/Util/AbstractConfiguration.h +++ b/base/poco/Util/include/Poco/Util/AbstractConfiguration.h @@ -241,6 +241,20 @@ namespace Util /// If the value contains references to other properties (${}), these /// are expanded. + std::string getHost(const std::string & key) const; + /// Returns the string value of the host property with the given name. + /// Throws a NotFoundException if the key does not exist. + /// Throws a SyntaxException if the property is not a valid host (IP address or domain). + /// If the value contains references to other properties (${}), these + /// are expanded. + + std::string getHost(const std::string & key, const std::string & defaultValue) const; + /// If a property with the given key exists, returns the host property's string value, + /// otherwise returns the given default value. + /// Throws a SyntaxException if the property is not a valid host (IP address or domain). + /// If the value contains references to other properties (${}), these + /// are expanded. + virtual void setString(const std::string & key, const std::string & value); /// Sets the property with the given key to the given value. /// An already existing value for the key is overwritten. @@ -339,12 +353,38 @@ namespace Util static bool parseBool(const std::string & value); void setRawWithEvent(const std::string & key, std::string value); + static void checkHostValidity(const std::string & value); + /// Throws a SyntaxException if the value is not a valid host (IP address or domain). + virtual ~AbstractConfiguration(); private: std::string internalExpand(const std::string & value) const; std::string uncheckedExpand(const std::string & value) const; + static bool isValidIPv4Address(const std::string & value); + /// A string value is considered to be a valid IPv4 address if it matches + /// "x1.x2.x3.x4", where xi - integer in range 0..255 and may have leading zeroes + + static bool isValidIPv6Address(const std::string & value); + /// A string value is considered to be a valid IPv6 address if it matches + /// "x1:x2:x3:x4:x5:x6:x7:x8", where xi is hexadecimal integer and consist of 4 + /// characters or less (but at least 1), xi may have leading zeroes. + /// Letters in hexadecimal representation can be in upper case or lower case. + /// One or more consecutive hextets of zeroes can be replaced with "::", but + /// "::" can appear only once in a valid IPv6 address. + + static bool isValidDomainName(const std::string & value); + /// ::= [ "." ] + /// ::=