From 0dbd569b50828003dc6a55af56717dab92054d8f Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 12 Mar 2024 20:00:04 +0100 Subject: [PATCH 001/182] Minor change --- src/Storages/MergeTree/MergeTreePartInfo.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 5fbb5d70bf3..918acc78e8f 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -101,9 +101,8 @@ struct MergeTreePartInfo bool isFakeDropRangePart() const { - /// Another max level was previously used for REPLACE/MOVE PARTITION - auto another_max_level = std::numeric_limits::max(); - return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; + /// LEGACY_MAX_LEVEL was previously used for REPLACE/MOVE PARTITION + return level == MergeTreePartInfo::MAX_LEVEL || level == MergeTreePartInfo::LEGACY_MAX_LEVEL; } String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; From d84e272ed8687f8d13f0286ac37b93db3f874715 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 30 Apr 2024 13:15:23 +0000 Subject: [PATCH 002/182] Add changeDate functions --- src/Functions/changeDate.cpp | 342 ++++++++++++++++++ .../0_stateless/02982_changeDate.reference | 36 ++ .../queries/0_stateless/02982_changeDate.sql | 42 +++ 3 files changed, 420 insertions(+) create mode 100644 src/Functions/changeDate.cpp create mode 100644 tests/queries/0_stateless/02982_changeDate.reference create mode 100644 tests/queries/0_stateless/02982_changeDate.sql diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp new file mode 100644 index 00000000000..14f6887af47 --- /dev/null +++ b/src/Functions/changeDate.cpp @@ -0,0 +1,342 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Common/DateLUTImpl.h" +#include +#include +#include "Columns/IColumn.h" +#include "DataTypes/IDataType.h" +#include "base/DayNum.h" + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} + +namespace +{ + +enum ChangeDateFunctionsNames +{ + CHANGE_YEAR = 0, + CHANGE_MONTH = 1, + CHANGE_DAY = 2, + CHANGE_HOUR = 3, + CHANGE_MINUTE = 4, + CHANGE_SECOND = 5 +}; + +template +class FunctionChangeDate : public IFunction +{ +public: + static constexpr auto name = Traits::Name; + + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; + + String getName() const override { return name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date"}, + {mandatory_argument_names[1], &isNumber, nullptr, "Number"} + }; + validateFunctionArgumentTypes(*this, arguments, args); + + if (Traits::EnumName >= 3) + { + if (isDate(arguments[0].type)) + return std::make_shared(); + if (isDate32(arguments[0].type)) + return std::make_shared(DataTypeDateTime64::default_scale); + } + + return arguments[0].type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, 
const DataTypePtr & result_type, size_t input_rows_count) const override + { + const auto & input_type = arguments[0].type; + if (isDate(input_type)) + { + if (Traits::EnumName >= 3) + return execute(arguments, input_type, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDate32(input_type)) + { + if (Traits::EnumName >= 3) + return executeDate32ToDateTime64(arguments, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDateTime(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + return executeDateTime64(arguments, result_type, input_rows_count); + } + + + template + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const + { + const auto & date_lut = DateLUT::instance(); + + auto result_column = ResultDataType::ColumnType::create(input_rows_count); + auto & result_data = result_column->getData(); + + auto input_column = castColumn(arguments[0], std::make_shared()); + input_column = input_column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time; + if (isDateOrDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + else + time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + if (isDateOrDate32(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } + + return result_column; + } + + ColumnPtr executeDate32ToDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + const auto & date_lut = DateLUT::instance(); + + auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); + auto & result_data = result_column->getData(); + + auto input_column = arguments[0].column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1000, 0); + } + + return result_column; + } + + ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); + auto & result_data = result_column->getData(); + + auto input_column = arguments[0].column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], 
std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; + for (size_t i = 0; i < scale; ++i) { + deg *= 10; + } + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); + Int64 fraction = input_column_data[i] % deg; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + } + + return result_column; + } + + Int64 getChangedDate(Int64 time, Float32 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + { + auto year = time / 10'000'000'000; + auto month = (time % 10'000'000'000) / 100'000'000; + auto day = (time % 100'000'000) / 1000'000; + auto hours = (time % 1000'000) / 10'000; + auto minutes = (time % 10'000) / 100; + auto seconds = time % 100; + + Int64 min_date, max_date; + Int16 min_year, max_year; + if (isDate(result_type)) + { + min_date = date_lut.makeDayNum(1970, 1, 1); + max_date = date_lut.makeDayNum(2149, 6, 6); + min_year = 1970; + max_year = 2149; + } + else if (isDate32(result_type)) + { + min_date = date_lut.makeDayNum(1900, 1, 1); + max_date = date_lut.makeDayNum(2299, 12, 31); + min_year = 1900; + max_year = 2299; + } + else if (isDateTime(result_type)) + { + min_date = 0; + max_date = 0x0ffffffffll; + min_year = 1970; + max_year = 2106; + } + else + { + min_date = date_lut.makeDateTime(1900, 1, 1, 0,0 , 0) * deg; + max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); + min_year = 1900; + max_year = 2299; + } + + Int8 fl = 0; + + switch (Traits::EnumName) + { + case CHANGE_YEAR: + if (new_value < min_year) + fl = 1; + else if (new_value > max_year) + fl = 2; + year = static_cast(new_value); + break; + case CHANGE_MONTH: + if (new_value < 1 || new_value > 12) + fl = 1; + month = static_cast(new_value); + break; + case CHANGE_DAY: + if (new_value < 1 || new_value > 31) + fl = 1; + day = static_cast(new_value); + break; + case CHANGE_HOUR: + if (new_value < 0 || new_value > 23) + fl = 1; + hours = static_cast(new_value); + break; + case CHANGE_MINUTE: + if (new_value < 0 || new_value > 59) + fl = 1; + minutes = static_cast(new_value); + break; + case CHANGE_SECOND: + if (new_value < 0 || new_value > 59) + fl = 1; + seconds = static_cast(new_value); + break; + } + + if (fl == 1) + return min_date; + + if (fl == 2) + return max_date; + + Int64 result; + if (isDateOrDate32(result_type)) + result = date_lut.makeDayNum(year, month, day); + else + { + if (isDateTime(result_type)) + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); + else + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + } + + + if (result > max_date) + return max_date; + + return result; + } +}; + + +struct ChangeYearTraits +{ + static constexpr auto Name = "changeYear"; + static constexpr auto EnumName = CHANGE_YEAR; +}; + +struct ChangeMonthTraits +{ + static constexpr auto Name = "changeMonth"; + static constexpr auto EnumName = CHANGE_MONTH; +}; + +struct ChangeDayTraits +{ + static constexpr auto Name = "changeDay"; + static constexpr auto EnumName = CHANGE_DAY; +}; + +struct ChangeHourTraits +{ + static constexpr auto Name = "changeHour"; + static constexpr auto EnumName = 
CHANGE_HOUR; +}; + +struct ChangeMinuteTraits +{ + static constexpr auto Name = "changeMinute"; + static constexpr auto EnumName = CHANGE_MINUTE; +}; + +struct ChangeSecondTraits +{ + static constexpr auto Name = "changeSecond"; + static constexpr auto EnumName = CHANGE_SECOND; +}; + + +} + +REGISTER_FUNCTION(ChangeDate) +{ + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); +} + +} diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference new file mode 100644 index 00000000000..d7d4edf4b43 --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -0,0 +1,36 @@ +2000-01-01 +2001-01-01 +2002-01-01 11:22:33 +2003-01-01 11:22:33.4444 +1970-02-01 +1970-03-01 +1970-04-01 11:22:33 +1970-05-01 11:22:33.4444 +1970-01-02 +1970-01-03 +1970-01-04 11:22:33 +1970-01-05 11:22:33.4444 +1970-01-01 12:00:00 +1970-01-01 13:00:00.000 +1970-01-01 14:22:33 +1970-01-01 15:22:33.4444 +1970-01-01 00:23:00 +1970-01-01 00:24:00.000 +1970-01-01 11:25:33 +1970-01-01 11:26:33.4444 +1970-01-01 00:00:34 +1970-01-01 00:00:35.000 +1970-01-01 11:22:36 +1970-01-01 11:22:37.4444 +1970-01-01 +2149-06-06 +2149-06-06 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql new file mode 100644 index 00000000000..fbfe3771b33 --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -0,0 +1,42 @@ +SELECT changeYear(makeDate(1970, 01, 01), 2000); +SELECT changeYear(makeDate32(1970, 01, 01), 2001); +SELECT changeYear(makeDateTime(1970, 01, 01, 11, 22, 33), 2002); +SELECT changeYear(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 2003); + +SELECT changeMonth(makeDate(1970, 01, 01), 02); +SELECT changeMonth(makeDate32(1970, 01, 01), 03); +SELECT changeMonth(makeDateTime(1970, 01, 01, 11, 22, 33), 04); +SELECT changeMonth(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); + +SELECT changeDay(makeDate(1970, 01, 01), 02); +SELECT changeDay(makeDate32(1970, 01, 01), 03); +SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); +SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); + +SELECT changeHour(makeDate(1970, 01, 01), 12); +SELECT changeHour(makeDate32(1970, 01, 01), 13); +SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); +SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); + +SELECT changeMinute(makeDate(1970, 01, 01), 23); +SELECT changeMinute(makeDate32(1970, 01, 01), 24); +SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); +SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); + +SELECT changeSecond(makeDate(1970, 01, 01), 34); +SELECT changeSecond(makeDate32(1970, 01, 01), 35); +SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); +SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); + +SELECT changeYear(makeDate(2000, 01, 01), 1969.0); +SELECT changeYear(makeDate(2000, 06, 07), 2149.0); +SELECT changeMonth(makeDate(2149, 01, 01), 07); +SELECT changeMonth(makeDate(2000, 06, 07), 13); +SELECT changeDay(makeDate(2000, 01, 01), 0); +SELECT changeDay(makeDate(2000, 06, 07), 32); +SELECT changeHour(makeDate(2000, 01, 01), -1); +SELECT changeHour(makeDate(2000, 
06, 07), 24); +SELECT changeMinute(makeDate(2000, 01, 01), -1); +SELECT changeMinute(makeDate(2000, 06, 07), 60); +SELECT changeSecond(makeDate(2000, 01, 01), -1); +SELECT changeSecond(makeDate(2000, 06, 07), 60); \ No newline at end of file From 8f11262666132a9a940c2a10e67c5be8f0151c3a Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Tue, 30 Apr 2024 13:36:36 +0000 Subject: [PATCH 003/182] ome style changes --- src/Functions/changeDate.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 14f6887af47..4edc31384c3 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -25,10 +25,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ARGUMENT_OUT_OF_BOUND; -} namespace { @@ -171,7 +167,8 @@ public: const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 deg = 1; - for (size_t i = 0; i < scale; ++i) { + for (size_t i = 0; i < scale; ++i) + { deg *= 10; } @@ -266,7 +263,7 @@ public: if (fl == 1) return min_date; - + if (fl == 2) return max_date; @@ -280,7 +277,7 @@ public: else result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; } - + if (result > max_date) return max_date; From 32f267999da8e9edd68c843431d7a1ae26c7250e Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sat, 4 May 2024 21:28:32 +0000 Subject: [PATCH 004/182] changeDate implementation fixes --- src/DataTypes/DataTypeDate.h | 1 + src/DataTypes/DataTypeDate32.h | 1 + src/DataTypes/DataTypeDateTime.h | 1 + src/Functions/changeDate.cpp | 296 +++++++++++++++---------------- 4 files changed, 143 insertions(+), 156 deletions(-) diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 0e08b9ba2ca..72b7ef2509f 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -10,6 +10,7 @@ class DataTypeDate final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date"; + static constexpr auto type_id = TypeIndex::Date; TypeIndex getTypeId() const override { return TypeIndex::Date; } TypeIndex getColumnType() const override { return TypeIndex::UInt16; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 02e818f10df..052bb39fc31 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -10,6 +10,7 @@ class DataTypeDate32 final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date32"; + static constexpr auto type_id = TypeIndex::Date32; TypeIndex getTypeId() const override { return TypeIndex::Date32; } TypeIndex getColumnType() const override { return TypeIndex::Int32; } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 5519240dee1..3b1212d910d 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,6 +36,7 @@ public: explicit DataTypeDateTime(const TimezoneMixin & time_zone); static constexpr auto family_name = "DateTime"; + static constexpr auto type_id = TypeIndex::DateTime; const char * getFamilyName() const override { return family_name; } String doGetName() const override; diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 4edc31384c3..8a5d0a87ca2 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -10,26 +10,23 @@ #include #include #include -#include #include - #include "Common/DateLUTImpl.h" #include #include #include 
"Columns/IColumn.h" #include "DataTypes/IDataType.h" -#include "base/DayNum.h" - + #include #include - + namespace DB { - + namespace { - -enum ChangeDateFunctionsNames + +enum class ChangeDateFunctionsNames { CHANGE_YEAR = 0, CHANGE_MONTH = 1, @@ -38,160 +35,161 @@ enum ChangeDateFunctionsNames CHANGE_MINUTE = 4, CHANGE_SECOND = 5 }; - + +constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) +{ + return type == ChangeDateFunctionsNames::CHANGE_HOUR || + type == ChangeDateFunctionsNames::CHANGE_MINUTE || + type == ChangeDateFunctionsNames::CHANGE_SECOND; +} + +template +constexpr bool isDate32() +{ + return DataType::type_id == TypeIndex::Date32; +} + +template +constexpr bool isDateTime64() +{ + return DataType::type_id == TypeIndex::DateTime64; +} + template class FunctionChangeDate : public IFunction { public: static constexpr auto name = Traits::Name; - + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - + String getName() const override { return name; } - + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } - + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date"}, + {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date(32) or DateTime(64)"}, {mandatory_argument_names[1], &isNumber, nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); - - if (Traits::EnumName >= 3) + + if (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) return std::make_shared(); if (isDate32(arguments[0].type)) return std::make_shared(DataTypeDateTime64::default_scale); } - + return arguments[0].type; } - + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (Traits::EnumName >= 3) + if (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (Traits::EnumName >= 3) - return executeDate32ToDateTime64(arguments, result_type, input_rows_count); + if (isTimeChange(Traits::EnumName)) + return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDateTime(input_type)) return execute(arguments, input_type, result_type, input_rows_count); - return executeDateTime64(arguments, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); } - - + + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { - const auto & date_lut = DateLUT::instance(); + bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); + size_t result_rows_count = (is_const ? 
1 : input_rows_count); - auto result_column = ResultDataType::ColumnType::create(input_rows_count); + typename ResultDataType::ColumnType::MutablePtr result_column; + if constexpr (isDateTime64()) + { + auto scale = DataTypeDateTime64::default_scale; + if constexpr (isDateTime64()) + scale = typeid_cast(*result_type).getScale(); + result_column = ResultDataType::ColumnType::create(result_rows_count, scale); + } + else + result_column = ResultDataType::ColumnType::create(result_rows_count); + auto & result_data = result_column->getData(); - - auto input_column = castColumn(arguments[0], std::make_shared()); - input_column = input_column->convertToFullColumnIfConst(); + + auto input_column = arguments[0].column->convertToFullIfNeeded(); const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - for (size_t i = 0; i < input_rows_count; ++i) + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullIfNeeded(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < result_rows_count; ++i) { - Int64 time; - if (isDateOrDate32(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + if constexpr (isDateTime64()) + { + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; + for (size_t j = 0; j < scale; ++j) + deg *= 10; + + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); + Int64 fraction = input_column_data[i] % deg; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + } + else if constexpr (isDate32() && isDateTime64()) + { + const auto & date_lut = DateLUT::instance(); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); + } else - time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - - if (isDateOrDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); - else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + { + const auto & date_lut = DateLUT::instance(); + Int64 time; + if (isDateOrDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + else + time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + if (isDateOrDate32(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1, 0)); + } } + if (is_const) + return ColumnConst::create(std::move(result_column), input_rows_count); + return result_column; } - - ColumnPtr executeDate32ToDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - const auto & date_lut = DateLUT::instance(); - - auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); - auto & result_data = 
result_column->getData(); - - auto input_column = arguments[0].column->convertToFullColumnIfConst(); - const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - for (size_t i = 0; i < input_rows_count; ++i) - { - Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1000, 0); - } - - return result_column; - } - - ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); - auto & result_data = result_column->getData(); - - auto input_column = arguments[0].column->convertToFullColumnIfConst(); - const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - const auto scale = typeid_cast(*result_type).getScale(); - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 deg = 1; - for (size_t i = 0; i < scale; ++i) - { - deg *= 10; - } - - for (size_t i = 0; i < input_rows_count; ++i) - { - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); - Int64 fraction = input_column_data[i] % deg; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); - } - - return result_column; - } - - Int64 getChangedDate(Int64 time, Float32 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; - auto day = (time % 100'000'000) / 1000'000; - auto hours = (time % 1000'000) / 10'000; + auto day = (time % 100'000'000) / 1'000'000; + auto hours = (time % 1'000'000) / 10'000; auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - + Int64 min_date, max_date; Int16 min_year, max_year; if (isDate(result_type)) @@ -217,115 +215,101 @@ public: } else { - min_date = date_lut.makeDateTime(1900, 1, 1, 0,0 , 0) * deg; + min_date = date_lut.makeDateTime(1900, 1, 1, 0, 0, 0) * deg; max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); min_year = 1900; max_year = 2299; } - - Int8 fl = 0; - + switch (Traits::EnumName) { - case CHANGE_YEAR: + case ChangeDateFunctionsNames::CHANGE_YEAR: if (new_value < min_year) - fl = 1; + return min_date; else if (new_value > max_year) - fl = 2; + return max_date; year = static_cast(new_value); break; - case CHANGE_MONTH: + case ChangeDateFunctionsNames::CHANGE_MONTH: if (new_value < 1 || new_value > 12) - fl = 1; + return min_date; month = static_cast(new_value); break; - case CHANGE_DAY: + case ChangeDateFunctionsNames::CHANGE_DAY: if (new_value < 1 || new_value > 31) - fl = 1; + return min_date; day = static_cast(new_value); break; - case CHANGE_HOUR: + case ChangeDateFunctionsNames::CHANGE_HOUR: 
if (new_value < 0 || new_value > 23) - fl = 1; + return min_date; hours = static_cast(new_value); break; - case CHANGE_MINUTE: + case ChangeDateFunctionsNames::CHANGE_MINUTE: if (new_value < 0 || new_value > 59) - fl = 1; + return min_date; minutes = static_cast(new_value); break; - case CHANGE_SECOND: + case ChangeDateFunctionsNames::CHANGE_SECOND: if (new_value < 0 || new_value > 59) - fl = 1; + return min_date; seconds = static_cast(new_value); break; } - - if (fl == 1) - return min_date; - - if (fl == 2) - return max_date; - + Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else - { - if (isDateTime(result_type)) - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); - else - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; - } - - + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + if (result > max_date) return max_date; - + return result; } }; - - + + struct ChangeYearTraits { static constexpr auto Name = "changeYear"; - static constexpr auto EnumName = CHANGE_YEAR; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; }; - + struct ChangeMonthTraits { static constexpr auto Name = "changeMonth"; - static constexpr auto EnumName = CHANGE_MONTH; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; }; - + struct ChangeDayTraits { static constexpr auto Name = "changeDay"; - static constexpr auto EnumName = CHANGE_DAY; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; }; - + struct ChangeHourTraits { static constexpr auto Name = "changeHour"; - static constexpr auto EnumName = CHANGE_HOUR; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; }; - + struct ChangeMinuteTraits { static constexpr auto Name = "changeMinute"; - static constexpr auto EnumName = CHANGE_MINUTE; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; }; - + struct ChangeSecondTraits { static constexpr auto Name = "changeSecond"; - static constexpr auto EnumName = CHANGE_SECOND; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - - + + } - + REGISTER_FUNCTION(ChangeDate) { factory.registerFunction>(); @@ -335,5 +319,5 @@ REGISTER_FUNCTION(ChangeDate) factory.registerFunction>(); factory.registerFunction>(); } - + } From fb6c931262c106bf3e076949fc47a28cf06181ba Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sat, 4 May 2024 22:49:10 +0000 Subject: [PATCH 005/182] Style fixes --- src/Functions/changeDate.cpp | 108 ++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 8a5d0a87ca2..ed6b3255cfc 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -12,20 +12,27 @@ #include #include #include "Common/DateLUTImpl.h" +#include "Common/Exception.h" #include #include #include "Columns/IColumn.h" #include "DataTypes/IDataType.h" - + #include #include - + namespace DB { - + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + namespace { - + enum class ChangeDateFunctionsNames { CHANGE_YEAR = 0, @@ -35,50 +42,50 @@ enum class ChangeDateFunctionsNames CHANGE_MINUTE = 4, CHANGE_SECOND = 5 }; - + constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) { return type == ChangeDateFunctionsNames::CHANGE_HOUR || type == 
ChangeDateFunctionsNames::CHANGE_MINUTE || type == ChangeDateFunctionsNames::CHANGE_SECOND; } - + template constexpr bool isDate32() { return DataType::type_id == TypeIndex::Date32; } - + template constexpr bool isDateTime64() { return DataType::type_id == TypeIndex::DateTime64; } - + template class FunctionChangeDate : public IFunction { public: static constexpr auto name = Traits::Name; - + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - + String getName() const override { return name; } - + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } - + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date(32) or DateTime(64)"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"} - }; - validateFunctionArgumentTypes(*this, arguments, args); - + if (arguments.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. Passed {}.", getName(), arguments.size()); + + if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); + if (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) @@ -86,10 +93,10 @@ public: if (isDate32(arguments[0].type)) return std::make_shared(DataTypeDateTime64::default_scale); } - + return arguments[0].type; } - + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & input_type = arguments[0].type; @@ -109,8 +116,7 @@ public: return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } - - + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { @@ -127,16 +133,16 @@ public: } else result_column = ResultDataType::ColumnType::create(result_rows_count); - + auto & result_data = result_column->getData(); - + auto input_column = arguments[0].column->convertToFullIfNeeded(); const auto & input_column_data = typeid_cast(*input_column).getData(); - + auto new_value_column = castColumn(arguments[1], std::make_shared()); new_value_column = new_value_column->convertToFullIfNeeded(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + for (size_t i = 0; i < result_rows_count; ++i) { if constexpr (isDateTime64()) @@ -146,17 +152,17 @@ public: Int64 deg = 1; for (size_t j = 0; j < scale; ++j) deg *= 10; - + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = 
static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; - + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); } else @@ -167,7 +173,7 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - + if (isDateOrDate32(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else @@ -177,10 +183,10 @@ public: if (is_const) return ColumnConst::create(std::move(result_column), input_rows_count); - + return result_column; } - + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; @@ -189,7 +195,7 @@ public: auto hours = (time % 1'000'000) / 10'000; auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - + Int64 min_date, max_date; Int16 min_year, max_year; if (isDate(result_type)) @@ -220,7 +226,7 @@ public: min_year = 1900; max_year = 2299; } - + switch (Traits::EnumName) { case ChangeDateFunctionsNames::CHANGE_YEAR: @@ -256,60 +262,60 @@ public: seconds = static_cast(new_value); break; } - + Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; - + if (result > max_date) return max_date; - + return result; } }; - - + + struct ChangeYearTraits { static constexpr auto Name = "changeYear"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; }; - + struct ChangeMonthTraits { static constexpr auto Name = "changeMonth"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; }; - + struct ChangeDayTraits { static constexpr auto Name = "changeDay"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; }; - + struct ChangeHourTraits { static constexpr auto Name = "changeHour"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; }; - + struct ChangeMinuteTraits { static constexpr auto Name = "changeMinute"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; }; - + struct ChangeSecondTraits { static constexpr auto Name = "changeSecond"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - - + + } - + REGISTER_FUNCTION(ChangeDate) { factory.registerFunction>(); @@ -319,5 +325,5 @@ REGISTER_FUNCTION(ChangeDate) factory.registerFunction>(); factory.registerFunction>(); } - + } From f4990f26d914b078d92674061ec5427b3daa4ab7 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 16:38:53 +0000 Subject: [PATCH 006/182] fixes --- src/Functions/changeDate.cpp | 39 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index ed6b3255cfc..dfa93c9d218 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -156,28 +156,33 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = 
DateLUT::instance(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } else { const auto & date_lut = DateLUT::instance(); Int64 time; - if (isDateOrDate32(input_type)) + if (isDate(input_type)) time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + else if (isDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - if (isDateOrDate32(result_type)) + + if (isDate(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else if (isDate32(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1, 0)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } @@ -187,7 +192,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -221,8 +226,17 @@ public: } else { - min_date = date_lut.makeDateTime(1900, 1, 1, 0, 0, 0) * deg; - max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); + min_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(1900, 1, 1, 0, 0, 0), + static_cast(0), + static_cast(scale)); + Int64 deg = 1; + for (Int64 j = 0; j < scale; ++j) + deg *= 10; + max_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(2299, 12, 31, 23, 59, 59), + static_cast(deg - 1), + static_cast(scale)); min_year = 1900; max_year = 2299; } @@ -266,8 +280,13 @@ public: Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); + else if (isDateTime(result_type)) + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); else - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + result = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(year, month, day, hours, minutes, seconds), + static_cast(fraction), + static_cast(scale)); if (result > max_date) return max_date; From 2f8341d8c1686b9b73bfad36d12651d63b8ffd08 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 16:52:36 +0000 Subject: [PATCH 007/182] style --- src/Functions/changeDate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index dfa93c9d218..2265ee1b26b 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -176,7 +176,6 @@ public: else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else if 
(isDate32(result_type)) From c82687c9a61bd6f53949ac5310d26c9b410fabbf Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 22:17:59 +0000 Subject: [PATCH 008/182] fixes + functions docs --- src/Functions/changeDate.cpp | 82 ++++++++++++++++--- .../queries/0_stateless/02982_changeDate.sql | 24 +++--- 2 files changed, 82 insertions(+), 24 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 2265ee1b26b..73ac9eff867 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -56,6 +56,12 @@ constexpr bool isDate32() return DataType::type_id == TypeIndex::Date32; } +template +constexpr bool isDateTime() +{ + return DataType::type_id == TypeIndex::DateTime; +} + template constexpr bool isDateTime64() { @@ -86,7 +92,7 @@ public: if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) return std::make_shared(); @@ -102,13 +108,13 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } @@ -145,6 +151,7 @@ public: for (size_t i = 0; i < result_rows_count; ++i) { + if constexpr (isDateTime64()) { const auto scale = typeid_cast(*result_type).getScale(); @@ -165,16 +172,21 @@ public: result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } + else if constexpr (isDateTime()) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } else { const auto & date_lut = DateLUT::instance(); Int64 time; if (isDate(input_type)) time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; - else if (isDate32(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; else - time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); @@ -336,12 +348,58 @@ struct ChangeSecondTraits REGISTER_FUNCTION(ChangeDate) { - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Changes the year of the given Date(32) or DateTime(64). +Returns the same type as the input data. 
+)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeYear function, but changes month of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeYear function, but changes day_of_month of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Changes the hour of the given Date(32) or DateTime(64). +If the input data is Date, return DateTime; +if the input data is Date32, return DateTime64; +In other cases returns the same type as the input data. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeHour function, but changes minute of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeHour function, but changes seconds of the date. +)", + .categories{"Dates and Times"} + } + ); } } diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index fbfe3771b33..9f7f5eacfc5 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -13,18 +13,18 @@ SELECT changeDay(makeDate32(1970, 01, 01), 03); SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); -SELECT changeHour(makeDate(1970, 01, 01), 12); -SELECT changeHour(makeDate32(1970, 01, 01), 13); +SELECT toTimeZone(changeHour(makeDate(1970, 01, 01), 12), 'UTC'); +SELECT toTimeZone(changeHour(makeDate32(1970, 01, 01), 13), 'UTC'); SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); -SELECT changeMinute(makeDate(1970, 01, 01), 23); -SELECT changeMinute(makeDate32(1970, 01, 01), 24); +SELECT toTimeZone(changeMinute(makeDate(1970, 01, 01), 23), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate32(1970, 01, 01), 24), 'UTC'); SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); -SELECT changeSecond(makeDate(1970, 01, 01), 34); -SELECT changeSecond(makeDate32(1970, 01, 01), 35); +SELECT toTimeZone(changeSecond(makeDate(1970, 01, 01), 34), 'UTC'); +SELECT toTimeZone(changeSecond(makeDate32(1970, 01, 01), 35), 'UTC'); SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); @@ -34,9 +34,9 @@ SELECT changeMonth(makeDate(2149, 01, 01), 07); SELECT changeMonth(makeDate(2000, 06, 07), 13); SELECT changeDay(makeDate(2000, 01, 01), 0); SELECT changeDay(makeDate(2000, 06, 07), 32); -SELECT changeHour(makeDate(2000, 01, 01), -1); -SELECT changeHour(makeDate(2000, 06, 07), 24); -SELECT changeMinute(makeDate(2000, 01, 01), -1); -SELECT changeMinute(makeDate(2000, 06, 07), 60); -SELECT changeSecond(makeDate(2000, 01, 01), -1); -SELECT changeSecond(makeDate(2000, 06, 07), 60); \ No newline at end of file +SELECT toTimeZone(changeHour(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeHour(makeDate(2000, 06, 07), 24), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate(2000, 06, 07), 60), 'UTC'); +SELECT 
toTimeZone(changeSecond(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeSecond(makeDate(2000, 06, 07), 60), 'UTC'); \ No newline at end of file From f52c24f753b35240d6d4925aaf582c9ebac829b2 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 08:18:02 +0000 Subject: [PATCH 009/182] tests fixes --- src/Functions/changeDate.cpp | 1 - .../queries/0_stateless/02982_changeDate.sql | 72 +++++++++---------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 73ac9eff867..a1827e1d94a 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -151,7 +151,6 @@ public: for (size_t i = 0; i < result_rows_count; ++i) { - if constexpr (isDateTime64()) { const auto scale = typeid_cast(*result_type).getScale(); diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 9f7f5eacfc5..f438212f9fa 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(makeDate(1970, 01, 01), 2000); -SELECT changeYear(makeDate32(1970, 01, 01), 2001); -SELECT changeYear(makeDateTime(1970, 01, 01, 11, 22, 33), 2002); -SELECT changeYear(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 2003); +SELECT changeYear(toDate('1970-01-01', 'UTC'), 2000); +SELECT changeYear(toDate32('1900-01-01', 'UTC'), 2001); +SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'UTC'), 2002); +SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'UTC'), 2003); -SELECT changeMonth(makeDate(1970, 01, 01), 02); -SELECT changeMonth(makeDate32(1970, 01, 01), 03); -SELECT changeMonth(makeDateTime(1970, 01, 01, 11, 22, 33), 04); -SELECT changeMonth(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); +SELECT changeMonth(toDate('1970-01-01', 'UTC'), 02); +SELECT changeMonth(toDate32('1970-01-01', 'UTC'), 03); +SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); +SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); -SELECT changeDay(makeDate(1970, 01, 01), 02); -SELECT changeDay(makeDate32(1970, 01, 01), 03); -SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); -SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); +SELECT changeDay(toDate('1970-01-01', 'UTC'), 02); +SELECT changeDay(toDate32('1970-01-01', 'UTC'), 03); +SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); +SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); -SELECT toTimeZone(changeHour(makeDate(1970, 01, 01), 12), 'UTC'); -SELECT toTimeZone(changeHour(makeDate32(1970, 01, 01), 13), 'UTC'); -SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); -SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); +SELECT toTimeZone(changeHour(toDate('1970-01-01', 'UTC'), 12), 'UTC'); +SELECT toTimeZone(changeHour(toDate32('1970-01-01', 'UTC'), 13), 'UTC'); +SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'UTC'), 14); +SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 15); -SELECT toTimeZone(changeMinute(makeDate(1970, 01, 01), 23), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate32(1970, 01, 01), 24), 'UTC'); -SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); -SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); +SELECT toTimeZone(changeMinute(toDate('1970-01-01', 'UTC'), 23), 'UTC'); +SELECT toTimeZone(changeMinute(toDate32('1970-01-01', 
'UTC'), 24), 'UTC'); +SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'UTC'), 25); +SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 26); -SELECT toTimeZone(changeSecond(makeDate(1970, 01, 01), 34), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate32(1970, 01, 01), 35), 'UTC'); -SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); -SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); +SELECT toTimeZone(changeSecond(toDate('1970-01-01', 'UTC'), 34), 'UTC'); +SELECT toTimeZone(changeSecond(toDate32('1970-01-01', 'UTC'), 35), 'UTC'); +SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'UTC'), 36); +SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 37); -SELECT changeYear(makeDate(2000, 01, 01), 1969.0); -SELECT changeYear(makeDate(2000, 06, 07), 2149.0); -SELECT changeMonth(makeDate(2149, 01, 01), 07); -SELECT changeMonth(makeDate(2000, 06, 07), 13); -SELECT changeDay(makeDate(2000, 01, 01), 0); -SELECT changeDay(makeDate(2000, 06, 07), 32); -SELECT toTimeZone(changeHour(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeHour(makeDate(2000, 06, 07), 24), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate(2000, 06, 07), 60), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate(2000, 06, 07), 60), 'UTC'); \ No newline at end of file +SELECT changeYear(toDate('2000-01-01', 'UTC'), 1969.0); +SELECT changeYear(toDate('2000-06-07', 'UTC'), 2149.0); +SELECT changeMonth(toDate('2149-01-01', 'UTC'), 07); +SELECT changeMonth(toDate('2000-01-01', 'UTC'), 13); +SELECT changeDay(toDate('2000-01-01', 'UTC'), 0); +SELECT changeDay(toDate('2000-01-01', 'UTC'), 32); +SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), 24), 'UTC'); +SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), 60), 'UTC'); +SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), 60), 'UTC'); \ No newline at end of file From ff9c0934cecb5ee8b938adc92498cec035d2e26f Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 17:52:50 +0000 Subject: [PATCH 010/182] changes in tests and functions because of time zone --- src/Functions/changeDate.cpp | 30 +++++--- .../0_stateless/02982_changeDate.reference | 12 ++-- .../queries/0_stateless/02982_changeDate.sql | 72 +++++++++---------- 3 files changed, 64 insertions(+), 50 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index a1827e1d94a..c7815263323 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -50,6 +50,12 @@ constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) type == ChangeDateFunctionsNames::CHANGE_SECOND; } +template +constexpr bool isDate() +{ + return DataType::type_id == TypeIndex::Date; +} + template constexpr bool isDate32() { @@ -68,6 +74,7 @@ constexpr bool isDateTime64() return DataType::type_id == TypeIndex::DateTime64; } + template class FunctionChangeDate : public IFunction { @@ -162,21 +169,21 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); 
+ result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, 3, 0); } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); } else { @@ -188,11 +195,11 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); else if (isDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); } } @@ -202,7 +209,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & input_type, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -291,12 +298,20 @@ public: if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) + { result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); + if (isDate(input_type)) + result += date_lut.timezoneOffset(result); + } else + { result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), static_cast(fraction), static_cast(scale)); + if (isDate32(input_type)) + result += date_lut.timezoneOffset(result); + } if (result > max_date) return max_date; @@ -342,7 +357,6 @@ struct ChangeSecondTraits static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - } REGISTER_FUNCTION(ChangeDate) diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index d7d4edf4b43..67747922ae5 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -10,16 +10,16 @@ 1970-01-03 1970-01-04 11:22:33 1970-01-05 11:22:33.4444 -1970-01-01 12:00:00 -1970-01-01 13:00:00.000 +12 +13 1970-01-01 14:22:33 1970-01-01 15:22:33.4444 -1970-01-01 00:23:00 -1970-01-01 00:24:00.000 +23 +24 1970-01-01 11:25:33 1970-01-01 11:26:33.4444 -1970-01-01 
00:00:34 -1970-01-01 00:00:35.000 +34 +35 1970-01-01 11:22:36 1970-01-01 11:22:37.4444 1970-01-01 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index f438212f9fa..786e27808e8 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(toDate('1970-01-01', 'UTC'), 2000); -SELECT changeYear(toDate32('1900-01-01', 'UTC'), 2001); -SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'UTC'), 2002); -SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'UTC'), 2003); +SELECT changeYear(toDate('1970-01-01'), 2000); +SELECT changeYear(toDate32('1900-01-01'), 2001); +SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 2002); +SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 2003); -SELECT changeMonth(toDate('1970-01-01', 'UTC'), 02); -SELECT changeMonth(toDate32('1970-01-01', 'UTC'), 03); -SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); -SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); +SELECT changeMonth(toDate('1970-01-01'), 02); +SELECT changeMonth(toDate32('1970-01-01'), 03); +SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); +SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); -SELECT changeDay(toDate('1970-01-01', 'UTC'), 02); -SELECT changeDay(toDate32('1970-01-01', 'UTC'), 03); -SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); -SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); +SELECT changeDay(toDate('1970-01-01'), 02); +SELECT changeDay(toDate32('1970-01-01'), 03); +SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); +SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); -SELECT toTimeZone(changeHour(toDate('1970-01-01', 'UTC'), 12), 'UTC'); -SELECT toTimeZone(changeHour(toDate32('1970-01-01', 'UTC'), 13), 'UTC'); -SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'UTC'), 14); -SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 15); +SELECT toHour(changeHour(toDate('1970-01-01'), 12)); +SELECT toHour(changeHour(toDate32('1970-01-01'), 13)); +SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 14); +SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 15); -SELECT toTimeZone(changeMinute(toDate('1970-01-01', 'UTC'), 23), 'UTC'); -SELECT toTimeZone(changeMinute(toDate32('1970-01-01', 'UTC'), 24), 'UTC'); -SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'UTC'), 25); -SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 26); +SELECT toMinute(changeMinute(toDate('1970-01-01'), 23)); +SELECT toMinute(changeMinute(toDate32('1970-01-01'), 24)); +SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 25); +SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 26); -SELECT toTimeZone(changeSecond(toDate('1970-01-01', 'UTC'), 34), 'UTC'); -SELECT toTimeZone(changeSecond(toDate32('1970-01-01', 'UTC'), 35), 'UTC'); -SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'UTC'), 36); -SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 37); +SELECT toSecond(changeSecond(toDate('1970-01-01'), 34)); +SELECT toSecond(changeSecond(toDate32('1970-01-01'), 35)); +SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 
'Antarctica/Palmer'), 36); +SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 37); -SELECT changeYear(toDate('2000-01-01', 'UTC'), 1969.0); -SELECT changeYear(toDate('2000-06-07', 'UTC'), 2149.0); -SELECT changeMonth(toDate('2149-01-01', 'UTC'), 07); -SELECT changeMonth(toDate('2000-01-01', 'UTC'), 13); -SELECT changeDay(toDate('2000-01-01', 'UTC'), 0); -SELECT changeDay(toDate('2000-01-01', 'UTC'), 32); -SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), 24), 'UTC'); -SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), 60), 'UTC'); -SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), 60), 'UTC'); \ No newline at end of file +SELECT changeYear(toDate('2000-01-01'), 1969.0); +SELECT changeYear(toDate('2000-06-07'), 2149.0); +SELECT changeMonth(toDate('2149-01-01'), 07); +SELECT changeMonth(toDate('2000-01-01'), 13); +SELECT changeDay(toDate('2000-01-01'), 0); +SELECT changeDay(toDate('2000-01-01'), 32); +SELECT changeHour(toDate('2000-01-01'), -1); +SELECT changeHour(toDate('2000-01-01'), 24); +SELECT changeMinute(toDate('2000-01-01'), -1); +SELECT changeMinute(toDate('2000-01-01'), 60); +SELECT changeSecond(toDate('2000-01-01'), -1); +SELECT changeSecond(toDate('2000-01-01'), 60); \ No newline at end of file From 3e7041c1f4106359ab6792ddbab86db3a4792555 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 18:33:46 +0000 Subject: [PATCH 011/182] . --- src/Functions/changeDate.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index c7815263323..e77aaec30fe 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -169,21 +169,21 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, scale, fraction); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, 3, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } else { @@ -195,11 +195,11 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else if (isDate32(result_type)) - result_data[i] = 
static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } @@ -209,7 +209,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & input_type, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -298,20 +298,12 @@ public: if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) - { result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); - if (isDate(input_type)) - result += date_lut.timezoneOffset(result); - } else - { result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), static_cast(fraction), static_cast(scale)); - if (isDate32(input_type)) - result += date_lut.timezoneOffset(result); - } if (result > max_date) return max_date; From afb47e418303b4ee125f8c9700985372f827da8b Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 19:32:43 +0000 Subject: [PATCH 012/182] . --- src/Functions/changeDate.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index e77aaec30fe..fd279a028b7 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -173,11 +173,18 @@ public: } else if constexpr (isDate32() && isDateTime64()) { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } + else if constexpr (isDate() && isDateTime()) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; + + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); @@ -196,10 +203,8 @@ public: if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); - else if (isDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } From eb1cce7b07bae5dcb80868f449db1b4dd7dbeb52 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 May 2024 03:09:25 +0200 Subject: [PATCH 013/182] Everything should work with Analyzer --- .../0_stateless/01487_distributed_in_not_default_db.sql | 4 ---- 
tests/queries/0_stateless/02942_variant_cast.sql | 1 - tests/queries/0_stateless/02944_variant_as_common_type.sql | 3 --- 3 files changed, 8 deletions(-) diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql b/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql index cd027530ac8..ccd2c571290 100644 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql +++ b/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql @@ -25,10 +25,6 @@ CREATE TABLE d AS t ENGINE = Distributed(test_cluster_two_shards_different_datab USE test_01487; DROP DATABASE test_01487; --- After the default database is dropped QueryAnalysisPass cannot process the following SELECT query. --- That query is invalid on the initiator node. -set allow_experimental_analyzer = 0; - SELECT * FROM main_01487.d WHERE value IN (SELECT l.value FROM l) ORDER BY value; USE main_01487; diff --git a/tests/queries/0_stateless/02942_variant_cast.sql b/tests/queries/0_stateless/02942_variant_cast.sql index fc2d1d63657..33587e3e438 100644 --- a/tests/queries/0_stateless/02942_variant_cast.sql +++ b/tests/queries/0_stateless/02942_variant_cast.sql @@ -1,5 +1,4 @@ set allow_experimental_variant_type=1; -set allow_experimental_analyzer=0; -- It's currently doesn't work with analyzer because of the way it works with constants, but it will be refactored and fixed in future select NULL::Variant(String, UInt64); select 42::UInt64::Variant(String, UInt64); diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql index e985cf365dd..b3b86427b06 100644 --- a/tests/queries/0_stateless/02944_variant_as_common_type.sql +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -1,5 +1,3 @@ -set allow_experimental_analyzer=0; -- The result type for if function with constant is different with analyzer. It wil be fixed after refactoring around constants in analyzer. 
- set allow_experimental_variant_type=1; set use_variant_as_common_type=1; @@ -73,4 +71,3 @@ select toTypeName(res), array([1, 2, 3], [[1, 2, 3]]) as res; select toTypeName(res), map('a', 1, 'b', 'str_1') as res; select toTypeName(res), map('a', 1, 'b', map('c', 2, 'd', 'str_1')) as res; select toTypeName(res), map('a', 1, 'b', [1, 2, 3], 'c', [[4, 5, 6]]) as res; - From 50377d2450701ea31ea3fcde8c6ee51b96c971cf Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sun, 19 May 2024 08:41:30 +0000 Subject: [PATCH 014/182] fix date_min of DateTime --- src/Functions/changeDate.cpp | 8 ++- .../0_stateless/02982_changeDate.reference | 58 ++++++++--------- .../queries/0_stateless/02982_changeDate.sql | 62 +++++++++---------- 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index fd279a028b7..237d9082566 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -162,6 +162,7 @@ public: { const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; for (size_t j = 0; j < scale; ++j) deg *= 10; @@ -223,7 +224,7 @@ public: auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - Int64 min_date, max_date; + Int64 min_date = 0, max_date = 0; Int16 min_year, max_year; if (isDate(result_type)) { @@ -242,7 +243,7 @@ public: else if (isDateTime(result_type)) { min_date = 0; - max_date = 0x0ffffffffll; + max_date = 0x0FFFFFFFFLL; min_year = 1970; max_year = 2106; } @@ -310,6 +311,9 @@ public: static_cast(fraction), static_cast(scale)); + if (result < min_date) + return min_date; + if (result > max_date) return max_date; diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 67747922ae5..c64abc89ed2 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -1,36 +1,36 @@ -2000-01-01 2001-01-01 -2002-01-01 11:22:33 -2003-01-01 11:22:33.4444 -1970-02-01 -1970-03-01 -1970-04-01 11:22:33 -1970-05-01 11:22:33.4444 -1970-01-02 -1970-01-03 -1970-01-04 11:22:33 -1970-01-05 11:22:33.4444 -12 -13 -1970-01-01 14:22:33 -1970-01-01 15:22:33.4444 -23 -24 -1970-01-01 11:25:33 -1970-01-01 11:26:33.4444 -34 -35 -1970-01-01 11:22:36 -1970-01-01 11:22:37.4444 +2002-01-01 +2003-01-01 11:22:33 +2004-01-01 11:22:33.4444 +2000-02-01 +2000-03-01 +2000-04-01 11:22:33 +2000-05-01 11:22:33.4444 +2000-01-02 +2000-01-03 +2000-01-04 11:22:33 +2000-01-05 11:22:33.4444 +2000-01-01 12:00:00 +2000-01-01 13:00:00.000 +2000-01-01 14:22:33 +2000-01-01 15:22:33.4444 +2000-01-01 00:23:00 +2000-01-01 00:24:00.000 +2000-01-01 11:25:33 +2000-01-01 11:26:33.4444 +2000-01-01 00:00:34 +2000-01-01 00:00:35.000 +2000-01-01 11:22:36 +2000-01-01 11:22:37.4444 1970-01-01 2149-06-06 2149-06-06 1970-01-01 1970-01-01 1970-01-01 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 786e27808e8..0d1bd75e130 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(toDate('1970-01-01'), 2000); -SELECT changeYear(toDate32('1900-01-01'), 2001); 
-SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 2002); -SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 2003); +SELECT changeYear(toDate('2000-01-01'), 2001); +SELECT changeYear(toDate32('2000-01-01'), 2002); +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2003); +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2004); -SELECT changeMonth(toDate('1970-01-01'), 02); -SELECT changeMonth(toDate32('1970-01-01'), 03); -SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); -SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); +SELECT changeMonth(toDate('2000-01-01'), 02); +SELECT changeMonth(toDate32('2000-01-01'), 03); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 04); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); -SELECT changeDay(toDate('1970-01-01'), 02); -SELECT changeDay(toDate32('1970-01-01'), 03); -SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); -SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); +SELECT changeDay(toDate('2000-01-01'), 02); +SELECT changeDay(toDate32('2000-01-01'), 03); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 04); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); -SELECT toHour(changeHour(toDate('1970-01-01'), 12)); -SELECT toHour(changeHour(toDate32('1970-01-01'), 13)); -SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 14); -SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 15); +SELECT changeHour(toDate('2000-01-01'), 12); +SELECT changeHour(toDate32('2000-01-01'), 13); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 14); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 15); -SELECT toMinute(changeMinute(toDate('1970-01-01'), 23)); -SELECT toMinute(changeMinute(toDate32('1970-01-01'), 24)); -SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 25); -SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 26); +SELECT changeMinute(toDate('2000-01-01'), 23); +SELECT changeMinute(toDate32('2000-01-01'), 24); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 25); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 26); -SELECT toSecond(changeSecond(toDate('1970-01-01'), 34)); -SELECT toSecond(changeSecond(toDate32('1970-01-01'), 35)); -SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 36); -SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 37); +SELECT changeSecond(toDate('2000-01-01'), 34); +SELECT changeSecond(toDate32('2000-01-01'), 35); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 36); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 37); -SELECT changeYear(toDate('2000-01-01'), 1969.0); +SELECT changeYear(toDate('2000-01-01'), 1969.0); SELECT changeYear(toDate('2000-06-07'), 2149.0); SELECT changeMonth(toDate('2149-01-01'), 07); SELECT changeMonth(toDate('2000-01-01'), 13); SELECT changeDay(toDate('2000-01-01'), 0); SELECT changeDay(toDate('2000-01-01'), 32); -SELECT changeHour(toDate('2000-01-01'), -1); -SELECT changeHour(toDate('2000-01-01'), 24); -SELECT changeMinute(toDate('2000-01-01'), -1); -SELECT changeMinute(toDate('2000-01-01'), 60); -SELECT changeSecond(toDate('2000-01-01'), -1); -SELECT changeSecond(toDate('2000-01-01'), 60); \ No newline at end 
of file +SELECT changeHour(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file From eecbd44ce5e6673072c728097f50b41e2935ae90 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:23:30 +0000 Subject: [PATCH 015/182] Some fixups --- src/Functions/changeDate.cpp | 172 ++++++++---------- .../queries/0_stateless/02982_changeDate.sql | 2 +- 2 files changed, 75 insertions(+), 99 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 237d9082566..6725afe3356 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -1,24 +1,23 @@ -#include -#include -#include +#include "Common/DateLUTImpl.h" +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include "Common/DateLUTImpl.h" -#include "Common/Exception.h" -#include -#include -#include "Columns/IColumn.h" -#include "DataTypes/IDataType.h" -#include #include namespace DB @@ -26,88 +25,62 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace { -enum class ChangeDateFunctionsNames +enum class Component { - CHANGE_YEAR = 0, - CHANGE_MONTH = 1, - CHANGE_DAY = 2, - CHANGE_HOUR = 3, - CHANGE_MINUTE = 4, - CHANGE_SECOND = 5 + Year, + Month, + Day, + Hour, + Minute, + Second }; -constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) +bool isTimeComponentChange(Component type) { - return type == ChangeDateFunctionsNames::CHANGE_HOUR || - type == ChangeDateFunctionsNames::CHANGE_MINUTE || - type == ChangeDateFunctionsNames::CHANGE_SECOND; + return type == Component::Hour || + type == Component::Minute || + type == Component::Second; } -template -constexpr bool isDate() -{ - return DataType::type_id == TypeIndex::Date; -} - -template -constexpr bool isDate32() -{ - return DataType::type_id == TypeIndex::Date32; -} - -template -constexpr bool isDateTime() -{ - return DataType::type_id == TypeIndex::DateTime; -} - -template -constexpr bool isDateTime64() -{ - return DataType::type_id == TypeIndex::DateTime64; -} - - template class FunctionChangeDate : public IFunction { public: - static constexpr auto name = Traits::Name; - - static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - - String getName() const override { return name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } + static constexpr auto name = Traits::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return Traits::name; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) 
const override { return true; } + size_t getNumberOfArguments() const override { return 2; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. Passed {}.", getName(), arguments.size()); - if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); + if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date, Date32, DateTime or DateTime64", getName()); + if (!isNumber(*arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be numeric", getName()); - if constexpr (isTimeChange(Traits::EnumName)) + const auto & input_type = arguments[0].type; + + if (isTimeComponentChange(Traits::component)) { - if (isDate(arguments[0].type)) + if (isDate(input_type)) return std::make_shared(); - if (isDate32(arguments[0].type)) + if (isDate32(input_type)) return std::make_shared(DataTypeDateTime64::default_scale); } - return arguments[0].type; + return input_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -115,50 +88,53 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if constexpr (isTimeChange(Traits::EnumName)) + if (isTimeComponentChange(Traits::component)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if constexpr (isTimeChange(Traits::EnumName)) + if (isTimeComponentChange(Traits::component)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDateTime(input_type)) return execute(arguments, input_type, result_type, input_rows_count); - return execute(arguments, input_type, result_type, input_rows_count); + if (isDateTime64(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid input type"); } - template + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); size_t result_rows_count = (is_const ? 
1 : input_rows_count); typename ResultDataType::ColumnType::MutablePtr result_column; - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); result_column = ResultDataType::ColumnType::create(result_rows_count, scale); } else result_column = ResultDataType::ColumnType::create(result_rows_count); - auto & result_data = result_column->getData(); - auto input_column = arguments[0].column->convertToFullIfNeeded(); - const auto & input_column_data = typeid_cast(*input_column).getData(); + const auto & input_column_data = typeid_cast(*input_column).getData(); auto new_value_column = castColumn(arguments[1], std::make_shared()); new_value_column = new_value_column->convertToFullIfNeeded(); const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + auto & result_data = result_column->getData(); + for (size_t i = 0; i < result_rows_count; ++i) { - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) { const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); @@ -172,21 +148,21 @@ public: result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } - else if constexpr (isDate32() && isDateTime64()) + else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } - else if constexpr (isDate() && isDateTime()) + else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } - else if constexpr (isDateTime()) + else if constexpr (std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); @@ -264,36 +240,36 @@ public: max_year = 2299; } - switch (Traits::EnumName) + switch (Traits::component) { - case ChangeDateFunctionsNames::CHANGE_YEAR: + case Component::Year: if (new_value < min_year) return min_date; else if (new_value > max_year) return max_date; year = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_MONTH: + case Component::Month: if (new_value < 1 || new_value > 12) return min_date; month = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_DAY: + case Component::Day: if (new_value < 1 || new_value > 31) return min_date; day = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_HOUR: + case Component::Hour: if (new_value < 0 || new_value > 23) return min_date; hours = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_MINUTE: + case Component::Minute: if (new_value < 0 || new_value > 59) return min_date; minutes = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_SECOND: + case Component::Second: if (new_value < 0 || new_value > 59) return min_date; seconds = static_cast(new_value); @@ -324,38 +300,38 @@ public: struct ChangeYearTraits { - static constexpr auto Name = "changeYear"; - static constexpr 
auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; + static constexpr auto name = "changeYear"; + static constexpr auto component = Component::Year; }; struct ChangeMonthTraits { - static constexpr auto Name = "changeMonth"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; + static constexpr auto name = "changeMonth"; + static constexpr auto component = Component::Month; }; struct ChangeDayTraits { - static constexpr auto Name = "changeDay"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; + static constexpr auto name = "changeDay"; + static constexpr auto component = Component::Day; }; struct ChangeHourTraits { - static constexpr auto Name = "changeHour"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; + static constexpr auto name = "changeHour"; + static constexpr auto component = Component::Hour; }; struct ChangeMinuteTraits { - static constexpr auto Name = "changeMinute"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; + static constexpr auto name = "changeMinute"; + static constexpr auto component = Component::Minute; }; struct ChangeSecondTraits { - static constexpr auto Name = "changeSecond"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; + static constexpr auto name = "changeSecond"; + static constexpr auto component = Component::Second; }; } diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 0d1bd75e130..26d53b2f7f4 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -39,4 +39,4 @@ SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/No SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; From b1d3eb4c4d67e2e95d09dabf027ea9065751634d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:27:17 +0000 Subject: [PATCH 016/182] Some fixups, pt. II --- src/Functions/changeDate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 6725afe3356..d17787f4f55 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -26,6 +26,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } From 4fdcdc284d6c41c3a1f97f6d72c7b9c73816a9df Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:28:38 +0000 Subject: [PATCH 017/182] Some fixups, pt. 
III --- src/DataTypes/DataTypeDate.h | 1 - src/DataTypes/DataTypeDate32.h | 1 - src/DataTypes/DataTypeDateTime.h | 1 - 3 files changed, 3 deletions(-) diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 72b7ef2509f..0e08b9ba2ca 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -10,7 +10,6 @@ class DataTypeDate final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date"; - static constexpr auto type_id = TypeIndex::Date; TypeIndex getTypeId() const override { return TypeIndex::Date; } TypeIndex getColumnType() const override { return TypeIndex::UInt16; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 414c8301558..65633e7a228 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -9,7 +9,6 @@ class DataTypeDate32 final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date32"; - static constexpr auto type_id = TypeIndex::Date32; TypeIndex getTypeId() const override { return TypeIndex::Date32; } TypeIndex getColumnType() const override { return TypeIndex::Int32; } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 3b1212d910d..5519240dee1 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,7 +36,6 @@ public: explicit DataTypeDateTime(const TimezoneMixin & time_zone); static constexpr auto family_name = "DateTime"; - static constexpr auto type_id = TypeIndex::DateTime; const char * getFamilyName() const override { return family_name; } String doGetName() const override; From ba1e0e0317e9b408bfeafaac3fdda87eb16296fb Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Mon, 20 May 2024 11:43:31 +0000 Subject: [PATCH 018/182] added docs + updated tests --- .../functions/date-time-functions.md | 85 +++++++++++++++++++ .../0_stateless/02982_changeDate.reference | 17 ++++ .../queries/0_stateless/02982_changeDate.sql | 26 +++++- 3 files changed, 127 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5622097537e..1ce6fce6667 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -3039,3 +3039,88 @@ Result: - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +## changeYear + +Changes a year of the passed date argument. + +**Syntax** + +``` sql +changeYear(date, new_value) +``` + +**Arguments** + +- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +- `new_value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Return value** + +- A date or date with time. Same data type as input date argument. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeYear(toDate('1999-01-01'), 2000), hangeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Like [changeYear](#changeYear) but changes a month of the passed date argument. + +## changeMonth + +Like [changeYear](#changeYear) but changes a day of year of the passed date argument. + +## changeHour + +Changes an hour of the passed date argument. + +**Syntax** + +``` sql +changeHour(date, new_value) +``` + +**Arguments** + +- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +- `new_value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Return value** + +- A date with time. If date argument is Date - returns DateTime, if Date32 - returns DateTime64, otherwise returns same data type as input date argument. + +Type: [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Exapmle** + +``` sql +SELECT changeHour(toDate('1999-01-01'), 12), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12); +``` + +Result: +``` +┌─changeHour(toDate('1999-01-01'), 12)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12)─┐ +│ 1999-01-01 12:00:00 │ 1999-01-01 12:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Like [changeHour](#changeHour) but changes a minute of the passed date argument. + +## changeSecond + +Like [changeHour](#changeHour) but changes a seconds of the passed date argument. 
\ No newline at end of file diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index c64abc89ed2..3e8ad45d9a8 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -34,3 +34,20 @@ 1970-01-01 07:00:00 1970-01-01 07:00:00 1970-01-01 07:00:00 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2299-12-31 +1900-01-01 +2106-02-07 13:28:15 +1970-01-01 07:00:00 +2299-12-31 23:59:59.999 +1900-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 0d1bd75e130..1a9d062bafe 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -39,4 +39,28 @@ SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/No SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT changeYear(toDate('2000-01-01')); -- { serverError 42 } +SELECT changeYear(toDate('2000-01-01'), 2001, 2002); -- { serverError 42 } +SELECT changeYear(toDate('2000-01-01'), '2001'); -- { serverError 43 } + +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int8)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int16)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt8)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt16)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Decimal(10, 5))); + +SELECT changeYear(toDate32('2000-01-01'), 2300); +SELECT changeYear(toDate32('2000-01-01'), 1899); +SELECT changeSecond(toDateTime('2106-02-07 13:28:15'), 16) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeHour(toDateTime('1970-01-01 23:59:59'), 6) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 2300) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 1899) SETTINGS session_timezone = 'Asia/Novosibirsk'; + From d115adf462859fd82a5bb5a83a6dedaeae799a08 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 20 May 2024 15:02:51 +0000 Subject: [PATCH 019/182] Some fixups, pt. 
IV --- .../functions/date-time-functions.md | 180 ++++++++++++--- src/Functions/changeDate.cpp | 126 ++++++----- .../0_stateless/02982_changeDate.reference | 181 +++++++++++++-- .../queries/0_stateless/02982_changeDate.sql | 207 +++++++++++++++--- 4 files changed, 546 insertions(+), 148 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 43bb5b6f6cb..8bf301d76c2 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -4104,33 +4104,33 @@ timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); ## changeYear -Changes a year of the passed date argument. +Changes the year component of a date or date time. **Syntax** ``` sql -changeYear(date, new_value) +changeYear(date_or_datetime, value) ``` **Arguments** -- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - -- `new_value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). **Return value** -- A date or date with time. Same data type as input date argument. +- The same type as `date_or_datetime`. Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** ``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), hangeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); + SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); ``` Result: + ``` ┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ │ 2000-01-01 │ 2000-01-01 00:00:00.000 │ @@ -4139,51 +4139,175 @@ Result: ## changeMonth -Like [changeYear](#changeYear) but changes a month of the passed date argument. - -## changeMonth - -Like [changeYear](#changeYear) but changes a day of year of the passed date argument. - -## changeHour - -Changes an hour of the passed date argument. +Changes the month component of a date or date time. **Syntax** ``` sql -changeHour(date, new_value) +changeMonth(date_or_datetime, value) ``` **Arguments** -- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - -- `new_value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). **Return value** -- A date with time. 
If date argument is Date - returns DateTime, if Date32 - returns DateTime64, otherwise returns same data type as input date argument. +- The same type as `date_or_datetime`. -Type: [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -**Exapmle** +**Example** ``` sql -SELECT changeHour(toDate('1999-01-01'), 12), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12); + SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); ``` Result: + ``` -┌─changeHour(toDate('1999-01-01'), 12)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12)─┐ -│ 1999-01-01 12:00:00 │ 1999-01-01 12:00:00.000 │ +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ └──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ ``` ## changeMinute -Like [changeHour](#changeHour) but changes a minute of the passed date argument. +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` ## changeSecond -Like [changeHour](#changeHour) but changes a seconds of the passed date argument. \ No newline at end of file +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index d17787f4f55..bd73154a6f1 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -45,9 +45,9 @@ enum class Component bool isTimeComponentChange(Component type) { - return type == Component::Hour || - type == Component::Minute || - type == Component::Second; + return type == Component::Hour || type == Component::Minute || type == Component::Second; +} + } template @@ -63,13 +63,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. Passed {}.", getName(), arguments.size()); - - if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date, Date32, DateTime or DateTime64", getName()); - if (!isNumber(*arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be numeric", getName()); + FunctionArgumentDescriptors args{ + {"date_or_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"}, + {"value", static_cast(&isNativeInteger), nullptr, "Integer"} + }; + validateFunctionArgumentTypes(*this, arguments, args); const auto & input_type = arguments[0].type; @@ -335,62 +333,62 @@ struct ChangeSecondTraits static constexpr auto component = Component::Second; }; -} - REGISTER_FUNCTION(ChangeDate) { - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Changes the year of the given Date(32) or DateTime(64). -Returns the same type as the input data. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeYear function, but changes month of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeYear function, but changes day_of_month of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Changes the hour of the given Date(32) or DateTime(64). -If the input data is Date, return DateTime; -if the input data is Date32, return DateTime64; -In other cases returns the same type as the input data. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeHour function, but changes minute of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeHour function, but changes seconds of the date. 
-)", - .categories{"Dates and Times"} - } - ); + { + FunctionDocumentation::Description description = "Changes the year component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeYear(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the month component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMonth(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the day component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeDay(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the hour component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeHour(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the minute component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMinute(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. 
Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the second component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeSecond(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } } } diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 3e8ad45d9a8..8ce647481bb 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -1,37 +1,170 @@ +Negative tests +changeYear +-- Date 2001-01-01 -2002-01-01 -2003-01-01 11:22:33 -2004-01-01 11:22:33.4444 +1970-01-01 +1970-01-01 +2149-06-06 +-- Date32 +2001-01-01 +1900-01-01 +1900-01-01 +2299-12-31 +-- DateTime +2001-01-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +2106-02-07 07:28:15 +-- DateTime64 +2001-01-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +2299-12-31 23:59:59.9999 +changeMonth +-- Date +2000-01-01 2000-02-01 -2000-03-01 -2000-04-01 11:22:33 -2000-05-01 11:22:33.4444 +2000-12-01 +1970-01-01 +1970-01-01 +1970-01-01 +-- Date32 +2000-01-01 +2000-02-01 +2000-12-01 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-02-01 11:22:33 +2000-12-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-02-01 11:22:33.4444 +2000-12-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +changeDay +-- Date +2000-01-01 2000-01-02 -2000-01-03 -2000-01-04 11:22:33 -2000-01-05 11:22:33.4444 -2000-01-01 12:00:00 -2000-01-01 13:00:00.000 -2000-01-01 14:22:33 -2000-01-01 15:22:33.4444 -2000-01-01 00:23:00 -2000-01-01 00:24:00.000 -2000-01-01 11:25:33 -2000-01-01 11:26:33.4444 -2000-01-01 00:00:34 -2000-01-01 00:00:35.000 -2000-01-01 11:22:36 -2000-01-01 11:22:37.4444 -1970-01-01 -2149-06-06 -2149-06-06 +2000-01-31 1970-01-01 1970-01-01 1970-01-01 +-- Date32 +2000-01-01 +2000-01-02 +2000-01-31 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-01-02 11:22:33 +2000-01-31 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-01-02 11:22:33.4444 +2000-01-31 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 
00:00:00.0000 +-- Special case: change to 29 Feb in a leap year +2000-02-29 +2000-02-29 +2000-02-29 11:22:33 +2000-02-29 11:22:33.4444 +changeHour +-- Date +2000-01-01 00:00:00 +2000-01-01 02:00:00 +2000-01-01 23:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 02:00:00.000 +2000-01-01 23:00:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 00:22:33 +2000-01-01 02:22:33 +2000-01-01 23:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 00:22:33.4444 +2000-01-01 02:22:33.4444 +2000-01-01 23:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 +changeMinute +-- Date +2000-01-01 00:00:00 +2000-01-01 00:02:00 +2000-01-01 00:59:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:02:00.000 +2000-01-01 00:59:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:00:33 +2000-01-01 11:02:33 +2000-01-01 11:59:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:00:33.4444 +2000-01-01 11:02:33.4444 +2000-01-01 11:59:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 +changeSecond +-- Date +2000-01-01 00:00:00 +2000-01-01 00:00:02 +2000-01-01 00:00:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:00:02.000 +2000-01-01 00:00:59.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:22:00 +2000-01-01 11:22:02 +2000-01-01 11:22:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:00.4444 +2000-01-01 11:22:02.4444 +2000-01-01 11:22:59.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 2000-02-01 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index dec95c2d79d..62232079d61 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,43 +1,186 @@ +SELECT 'Negative tests'; +-- as changeYear, changeMonth, changeDay, changeMinute, changeSecond share the same implementation, just testing one of them +SELECT changeYear(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01'), 2000, 1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(1999, 2000); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 1.5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Disable timezone randomization +SET session_timezone='CET'; + +SELECT 'changeYear'; +SELECT '-- Date'; SELECT changeYear(toDate('2000-01-01'), 2001); -SELECT changeYear(toDate32('2000-01-01'), 2002); -SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2003); -SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2004); +SELECT changeYear(toDate('2000-01-01'), 1800); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeYear(toDate32('2000-01-01'), 2001); +SELECT changeYear(toDate32('2000-01-01'), 1800); -- 
out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2001); +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 1800); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), -5000); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2500); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2001); +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 1800); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), -5000); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2500); -- out-of-bounds -SELECT changeMonth(toDate('2000-01-01'), 02); -SELECT changeMonth(toDate32('2000-01-01'), 03); -SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 04); -SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); +SELECT 'changeMonth'; +SELECT '-- Date'; +SELECT changeMonth(toDate('2000-01-01'), 1); +SELECT changeMonth(toDate('2000-01-01'), 2); +SELECT changeMonth(toDate('2000-01-01'), 12); +SELECT changeMonth(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), 13); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMonth(toDate32('2000-01-01'), 1); +SELECT changeMonth(toDate32('2000-01-01'), 2); +SELECT changeMonth(toDate32('2000-01-01'), 12); +SELECT changeMonth(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), 13); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 12); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 13); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 12); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 13); -- out-of-bounds -SELECT changeDay(toDate('2000-01-01'), 02); -SELECT changeDay(toDate32('2000-01-01'), 03); -SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 04); -SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); +SELECT 'changeDay'; +SELECT '-- Date'; +SELECT changeDay(toDate('2000-01-01'), 1); +SELECT changeDay(toDate('2000-01-01'), 2); +SELECT changeDay(toDate('2000-01-01'), 31); +SELECT changeDay(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), 32); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeDay(toDate32('2000-01-01'), 1); +SELECT changeDay(toDate32('2000-01-01'), 2); +SELECT changeDay(toDate32('2000-01-01'), 31); +SELECT changeDay(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), 
-1); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), 32); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 31); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 32); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 31); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 32); -- out-of-bounds +SELECT '-- Special case: change to 29 Feb in a leap year'; +SELECT changeDay(toDate('2000-02-28'), 29); +SELECT changeDay(toDate32('2000-02-01'), 29); +SELECT changeDay(toDateTime('2000-02-01 11:22:33'), 29); +SELECT changeDay(toDateTime64('2000-02-01 11:22:33.4444', 4), 29); -SELECT changeHour(toDate('2000-01-01'), 12); -SELECT changeHour(toDate32('2000-01-01'), 13); -SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 14); -SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 15); - -SELECT changeMinute(toDate('2000-01-01'), 23); -SELECT changeMinute(toDate32('2000-01-01'), 24); -SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 25); -SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 26); - -SELECT changeSecond(toDate('2000-01-01'), 34); -SELECT changeSecond(toDate32('2000-01-01'), 35); -SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 36); -SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 37); - -SELECT changeYear(toDate('2000-01-01'), 1969.0); -SELECT changeYear(toDate('2000-06-07'), 2149.0); -SELECT changeMonth(toDate('2149-01-01'), 07); -SELECT changeMonth(toDate('2000-01-01'), 13); -SELECT changeDay(toDate('2000-01-01'), 0); -SELECT changeDay(toDate('2000-01-01'), 32); +SELECT 'changeHour'; +SELECT '-- Date'; +SELECT changeHour(toDate('2000-01-01'), 0); +SELECT changeHour(toDate('2000-01-01'), 2); +SELECT changeHour(toDate('2000-01-01'), 23); +SELECT changeHour(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate('2000-01-01'), 24); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeHour(toDate32('2000-01-01'), 0); +SELECT changeHour(toDate32('2000-01-01'), 2); +SELECT changeHour(toDate32('2000-01-01'), 23); +SELECT changeHour(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate32('2000-01-01'), 24); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 23); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 24); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 23); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeHour(toDateTime64('2000-01-01 
11:22:33.4444', 4), 24); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeHour(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeMinute'; +SELECT '-- Date'; +SELECT changeMinute(toDate('2000-01-01'), 0); +SELECT changeMinute(toDate('2000-01-01'), 2); +SELECT changeMinute(toDate('2000-01-01'), 59); +SELECT changeMinute(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMinute(toDate32('2000-01-01'), 0); +SELECT changeMinute(toDate32('2000-01-01'), 2); +SELECT changeMinute(toDate32('2000-01-01'), 59); +SELECT changeMinute(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeSecond'; +SELECT '-- Date'; +SELECT changeSecond(toDate('2000-01-01'), 0); +SELECT changeSecond(toDate('2000-01-01'), 2); +SELECT changeSecond(toDate('2000-01-01'), 59); +SELECT changeSecond(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeSecond(toDate32('2000-01-01'), 0); +SELECT changeSecond(toDate32('2000-01-01'), 2); +SELECT changeSecond(toDate32('2000-01-01'), 59); +SELECT changeSecond(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; From 6ff3cf82819e61020f3483687934a70ade67be67 Mon Sep 
17 00:00:00 2001 From: Robert Schulze Date: Mon, 20 May 2024 15:36:25 +0000 Subject: [PATCH 020/182] Some fixups, pt. V --- .../functions/date-time-functions.md | 419 +++++++++--------- 1 file changed, 210 insertions(+), 209 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 8bf301d76c2..7a39cc74372 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -3520,6 +3520,216 @@ Result: └───────────────────────────────────────────────────────────────────────┘ ``` +## changeYear + +Changes the year component of a date or date time. + +**Syntax** + +``` sql +changeYear(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: + +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Changes the month component of a date or date time. + +**Syntax** + +``` sql +changeMonth(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); +``` + +Result: + +``` +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. 
+ +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeSecond + +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). 
+ +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. @@ -4102,212 +4312,3 @@ timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) -## changeYear - -Changes the year component of a date or date time. - -**Syntax** - -``` sql -changeYear(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); -``` - -Result: - -``` -┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ -│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeMonth - -Changes the month component of a date or date time. - -**Syntax** - -``` sql -changeMonth(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
- -**Example** - -``` sql - SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); -``` - -Result: - -``` -┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ -│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeDay - -Changes the day component of a date or date time. - -**Syntax** - -``` sql -changeDay(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); -``` - -Result: - -``` -┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ -│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ -└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ -``` - -## changeHour - -Changes the hour component of a date or date time. - -**Syntax** - -``` sql -changeHour(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); -``` - -Result: - -``` -┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ -│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeMinute - -Changes the minute component of a date or date time. - -**Syntax** - -``` sql -changeMinute(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
- -**Example** - -``` sql - SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeSecond - -Changes the second component of a date or date time. - -**Syntax** - -``` sql -changeSecond(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` From b47524a9a5d5c379227da1ac290938f42d2b0586 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 05:19:49 +0000 Subject: [PATCH 021/182] Fix spelling --- .../aspell-ignore/en/aspell-dict.txt | 116 +++++++++--------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2a9aa259fdd..ade822f508a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -29,13 +29,6 @@ Alexey AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -184,7 +177,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -394,8 +386,6 @@ InterserverThreads IsPentagon IsResClassIII IsValid -isNotDistinctFrom -isNullable JBOD JOINed JOINs @@ -464,8 +454,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -475,9 +463,8 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP -LOONGARCH LONGLONG -LoongArch +LOONGARCH Levenshtein Liao LibFuzzer @@ -495,6 +482,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -569,17 +557,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -681,8 
+658,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -883,7 +860,6 @@ Simhash SimpleAggregateFunction SimpleState SipHash -sigmoid Smirnov's Smirnov'test Soundex @@ -929,7 +905,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -955,7 +930,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -965,6 +939,7 @@ TemporaryFilesUnknown Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1025,7 +1000,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1092,6 +1066,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1099,10 +1074,9 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears -addInterval -addTupleOfIntervals addr addressToLine addressToLineWithInlines @@ -1144,15 +1118,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1160,10 +1138,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1183,6 +1163,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1323,6 +1304,12 @@ cfg cgroup cgroups chadmin +changeDay +changeHour +changeMinute +changeMonth +changeSecond +changeYear changelog changelogs charset @@ -1364,6 +1351,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1725,8 +1713,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1799,8 +1787,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1852,6 +1842,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1863,9 +1855,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1912,8 +1904,8 @@ logTrace logagent loghouse london -loongarch lookups +loongarch lowcardinality lowerUTF lowercased @@ -1984,8 +1976,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -2002,10 +1994,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF 
+multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2340,8 +2343,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2401,8 +2404,9 @@ sharded sharding shortcircuit shortkeys -showCertificate shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2416,8 +2420,8 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod -singlepart singleValueOrNull +singlepart singlevalueornull sinh sipHash @@ -2462,13 +2466,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2529,6 +2533,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2536,10 +2541,9 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears -subtractInterval -subtractTupleOfIntervals subtree subtrees subtype @@ -2548,13 +2552,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow +sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2577,6 +2581,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2711,18 +2716,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2765,6 +2770,7 @@ unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2775,8 +2781,8 @@ uptime uptrace uring url -urlencoded urlCluster +urlencoded urls usearch userspace From c026a5b7e938b797d3f8cb016457dab38c002aab Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 05:20:46 +0000 Subject: [PATCH 022/182] Fix style check --- src/Functions/changeDate.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index bd73154a6f1..9868ac5b914 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -23,13 +23,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { From 336e791ea87c8298ff584ae2de8a140f0947f02a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 20:50:54 +0000 Subject: [PATCH 023/182] Fix style check, pt. 
II --- src/Functions/changeDate.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 9868ac5b914..b400680d272 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -23,6 +23,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { From ab2baf5e9664133097025c149a93a09e84cca152 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 22 May 2024 12:23:14 +0000 Subject: [PATCH 024/182] Fix expected results --- .../0_stateless/02982_changeDate.reference | 17 -------------- .../queries/0_stateless/02982_changeDate.sql | 23 ------------------- 2 files changed, 40 deletions(-) diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 8ce647481bb..4a7f093ca2b 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -167,20 +167,3 @@ changeSecond -- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2299-12-31 -1900-01-01 -2106-02-07 13:28:15 -1970-01-01 07:00:00 -2299-12-31 23:59:59.999 -1900-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 62232079d61..2bc9aa95569 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -183,26 +183,3 @@ SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of- SELECT '-- With different timezone'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; - -SELECT changeYear(toDate('2000-01-01')); -- { serverError 42 } -SELECT changeYear(toDate('2000-01-01'), 2001, 2002); -- { serverError 42 } -SELECT changeYear(toDate('2000-01-01'), '2001'); -- { serverError 43 } - -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int8)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int16)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt8)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt16)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Decimal(10, 5))); - -SELECT changeYear(toDate32('2000-01-01'), 2300); -SELECT changeYear(toDate32('2000-01-01'), 1899); -SELECT changeSecond(toDateTime('2106-02-07 13:28:15'), 16) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeHour(toDateTime('1970-01-01 23:59:59'), 6) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 2300) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 1899) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file From cdd99a73a0e46801bb01f47df49c47e67fd9bb6f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 10:12:32 +0000 Subject: [PATCH 025/182] 
Fix clang-tidy --- src/Functions/changeDate.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index b400680d272..e24391afe12 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -274,15 +274,25 @@ public: } Int64 result; - if (isDateOrDate32(result_type)) + if (isDate(result_type) || isDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); else +#ifndef __clang_analyzer__ + /// ^^ This looks funny. It is the least terrible suppression of a false positive reported by clang-analyzer (a sub-class + /// of clang-tidy checks) deep down in 'decimalFromComponents'. Usual suppressions of the form NOLINT* don't work here (they + /// would only affect code in _this_ file), and suppressing the issue in 'decimalFromComponents' may suppress true positives. result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), - static_cast(fraction), + fraction, static_cast(scale)); +#else + { + UNUSED(fraction); + result = 0; + } +#endif if (result < min_date) return min_date; From bdd8bcc0d9b68474ca10df52772babc5aa1a20d4 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 7 Jun 2024 15:51:13 +0000 Subject: [PATCH 026/182] add some log --- src/Databases/DatabaseAtomic.cpp | 7 +++++++ src/Storages/StorageReplicatedMergeTree.cpp | 19 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..d431eb5c1b7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -112,6 +112,13 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & table_name_to_path.erase(name); detached_tables.emplace(table->getStorageID().uuid, table); not_in_use = cleanupDetachedTables(); + + if (!not_in_use.empty()) + { + not_in_use.clear(); + LOG_DEBUG(log, "Finish removing non using detached tables"); + } + return table; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e18e66d7af9..68bb5916d7c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,20 +5,21 @@ #include #include +#include +#include #include #include #include #include +#include #include #include #include #include #include +#include #include #include -#include -#include -#include #include @@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; + LOG_TRACE(log, "Start preparing for shutdown"); + try { auto settings_ptr = getSettings(); @@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() stopBeingLeader(); if (attach_thread) + { attach_thread->shutdown(); + LOG_TRACE(log, "Attach thread shutdowned"); + } + restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again @@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } + + LOG_TRACE(log, "Finish preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5332,6 +5341,9 @@ void StorageReplicatedMergeTree::shutdown(bool) if 
(shutdown_called.exchange(true)) return; + const auto storage_name = getStorageID().getNameForLogs(); + LOG_TRACE(log, "Shutdown started, table={}", storage_name); + flushAndPrepareForShutdown(); if (!shutdown_deadline.has_value()) @@ -5374,6 +5386,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } + LOG_TRACE(log, "Shutdown finished, table={}", storage_name); } From 8af89e6e6d919cf4f0c1eb4a5372ab49dfd9b144 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 13 Jun 2024 13:22:25 +0000 Subject: [PATCH 027/182] apply comments --- src/Databases/DatabaseAtomic.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index d431eb5c1b7..b30b05bb7a7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -116,7 +116,7 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & if (!not_in_use.empty()) { not_in_use.clear(); - LOG_DEBUG(log, "Finish removing non using detached tables"); + LOG_DEBUG(log, "Finished removing not used detached tables"); } return table; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 68bb5916d7c..9b914e3de8f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,17 +5,15 @@ #include #include -#include -#include #include #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -5287,7 +5285,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (attach_thread) { attach_thread->shutdown(); - LOG_TRACE(log, "Attach thread shutdowned"); + LOG_TRACE(log, "The attach thread is shutdown"); } @@ -5303,7 +5301,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() throw; } - LOG_TRACE(log, "Finish preparing for shutdown"); + LOG_TRACE(log, "Finished preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5342,7 +5340,7 @@ void StorageReplicatedMergeTree::shutdown(bool) return; const auto storage_name = getStorageID().getNameForLogs(); - LOG_TRACE(log, "Shutdown started, table={}", storage_name); + LOG_TRACE(log, "Shutdown started"); flushAndPrepareForShutdown(); @@ -5386,7 +5384,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } - LOG_TRACE(log, "Shutdown finished, table={}", storage_name); + LOG_TRACE(log, "Shutdown finished"); } From f7eac01b822c94184a16dfda1685d95f05c5cc8a Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 13 Jun 2024 13:31:52 +0000 Subject: [PATCH 028/182] up includes --- src/Storages/StorageReplicatedMergeTree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b33514907f9..a1f4a40a0ab 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,10 +5,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include From cae2f023852f3f49c4a0f25e8e11bcb3fe6ed49a Mon Sep 17 00:00:00 2001 From: wudidapaopao <664920313@qq.com> Date: Fri, 14 Jun 2024 14:39:00 +0800 Subject: [PATCH 029/182] Fix unexpected projection name when query with CTE --- 
src/Analyzer/Resolve/QueryAnalyzer.cpp | 6 +++++- .../02378_analyzer_projection_names.reference | 12 ++++++++++++ .../0_stateless/02378_analyzer_projection_names.sql | 10 ++++++++++ .../0_stateless/03123_analyzer_dist_join_CTE.sql | 2 +- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 5e5ecaaa93a..dc35462a4b0 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1484,7 +1484,11 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & /// Build additional column qualification parts array std::vector additional_column_qualification_parts; - if (table_expression_node->hasAlias()) + auto * query_node = table_expression_node->as(); + auto * union_node = table_expression_node->as(); + bool is_cte = (query_node && query_node->isCTE()) || (union_node && union_node->isCTE()); + + if (!is_cte && table_expression_node->hasAlias()) additional_column_qualification_parts = {table_expression_node->getAlias()}; else if (auto * table_node = table_expression_node->as()) additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index fd5bc7d4ae8..5860f1deb0d 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -443,6 +443,18 @@ SELECT '--'; -- DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); in(id, test_table_in_cte) UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +c1 UInt8 +c1 UInt8 +SELECT '--'; +-- +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); +c1 UInt8 +c1 UInt8 SELECT 'Joins'; Joins DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.sql b/tests/queries/0_stateless/02378_analyzer_projection_names.sql index f5ac5f7476f..f41afe6a950 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.sql +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.sql @@ -408,6 +408,16 @@ SELECT '--'; DESCRIBE (WITH test_table_in_cte AS (SELECT id FROM test_table) SELECT id IN test_table_in_cte FROM test_table); +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + +SELECT '--'; + +DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * +FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); + SELECT 'Joins'; DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git 
a/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql b/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql index 4fb8e0b91c4..4275f405d13 100644 --- a/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql +++ b/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql @@ -20,7 +20,7 @@ WITH SELECT toInt64(number) AS a FROM numbers(10) ) -SELECT * +SELECT a.a, b, b.a, c.a FROM dist_t0 AS a LEFT JOIN b AS b ON a.a = b.a LEFT JOIN c AS c ON a.a = c.a From 409b24b8d8d185e7840bc082fd73afc1b13314d2 Mon Sep 17 00:00:00 2001 From: wudidapaopao <664920313@qq.com> Date: Tue, 18 Jun 2024 16:22:00 +0800 Subject: [PATCH 030/182] empty commit From 96fd928bced6c14e9de98ad14b77b370ed14de8e Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 19 Jun 2024 08:59:48 +0000 Subject: [PATCH 031/182] remove unused var --- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a1f4a40a0ab..61a492c1f63 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5341,7 +5341,6 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; - const auto storage_name = getStorageID().getNameForLogs(); LOG_TRACE(log, "Shutdown started"); flushAndPrepareForShutdown(); From f4c1266975ff1a1aabe71e61ae318d1765931a62 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Jun 2024 18:56:05 +0200 Subject: [PATCH 032/182] Remove incorrect test --- ...87_distributed_in_not_default_db.reference | 2 - .../01487_distributed_in_not_default_db.sql | 39 ------------------- 2 files changed, 41 deletions(-) delete mode 100644 tests/queries/0_stateless/01487_distributed_in_not_default_db.reference delete mode 100644 tests/queries/0_stateless/01487_distributed_in_not_default_db.sql diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference b/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference deleted file mode 100644 index 0d66ea1aee9..00000000000 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.reference +++ /dev/null @@ -1,2 +0,0 @@ -0 -1 diff --git a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql b/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql deleted file mode 100644 index ccd2c571290..00000000000 --- a/tests/queries/0_stateless/01487_distributed_in_not_default_db.sql +++ /dev/null @@ -1,39 +0,0 @@ --- Tags: distributed, no-parallel - -CREATE DATABASE IF NOT EXISTS shard_0; -CREATE DATABASE IF NOT EXISTS shard_1; -CREATE DATABASE IF NOT EXISTS main_01487; -CREATE DATABASE IF NOT EXISTS test_01487; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP TABLE IF EXISTS t; - -CREATE TABLE shard_0.l (value UInt8) ENGINE = MergeTree ORDER BY value; -CREATE TABLE shard_1.l (value UInt8) ENGINE = MergeTree ORDER BY value; -CREATE TABLE t (value UInt8) ENGINE = Memory; - -INSERT INTO shard_0.l VALUES (0); -INSERT INTO shard_1.l VALUES (1); -INSERT INTO t VALUES (0), (1), (2); - -CREATE TABLE d AS t ENGINE = Distributed(test_cluster_two_shards_different_databases, currentDatabase(), t); - -USE test_01487; -DROP DATABASE test_01487; - -SELECT * FROM main_01487.d WHERE value IN (SELECT l.value FROM l) ORDER BY value; - -USE main_01487; - -DROP TABLE IF EXISTS shard_0.l; -DROP TABLE IF EXISTS shard_1.l; -DROP TABLE IF EXISTS d; -DROP 
TABLE IF EXISTS t; - -DROP DATABASE shard_0; -DROP DATABASE shard_1; -DROP DATABASE main_01487; From aa7017a7fb1dcc09f6d7f948d3adb2d65a7b5201 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2024 00:32:39 +0200 Subject: [PATCH 033/182] Add a test for #43003 --- .../03199_join_with_materialized_column.reference | 0 .../0_stateless/03199_join_with_materialized_column.sql | 6 ++++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03199_join_with_materialized_column.reference create mode 100644 tests/queries/0_stateless/03199_join_with_materialized_column.sql diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.reference b/tests/queries/0_stateless/03199_join_with_materialized_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.sql b/tests/queries/0_stateless/03199_join_with_materialized_column.sql new file mode 100644 index 00000000000..8c53c5b3e66 --- /dev/null +++ b/tests/queries/0_stateless/03199_join_with_materialized_column.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS table_with_materialized; +CREATE TABLE table_with_materialized (col String MATERIALIZED 'A') ENGINE = Memory; +SELECT number FROM numbers(1) AS n, table_with_materialized; +DROP TABLE table_with_materialized; From 4f61f530bd02fec686b273f4c2519cde77a689f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jul 2024 11:22:05 +0200 Subject: [PATCH 034/182] Named collections in clickhouse-local --- programs/local/LocalServer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b33e1595056..74906d8797c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -376,6 +376,7 @@ void LocalServer::setupUsers() " " " default" " default" + " 1 " " " " " " From e45a905904b5afd94e960bf48de015017a351527 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:52:51 +0200 Subject: [PATCH 035/182] Update LocalServer.cpp --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 74906d8797c..46b543e49e9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -376,7 +376,7 @@ void LocalServer::setupUsers() " " " default" " default" - " 1 + " 1" " " " " " " From 84fb836ecba44b0674f8e4bdbb1626e1aae02ba6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 2 Jul 2024 23:27:02 +0000 Subject: [PATCH 036/182] proper qualification for CTE, fix tests --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 10 +++++----- .../02378_analyzer_projection_names.reference | 4 ++-- .../0_stateless/03123_analyzer_dist_join_CTE.sql | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index dc35462a4b0..58a85debf37 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1484,14 +1484,14 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & /// Build additional column qualification parts array std::vector additional_column_qualification_parts; - auto * query_node = table_expression_node->as(); - auto * union_node = table_expression_node->as(); - bool is_cte = (query_node && query_node->isCTE()) || 
(union_node && union_node->isCTE()); - - if (!is_cte && table_expression_node->hasAlias()) + if (table_expression_node->hasAlias()) additional_column_qualification_parts = {table_expression_node->getAlias()}; else if (auto * table_node = table_expression_node->as()) additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; + else if (auto * query_node = table_expression_node->as(); query_node && query_node->isCTE()) + additional_column_qualification_parts = {"", query_node->getCTEName()}; + else if (auto * union_node = table_expression_node->as(); union_node && union_node->isCTE()) + additional_column_qualification_parts = {"", union_node->getCTEName()}; size_t additional_column_qualification_parts_size = additional_column_qualification_parts.size(); const auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index 5860f1deb0d..170f3e9e1ad 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -448,13 +448,13 @@ SELECT '--'; DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); c1 UInt8 -c1 UInt8 +test_table_in_cte_2.c1 UInt8 SELECT '--'; -- DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); c1 UInt8 -c1 UInt8 +test_table_in_cte_2.c1 UInt8 SELECT 'Joins'; Joins DESCRIBE (SELECT * FROM test_table_join_1, test_table_join_2); diff --git a/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql b/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql index 4275f405d13..4fb8e0b91c4 100644 --- a/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql +++ b/tests/queries/0_stateless/03123_analyzer_dist_join_CTE.sql @@ -20,7 +20,7 @@ WITH SELECT toInt64(number) AS a FROM numbers(10) ) -SELECT a.a, b, b.a, c.a +SELECT * FROM dist_t0 AS a LEFT JOIN b AS b ON a.a = b.a LEFT JOIN c AS c ON a.a = c.a From 6128f53f63332d57e9e7f34e54dcd3b9f5c89df5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 3 Jul 2024 02:56:47 +0000 Subject: [PATCH 037/182] fix --- src/Analyzer/Resolve/IdentifierResolver.cpp | 2 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 692a31b66ba..59cf82989e7 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1129,7 +1129,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromJoin(const Identifi resolved_identifier = left_resolved_identifier; } } - else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) + else if (scope.joins_count && scope.context->getSettingsRef().single_join_prefer_left_table) { resolved_side = JoinTableSide::Left; resolved_identifier = left_resolved_identifier; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp 
b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 58a85debf37..25ab3752f11 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1489,9 +1489,9 @@ void QueryAnalyzer::qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & else if (auto * table_node = table_expression_node->as()) additional_column_qualification_parts = {table_node->getStorageID().getDatabaseName(), table_node->getStorageID().getTableName()}; else if (auto * query_node = table_expression_node->as(); query_node && query_node->isCTE()) - additional_column_qualification_parts = {"", query_node->getCTEName()}; + additional_column_qualification_parts = {query_node->getCTEName()}; else if (auto * union_node = table_expression_node->as(); union_node && union_node->isCTE()) - additional_column_qualification_parts = {"", union_node->getCTEName()}; + additional_column_qualification_parts = {union_node->getCTEName()}; size_t additional_column_qualification_parts_size = additional_column_qualification_parts.size(); const auto & table_expression_data = scope.getTableExpressionDataOrThrow(table_expression_node); @@ -4455,10 +4455,11 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table if (auto * scope_query_node = scope.scope_node->as()) { auto left_table_expression = extractLeftTableExpression(scope_query_node->getJoinTree()); + bool is_cte = (query_node && query_node->isCTE()) || (union_node && union_node->isCTE()); if (table_expression_node.get() == left_table_expression.get() && - scope.joins_count == 1 && - scope.context->getSettingsRef().single_join_prefer_left_table) - table_expression_data.should_qualify_columns = false; + scope.joins_count && + (scope.context->getSettingsRef().single_join_prefer_left_table || is_cte)) + table_expression_data.should_qualify_columns = false; } scope.table_expression_node_to_data.emplace(table_expression_node, std::move(table_expression_data)); From ea3b0e735de285db89cb36e2782db88c6d403ee2 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 13:40:41 +0000 Subject: [PATCH 038/182] Refactor JSONExtract functions and support more types and reuse its code in new JSON type --- docs/en/sql-reference/data-types/dynamic.md | 34 + src/Common/JSONParsers/SimdJSONParser.h | 1 + src/DataTypes/DataTypeDynamic.cpp | 1 + src/Formats/JSONExtractTree.cpp | 1561 +++++++++ src/Formats/JSONExtractTree.h | 35 + src/Formats/SchemaInferenceUtils.cpp | 94 +- src/Formats/SchemaInferenceUtils.h | 10 + src/Functions/FunctionsJSON.cpp | 1061 +++++- src/Functions/FunctionsJSON.h | 3054 +++++++---------- .../03198_json_extract_more_types.reference | 21 + .../03198_json_extract_more_types.sql | 29 + .../03199_json_extract_dynamic.reference | 30 + .../03199_json_extract_dynamic.sql | 37 + 13 files changed, 4158 insertions(+), 1810 deletions(-) create mode 100644 src/Formats/JSONExtractTree.cpp create mode 100644 src/Formats/JSONExtractTree.h create mode 100644 tests/queries/0_stateless/03198_json_extract_more_types.reference create mode 100644 tests/queries/0_stateless/03198_json_extract_more_types.sql create mode 100644 tests/queries/0_stateless/03199_json_extract_dynamic.reference create mode 100644 tests/queries/0_stateless/03199_json_extract_dynamic.sql diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 955fd54e641..e063bed2de4 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -493,3 +493,37 @@ SELECT count(), 
dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O ``` As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. + + +## JSONExtract functions with Dynamic + +All `JSONExtract*` functions support `Dynamic` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type; +``` + +```text +┌─dynamic─┬─dynamic_type───────────┐ +│ [1,2,3] │ Array(Nullable(Int64)) │ +└─────────┴────────────────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` + +```text +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +``` + +```text +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index 827d142266a..db679b14f52 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..6826c46a1a7 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp new file mode 100644 index 00000000000..6d019f96ba6 --- /dev/null +++ b/src/Formats/JSONExtractTree.cpp @@ -0,0 +1,1561 @@ +#include +#include + +#include +#if USE_SIMDJSON +#include +#endif +#if USE_RAPIDJSON +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include + +namespace DB +{ + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) +{ + if (element.isInt64()) + { + writeIntText(element.getInt64(), buf); + return; + } + if (element.isUInt64()) + { + writeIntText(element.getUInt64(), buf); + return; + } + if (element.isDouble()) + { + writeFloatText(element.getDouble(), buf); + return; + } + if (element.isBool()) + { + if (element.getBool()) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (element.isString()) + { + writeJSONString(element.getString(), buf, format_settings); + return; + } + if (element.isArray()) + { + 
writeChar('[', buf); + bool need_comma = false; + for (auto value : element.getArray()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar(']', buf); + return; + } + if (element.isObject()) + { + writeChar('{', buf); + bool need_comma = false; + for (auto [key, value] : element.getObject()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + writeJSONString(key, buf, format_settings); + writeChar(':', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar('}', buf); + return; + } + if (element.isNull()) + { + writeCString("null", buf); + return; + } +} + +template +bool tryGetNumericValueFromJSONElement( + NumberType & value, + const typename JSONParser::Element & element, + bool convert_bool_to_integer, + String & error) +{ + switch (element.type()) + { + case ElementType::DOUBLE: + if constexpr (std::is_floating_point_v) + { + /// We permit inaccurate conversion of double to float. + /// Example: double 0.1 from JSON is not representable in float. + /// But it will be more convenient for user to perform conversion. + value = static_cast(element.getDouble()); + } + else if (!accurate::convertNumeric(element.getDouble(), value)) + { + error = fmt::format("cannot convert double value {} to {}", element.getDouble(), TypeName); + return false; + } + break; + case ElementType::UINT64: + if (!accurate::convertNumeric(element.getUInt64(), value)) + { + error = fmt::format("cannot convert UInt64 value {} to {}", element.getUInt64(), TypeName); + return false; + } + break; + case ElementType::INT64: + if (!accurate::convertNumeric(element.getInt64(), value)) + { + error = fmt::format("cannot convert Int64 value {} to {}", element.getInt64(), TypeName); + return false; + } + break; + case ElementType::BOOL: + if constexpr (is_integer) + { + if (convert_bool_to_integer) + { + value = static_cast(element.getBool()); + break; + } + } + error = fmt::format("cannot convert bool value to {}", TypeName); + return false; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if constexpr (std::is_floating_point_v) + { + if (!tryReadFloatText(value, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + else + { + if (tryReadIntText(value, rb) && rb.eof()) + break; + + /// Try to parse float and convert it to integer. 
+ Float64 tmp_float; + rb.position() = rb.buffer().begin(); + if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + + if (!accurate::convertNumeric(tmp_float, value)) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + break; + } + default: + return false; + } + + return true; +} + +namespace +{ + +template +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +{ + WriteBufferFromOwnString buf; + jsonElementToString(element, buf, format_settings); + return buf.str(); +} + +template +class NumericNode : public JSONExtractTreeNode +{ +public: + explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (is_bool_type) + value = static_cast(value); + + auto & col_vec = assert_cast &>(column); + col_vec.insertValue(value); + return true; + } + +protected: + bool is_bool_type; +}; + +template +class LowCardinalityNumericNode : public NumericNode +{ +public: + explicit LowCardinalityNumericNode(bool is_nullable_, bool is_bool_type_ = false) + : NumericNode(is_bool_type_), is_nullable(is_nullable_) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (this->is_bool_type) + value = static_cast(value); + + auto & col_lc = assert_cast(column); + col_lc.insertData(reinterpret_cast(&value), sizeof(value)); + return true; + } + +private: + bool is_nullable; +}; + +template +class StringNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto & col_str = assert_cast(column); + auto & 
chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } + else + { + auto value = element.getString(); + auto & col_str = assert_cast(column); + col_str.insertData(value.data(), value.size()); + } + return true; + } +}; + +template +class LowCardinalityStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto value = jsonElementToString(element, format_settings); + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + auto value = element.getString(); + assert_cast(column).insertData(value.data(), value.size()); + } + + return true; + } + +private: + bool is_nullable; +}; + +template +class FixedStringNode : public JSONExtractTreeNode +{ +public: + explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + assert_cast(column).insertData(value.data(), value.size()); + return true; + } + + size_t fixed_length; +}; + +template +class LowCardinalityFixedStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityFixedStringNode(bool is_nullable_, size_t fixed_length_) : is_nullable(is_nullable_), fixed_length(fixed_length_) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + + // For the 
non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. + // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + if (value.size() == fixed_length) + { + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + String padded_value(value); + padded_value.resize(fixed_length, '\0'); + assert_cast(column).insertData(padded_value.data(), padded_value.size()); + } + return true; + } + + bool is_nullable; + size_t fixed_length; +}; + +template +class UUIDNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + UUID uuid; + if (!tryParse(uuid, data)) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + + assert_cast(column).insert(uuid); + return true; + } + + + static bool tryParse(UUID & uuid, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadUUIDText(uuid, buf) && buf.eof(); + } +}; + +template +class LowCardinalityUUIDNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + UUID uuid; + if (!tryReadUUIDText(uuid, buf) || !buf.eof()) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + assert_cast(column).insertData(reinterpret_cast(&uuid), sizeof(uuid)); + return true; + } + +private: + bool is_nullable; +}; + +template +class DateNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read Date value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + DateType date; + if (!tryReadDateText(date, buf) || !buf.eof()) + { + error = fmt::format("cannot parse Date value here: {}", data); + return false; + } + + assert_cast &>(column).insertValue(date); + return true; + } +}; + +template +class DateTimeNode : public 
JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + time_t value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime value here: {}", element.getString()); + return false; + } + } + else if (element.isUInt64()) + { + value = element.getUInt64(); + } + else + { + error = fmt::format("cannot read DateTime value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(time_t & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(value, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffort(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUS(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } +}; + +template +class DecimalNode : public JSONExtractTreeNode +{ +public: + explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + DecimalType value{}; + + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, scale)) + { + error = fmt::format("cannot parse Decimal value here: {}", element.getString()); + return false; + } + break; + } + case ElementType::NULL_VALUE: { + if (!format_settings.null_as_default) + { + error = "cannot convert null to Decimal value"; + return false; + } + break; + } + default: { + error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast &>(column).insertValue(value); + return true; + } + +private: + UInt32 scale; +}; + + +template +class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const 
JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + DateTime64 value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime64 value here: {}", element.getString()); + return false; + } + } + else + { + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + default: + error = fmt::format("cannot read DateTime64 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(DateTime64 & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(value, scale, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } + +private: + UInt32 scale; +}; + +template +class EnumNode : public JSONExtractTreeNode +{ +public: + explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) + { + for (const auto & name_value_pair : name_value_pairs) + { + name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); + only_values.emplace(name_value_pair.second); + } + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot convert null to Enum value"; + return false; + } + + auto & col_vec = assert_cast &>(column); + + if (element.isInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isUInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getUInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isString()) + { + auto value = name_to_value_map.find(element.getString()); + if (value == name_to_value_map.end()) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getString()); + return false; + } + 
col_vec.insertValue(value->second); + return true; + } + + error = fmt::format("cannot read Enum value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector> name_value_pairs; + std::unordered_map name_to_value_map; + std::unordered_set only_values; +}; + +template +class IPv4Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv4 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv4 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv4 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + static bool tryParse(IPv4 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv4Text(value, buf) && buf.eof(); + } +}; + +template +class IPv6Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv6 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv6 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv6 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + + static bool tryParse(IPv6 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv6Text(value, buf) && buf.eof(); + } +}; + +template +class NullableNode : public JSONExtractTreeNode +{ +public: + explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + column.insertDefault(); + return true; + } + + auto & col_null = assert_cast(column); + if (!nested-> insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + return false; + col_null.getNullMapColumn().insertValue(0); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class LowCardinalityNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityNode(bool is_nullable_, std::unique_ptr> nested_) + : is_nullable(is_nullable_), nested(std::move(nested_)) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + auto & 
col_lc = assert_cast(column); + auto tmp_nested = col_lc.getDictionary().getNestedColumn()->cloneEmpty(); + if (!nested-> insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + return false; + + col_lc.insertFromFullColumn(*tmp_nested, 0); + return true; + } + +private: + bool is_nullable; + std::unique_ptr> nested; +}; + +template +class ArrayNode : public JSONExtractTreeNode +{ +public: + explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isArray()) + { + error = fmt::format("cannot read Array value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto array = element.getArray(); + + auto & col_arr = assert_cast(column); + auto & data = col_arr.getData(); + size_t old_size = data.size(); + bool were_valid_elements = false; + + for (auto value : array) + { + if (nested-> insertResultToColumn(data, value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + data.insertDefault(); + } + else + { + data.popBack(data.size() - old_size); + return false; + } + } + + if (!were_valid_elements) + { + data.popBack(data.size() - old_size); + return false; + } + + col_arr.getOffsets().push_back(data.size()); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class TupleNode : public JSONExtractTreeNode +{ +public: + TupleNode(std::vector>> nested_, const std::vector & explicit_names_) + : nested(std::move(nested_)), explicit_names(explicit_names_) + { + for (size_t i = 0; i != explicit_names.size(); ++i) + name_to_index_map.emplace(explicit_names[i], i); + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & tuple = assert_cast(column); + size_t old_size = column.size(); + bool were_valid_elements = false; + + auto set_size = [&](size_t size) + { + for (size_t i = 0; i != tuple.tupleSize(); ++i) + { + auto & col = tuple.getColumn(i); + if (col.size() != size) + { + if (col.size() > size) + col.popBack(col.size() - size); + else + while (col.size() < size) + col.insertDefault(); + } + } + }; + + if (element.isArray()) + { + auto array = element.getArray(); + auto it = array.begin(); + + for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) + { + if (nested[index]-> insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + if (element.isObject()) + { + auto object = element.getObject(); + if (name_to_index_map.empty()) + { + auto it = object.begin(); + for (size_t index = 0; (index != nested.size()) && 
(it != object.end()); ++index) + { + if (nested[index]-> insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + } + else + { + for (const auto & [key, value] : object) + { + auto index = name_to_index_map.find(key); + if (index != name_to_index_map.end()) + { + if (nested[index->second]-> insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (!insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + set_size(old_size); + error += fmt::format("(during reading tuple element \"{}\")", key); + return false; + } + } + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + error = fmt::format("cannot read Tuple value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> nested; + std::vector explicit_names; + std::unordered_map name_to_index_map; +}; + +template +class MapNode : public JSONExtractTreeNode +{ +public: + explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (!element.isObject()) + { + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto & map_col = assert_cast(column); + auto & offsets = map_col.getNestedColumn().getOffsets(); + auto & tuple_col = map_col.getNestedData(); + auto & key_col = tuple_col.getColumn(0); + auto & value_col = tuple_col.getColumn(1); + size_t old_size = tuple_col.size(); + + auto object = element.getObject(); + auto it = object.begin(); + for (; it != object.end(); ++it) + { + auto pair = *it; + + /// Insert key + key_col.insertData(pair.first.data(), pair.first.size()); + + /// Insert value + if (!value-> insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + { + if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + value_col.insertDefault(); + } + else + { + key_col.popBack(key_col.size() - offsets.back()); + value_col.popBack(value_col.size() - offsets.back()); + error += fmt::format("(during reading value of key \"{}\")", pair.first); + return false; + } + } + } + + offsets.push_back(old_size + object.size()); + return true; + } + +private: + std::unique_ptr> value; +}; + +template +class VariantNode : public JSONExtractTreeNode +{ +public: + VariantNode(std::vector>> variant_nodes_, std::vector order_) + : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & column_variant = assert_cast(column); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]-> insertResultToColumn(variant, 
element, insert_settings, format_settings, error)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; +}; + + +template +class DynamicNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & column_dynamic = assert_cast(column); + auto & variant_column = column_dynamic.getVariantColumn(); + auto variant_info = column_dynamic.getVariantInfo(); + /// First, infer ClickHouse type for this element and add it as a new variant. + auto element_type = elementToDataType(element, format_settings); + if (column_dynamic.addNewVariant(element_type)) + { + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); + if (!node-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); + variant_column.getOffsets().push_back(variant.size() - 1); + return true; + } + + /// We couldn't add new variant. Try to insert element into current variants. + auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); + if (variant_node-> insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + return true; + + /// We couldn't insert element into any existing variant, add String variant and read value as String. 
+ column_dynamic.addStringVariant(); + auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; + auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); + if (!getStringNode()-> insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); + variant_column.getOffsets().push_back(string_column.size() - 1); + return true; + } + + static const std::unique_ptr> & getStringNode() + { + static const std::unique_ptr> string_node + = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); + return string_node; + } + + static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) + { + JSONInferenceInfo json_inference_info; + auto type = elementToDataTypeImpl(element, format_settings, json_inference_info); + transformFinalInferredJSONTypeIfNeeded(type, format_settings, &json_inference_info); + return type; + } + +private: + static DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) + { + switch (element.type()) + { + case ElementType::NULL_VALUE: + return makeNullable(std::make_shared()); + case ElementType::BOOL: + return DataTypeFactory::instance().get("Bool"); + case ElementType::INT64: + { + auto type = std::make_shared(); + if (element.getInt64() < 0) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + case ElementType::UINT64: + return std::make_shared(); + case ElementType::DOUBLE: + return std::make_shared(); + case ElementType::STRING: + { + auto data = element.getString(); + + if (auto type = tryInferDateOrDateTimeFromString(data, format_settings)) + return type; + + if (format_settings.json.try_infer_numbers_from_strings) + { + bool is_negative = false; + if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) + { + json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); + if (is_negative) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + } + + return std::make_shared(); + } + case ElementType::ARRAY: + { + auto array = element.getArray(); + DataTypes types; + types.reserve(array.size()); + for (auto value : array) + types.push_back(makeNullableSafe(elementToDataTypeImpl(value, format_settings, json_inference_info))); + + if (types.empty()) + return std::make_shared(makeNullable(std::make_shared())); + + if (checkIfTypesAreEqual(types)) + return std::make_shared(types.back()); + + /// For JSON if we have not complete types, we should not try to transform them + /// and return it as a Tuple. + /// For example, if we have types [Nullable(Float64), Nullable(Nothing), Nullable(Float64)] + /// it can be Array(Nullable(Float64)) or Tuple(Nullable(Float64), , Nullable(Float64)) and + /// we can't determine which one it is right now. But we will be able to do it later + /// when we will have the final top level type. + /// For example, we can have JSON element [[42.42, null, 43.43], [44.44, "Some string", 45.45]] and we should + /// determine the type for this element as Tuple(Nullable(Float64), Nullable(String), Nullable(Float64)). 
+ for (const auto & type : types) + { + if (!checkIfTypeIsComplete(type)) + return std::make_shared(types); + } + + auto types_copy = types; + transformInferredJSONTypesIfNeeded(types_copy, format_settings, &json_inference_info); + + if (checkIfTypesAreEqual(types_copy)) + return std::make_shared(types_copy.back()); + + return std::make_shared(types); + } + case ElementType::OBJECT: { + /// TODO: Use new JSON type here when it's ready. + return std::make_shared(std::make_shared(), makeNullable(std::make_shared())); + } + } + } +}; + +} + +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(); + case TypeIndex::UInt32: + return std::make_unique>(); + case TypeIndex::UInt64: + return std::make_unique>(); + case TypeIndex::UInt128: + return std::make_unique>(); + case TypeIndex::UInt256: + return std::make_unique>(); + case TypeIndex::Int8: + return std::make_unique>(); + case TypeIndex::Int16: + return std::make_unique>(); + case TypeIndex::Int32: + return std::make_unique>(); + case TypeIndex::Int64: + return std::make_unique>(); + case TypeIndex::Int128: + return std::make_unique>(); + case TypeIndex::Int256: + return std::make_unique>(); + case TypeIndex::Float32: + return std::make_unique>(); + case TypeIndex::Float64: + return std::make_unique>(); + case TypeIndex::String: + return std::make_unique>(); + case TypeIndex::FixedString: + return std::make_unique>(assert_cast(*type).getN()); + case TypeIndex::UUID: + return std::make_unique>(); + case TypeIndex::IPv4: + return std::make_unique>(); + case TypeIndex::IPv6: + return std::make_unique>(); + case TypeIndex::Date:; + return std::make_unique>(); + case TypeIndex::Date32: + return std::make_unique>(); + case TypeIndex::DateTime: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::DateTime64: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::Decimal32: + return std::make_unique>(type); + case TypeIndex::Decimal64: + return std::make_unique>(type); + case TypeIndex::Decimal128: + return std::make_unique>(type); + case TypeIndex::Decimal256: + return std::make_unique>(type); + case TypeIndex::Enum8: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::Enum16: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::LowCardinality: + { + /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. 
+ auto lc_type = typeid_cast(type.get()); + auto dictionary_type = removeNullable(lc_type->getDictionaryType()); + bool is_nullable = lc_type->isLowCardinalityNullable(); + + switch (dictionary_type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(is_nullable, isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(is_nullable); + case TypeIndex::UInt32: + return std::make_unique>(is_nullable); + case TypeIndex::UInt64: + return std::make_unique>(is_nullable); + case TypeIndex::Int8: + return std::make_unique>(is_nullable); + case TypeIndex::Int16: + return std::make_unique>(is_nullable); + case TypeIndex::Int32: + return std::make_unique>(is_nullable); + case TypeIndex::Int64: + return std::make_unique>(is_nullable); + case TypeIndex::Float32: + return std::make_unique>(is_nullable); + case TypeIndex::Float64: + return std::make_unique>(is_nullable); + case TypeIndex::String: + return std::make_unique>(is_nullable); + case TypeIndex::FixedString: + return std::make_unique>(is_nullable, assert_cast(*dictionary_type).getN()); + case TypeIndex::UUID: + return std::make_unique>(is_nullable); + default: + return std::make_unique>(is_nullable, buildJSONExtractTree(dictionary_type, source_for_exception_message)); + } + } + case TypeIndex::Nullable: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Array: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Tuple: + { + const auto & tuple = assert_cast(*type); + const auto & tuple_elements = tuple.getElements(); + std::vector>> elements; + elements.reserve(tuple_elements.size()); + for (const auto & tuple_element : tuple_elements) + elements.emplace_back(buildJSONExtractTree(tuple_element, source_for_exception_message)); + return std::make_unique>(std::move(elements), tuple.haveExplicitNames() ? 
tuple.getElementNames() : Strings{}); + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + const auto & key_type = map_type.getKeyType(); + if (!isString(removeLowCardinality(key_type))) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {} with key type not String", + source_for_exception_message, + type->getName()); + + const auto & value_type = map_type.getValueType(); + return std::make_unique>(buildJSONExtractTree(value_type, source_for_exception_message)); + } + case TypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector>> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(buildJSONExtractTree(variant, source_for_exception_message)); + return std::make_unique>(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } + case TypeIndex::Dynamic: + return std::make_unique>(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {}", + source_for_exception_message, + type->getName()); + } +} + +#if USE_SIMDJSON +template void jsonElementToString(const SimdJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +#if USE_RAPIDJSON +template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#else +template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h new file mode 100644 index 00000000000..4735f568b1c --- /dev/null +++ b/src/Formats/JSONExtractTree.h @@ -0,0 +1,35 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +struct JSONExtractInsertSettings +{ + bool convert_bool_to_integer = true; + bool insert_default_on_invalid_elements_in_complex_types = false; +}; + +template +class JSONExtractTreeNode +{ +public: + JSONExtractTreeNode() = default; + virtual ~JSONExtractTreeNode() = default; + virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0; +}; + +/// Build a tree for insertion JSON element into a column with provided data type. 
+template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); + +template +bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error); + +} diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 31faea2e13e..6519d54a8c5 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -225,19 +225,6 @@ namespace Paths paths; }; - bool checkIfTypesAreEqual(const DataTypes & types) - { - if (types.empty()) - return true; - - for (size_t i = 1; i < types.size(); ++i) - { - if (!types[0]->equals(*types[i])) - return false; - } - return true; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -272,24 +259,31 @@ namespace type_indexes.erase(TypeIndex::Nothing); } - /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64, /// because UInt64 is inferred only in case of Int64 overflow. - void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) return; + bool have_negative_integers = false; for (auto & type : data_types) { if (WhichDataType(type).isInt64()) - type = std::make_shared(); + { + bool is_negative = json_info->negative_integers.contains(type.get()); + have_negative_integers |= is_negative; + if (!is_negative) + type = std::make_shared(); + } } - type_indexes.erase(TypeIndex::Int64); + if (!have_negative_integers) + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_floats = type_indexes.contains(TypeIndex::Float64); bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); @@ -300,7 +294,12 @@ namespace { WhichDataType which(type); if (which.isInt64() || which.isUInt64()) - type = std::make_shared(); + { + auto new_type = std::make_shared(); + if (json_info->numbers_parsed_from_json_strings.erase(type.get())) + json_info->numbers_parsed_from_json_strings.insert(new_type.get()); + type = new_type; + } } type_indexes.erase(TypeIndex::Int64); @@ -635,9 +634,9 @@ namespace if (settings.try_infer_integers) { /// Transform Int64 to UInt64 if needed. - transformIntegers(data_types, type_indexes); + transformIntegers(data_types, type_indexes, json_info); /// Transform integers to floats if needed. - transformIntegersAndFloatsToFloats(data_types, type_indexes); + transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info); } /// Transform Date to DateTime or both to String if needed. 
@@ -887,7 +886,7 @@ namespace } template - DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) { if (buf.eof()) return nullptr; @@ -911,7 +910,12 @@ namespace Int64 tmp_int; buf.position() = number_start; if (tryReadIntText(tmp_int, buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } /// In case of Int64 overflow we can try to infer UInt64. UInt64 tmp_uint; @@ -934,7 +938,12 @@ namespace Int64 tmp_int; if (tryReadIntText(tmp_int, peekable_buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } peekable_buf.rollbackToCheckpoint(/* drop= */ true); /// In case of Int64 overflow we can try to infer UInt64. @@ -952,7 +961,7 @@ namespace } template - DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings) + DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr) { ReadBufferFromString buf(field); @@ -960,7 +969,12 @@ namespace { Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_inference_info && tmp_int < 0) + json_inference_info->negative_integers.insert(type.get()); + return type; + } /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); @@ -1011,7 +1025,7 @@ namespace { if (settings.json.try_infer_numbers_from_strings) { - if (auto number_type = tryInferNumberFromStringImpl(field, settings)) + if (auto number_type = tryInferNumberFromStringImpl(field, settings, json_info)) { json_info->numbers_parsed_from_json_strings.insert(number_type.get()); return number_type; @@ -1254,10 +1268,23 @@ namespace } /// Number - return tryInferNumber(buf, settings); + return tryInferNumber(buf, settings, json_info); } } +bool checkIfTypesAreEqual(const DataTypes & types) +{ + if (types.empty()) + return true; + + for (size_t i = 1; i < types.size(); ++i) + { + if (!types[0]->equals(*types[i])) + return false; + } + return true; +} + void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { DataTypes types = {first, second}; @@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded( second = std::move(types[1]); } +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + transformInferredTypesIfNeededImpl(types, settings, json_info); +} + void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { JSONInferenceInfo json_info; @@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting return tryInferNumberFromStringImpl(field, settings); } +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + return tryInferNumberFromStringImpl(field, settings, json_info); + +} + DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) { if (settings.try_infer_dates && 
tryInferDate(field)) diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h index bcf3d194825..06c14c0797a 100644 --- a/src/Formats/SchemaInferenceUtils.h +++ b/src/Formats/SchemaInferenceUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -18,6 +19,11 @@ struct JSONInferenceInfo /// We store numbers that were parsed from strings. /// It's used in types transformation to change such numbers back to string if needed. std::unordered_set numbers_parsed_from_json_strings; + /// Store integer types that were inferred from negative numbers. + /// It's used to determine common type for Int64 and UInt64 + /// TODO: check it not only in JSON formats. + std::unordered_set negative_integers; + /// Indicates if currently we are inferring type for Map/Object key. bool is_object_key = false; /// When we transform types for the same column from different files @@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma /// Try to parse a number value from a string. By default, it tries to parse Float64, /// but if setting try_infer_integers is enabled, it also tries to parse Int64. DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings); +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info); /// It takes two types inferred for the same column and tries to transform them to a common type if possible. /// It's also used when we try to infer some not ordinary types from another types. @@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c /// Example 2: /// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths. void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info); +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info); /// Make final transform for types inferred in JSON format. It does 3 types of transformation: /// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types @@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); /// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String)) bool checkIfTypeIsComplete(const DataTypePtr & type); +bool checkIfTypesAreEqual(const DataTypes & types); + } diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index fbd987577e9..c6af0674db7 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,10 +1,1069 @@ -#include +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include +#include +#include +#include +#include + +#include + +#include "config.h" namespace DB { +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; + +/// Functions to parse JSONs and extract values from it. 
+/// The first argument of all these functions gets a JSON, +/// after that there are any number of arguments specifying path to a desired part from the JSON's root. +/// For example, +/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 + +class FunctionJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FormatSettings & format_settings) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); + + const auto & first_column = arguments[0]; + if (!isString(first_column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON, illegal type: " + "{}", String(Name::name), first_column.type->getName()); + + const ColumnPtr & arg_json = first_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); + + const ColumnString::Chars & chars = col_json_string->getChars(); + const ColumnString::Offsets & offsets = col_json_string->getOffsets(); + + size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); + std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); + + /// Preallocate memory in parser if necessary. + JSONParser parser; + if constexpr (has_member_function_reserve::value) + { + size_t max_size = calculateMaxSize(offsets); + if (max_size) + parser.reserve(max_size); + } + + Impl impl; + + /// prepare() does Impl-specific preparation before handling each row. + if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) + impl.prepare(Name::name, arguments, result_type); + + using Element = typename JSONParser::Element; + + Element document; + bool document_ok = false; + if (col_json_const) + { + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; + document_ok = parser.parse(json, document); + } + + String error; + for (const auto i : collections::range(0, input_rows_count)) + { + if (!col_json_const) + { + std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; + document_ok = parser.parse(json, document); + } + + bool added_to_column = false; + if (document_ok) + { + /// Perform moves. + Element element; + std::string_view last_key; + bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); + + if (moves_ok) + added_to_column = impl.insertResultToColumn(*to, element, last_key, format_settings, error); + } + + /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. + if (!added_to_column) + to->insertDefault(); + } + return to; + } + }; + +private: + BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) + BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. 
+ /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) + /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. + /// Keys and indices can be nonconst, in this case they are calculated for each row. + enum class MoveType : uint8_t + { + Key, + Index, + ConstKey, + ConstIndex, + }; + + struct Move + { + explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} + Move(MoveType type_, const String & key_) : type(type_), key(key_) {} + MoveType type; + size_t index = 0; + String key; + }; + + static std::vector prepareMoves( + const char * function_name, + const ColumnsWithTypeAndName & columns, + size_t first_index_argument, + size_t num_index_arguments) + { + std::vector moves; + moves.reserve(num_index_arguments); + for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) + { + const auto & column = columns[i]; + if (!isString(column.type) && !isNativeInteger(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument {} of function {} should be a string specifying key " + "or an integer specifying index, illegal type: {}", + std::to_string(i + 1), String(function_name), column.type->getName()); + + if (column.column && isColumnConst(*column.column)) + { + const auto & column_const = assert_cast(*column.column); + if (isString(column.type)) + moves.emplace_back(MoveType::ConstKey, column_const.getValue()); + else + moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); + } + else + { + if (isString(column.type)) + moves.emplace_back(MoveType::Key, ""); + else + moves.emplace_back(MoveType::Index, 0); + } + } + return moves; + } + + + /// Performs moves of types MoveType::Index and MoveType::ConstIndex. 
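+    /// Index moves use 1-based positions: {ConstIndex, 1} selects the first element of an array, while negative
+    /// positions count from the end, so e.g. JSONExtractInt('{"b": [-100, 200.0, 300]}', 'b', -1) is expected to
+    /// return 300 (illustrative example).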
+ template + static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, + const typename JSONParser::Element & document, const std::vector & moves, + typename JSONParser::Element & element, std::string_view & last_key) + { + typename JSONParser::Element res_element = document; + std::string_view key; + + for (size_t j = 0; j != moves.size(); ++j) + { + switch (moves[j].type) + { + case MoveType::ConstIndex: + { + if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) + return false; + break; + } + case MoveType::ConstKey: + { + key = moves[j].key; + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + case MoveType::Index: + { + Int64 index = (*arguments[j + 1].column)[row].get(); + if (!moveToElementByIndex(res_element, static_cast(index), key)) + return false; + break; + } + case MoveType::Key: + { + key = arguments[j + 1].column->getDataAt(row).toView(); + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + } + } + + element = res_element; + last_key = key; + return true; + } + + template + static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) + { + if (element.isArray()) + { + auto array = element.getArray(); + if (index >= 0) + --index; + else + index += array.size(); + + if (static_cast(index) >= array.size()) + return false; + element = array[index]; + out_key = {}; + return true; + } + + if constexpr (HasIndexOperator) + { + if (element.isObject()) + { + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + + if (static_cast(index) >= object.size()) + return false; + std::tie(out_key, element) = object[index]; + return true; + } + } + + return {}; + } + + /// Performs moves of types MoveType::Key and MoveType::ConstKey. + template + static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) + { + if (!element.isObject()) + return false; + auto object = element.getObject(); + return object.find(key, element); + } + + static size_t calculateMaxSize(const ColumnString::Offsets & offsets) + { + size_t max_size = 0; + for (const auto i : collections::range(0, offsets.size())) + { + size_t size = offsets[i] - offsets[i - 1]; + max_size = std::max(max_size, size); + } + if (max_size) + --max_size; + return max_size; + } + +}; + +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. 
+*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} + +template typename Impl> +class ExecutableFunctionJSON : public IExecutableFunction +{ + +public: + explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) + : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) + { + format_settings.json.escape_forward_slashes = false; + format_settings.null_as_default = false; + } + + String getName() const override { return Name::name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + } + +private: + + ColumnPtr + chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { +#if USE_SIMDJSON + if (allow_simdjson) + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + +#if USE_RAPIDJSON + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#else + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + + +template typename Impl> +class FunctionBaseFunctionJSON : public IFunctionBase +{ +public: + explicit FunctionBaseFunctionJSON( + const NullPresence & null_presence_, + bool allow_simdjson_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_, + const FormatSettings & format_settings_) + : null_presence(null_presence_) + , allow_simdjson(allow_simdjson_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + , format_settings(format_settings_) + { + } + + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique>(null_presence, allow_simdjson, json_return_type, format_settings); + } + +private: + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
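+/// For example (illustrative): SELECT JSONExtractInt('{"a": 1}', NULL) returns NULL rather than 0, because the
+/// constant NULL argument makes the whole result Nullable; likewise a JSON null extracted into a Nullable return
+/// type produces NULL instead of a default value.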
+template typename Impl> +class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + return std::make_unique>( + null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type, getFormatSettings(getContext())); + } +}; + +struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +struct NameJSONType { static constexpr auto name{"JSONType"}; }; +struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; + + +template +class JSONHasImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } 
+}; + + +template +class IsValidJSONImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", + String(function_name)); + } + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class JSONLengthImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + size_t size; + if (element.isArray()) + size = element.getArray().size(); + else if (element.isObject()) + size = element.getObject().size(); + else + return false; + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(size); + return true; + } +}; + + +template +class JSONKeyImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key, const FormatSettings &, String &) + { + if (last_key.empty()) + return false; + ColumnString & col_str = assert_cast(dest); + col_str.insertData(last_key.data(), last_key.size()); + return true; + } +}; + + +template +class JSONTypeImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + static const std::vector> values = { + {"Array", '['}, + {"Object", '{'}, + {"String", '"'}, + {"Int64", 'i'}, + {"UInt64", 'u'}, + {"Double", 'd'}, + {"Bool", 'b'}, + {"Null", 0}, /// the default value for the column. 
+ }; + return std::make_shared>(values); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + UInt8 type; + switch (element.type()) + { + case ElementType::INT64: + type = 'i'; + break; + case ElementType::UINT64: + type = 'u'; + break; + case ElementType::DOUBLE: + type = 'd'; + break; + case ElementType::STRING: + type = '"'; + break; + case ElementType::ARRAY: + type = '['; + break; + case ElementType::OBJECT: + type = '{'; + break; + case ElementType::BOOL: + type = 'b'; + break; + case ElementType::NULL_VALUE: + type = 0; + break; + } + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(type); + return true; + } +}; + + +template +class JSONExtractNumericImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared>(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static const std::unique_ptr> & getInsertNode() + { + static const std::unique_ptr> node = buildJSONExtractTree(std::make_shared>()); + } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String & error) + { + NumberType value; + + tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&value), sizeof(value)); + } + else + { + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + } + return true; + } +}; + + +template +using JSONExtractInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractUInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractFloat64Impl = JSONExtractNumericImpl; + + +template +class JSONExtractBoolImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + bool value; + switch (element.type()) + { + case ElementType::BOOL: + value = element.getBool(); + break; + case ElementType::INT64: + value = element.getInt64() != 0; + break; + case ElementType::UINT64: + value = element.getUInt64() != 0; + break; + default: + return false; + } + + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(static_cast(value)); + return true; + } +}; + +template +class JSONExtractRawImpl; + +template +class JSONExtractStringImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (element.isNull()) + return false; + + if (!element.isString()) + 
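+            /// Not a plain JSON string: fall back to returning the element's raw JSON text (numbers, bools, nested objects, ...).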
return JSONExtractRawImpl::insertResultToColumn(dest, element, {}, format_settings, error); + + auto str = element.getString(); + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + return true; + } +}; + +template +class JSONExtractImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the return data type, illegal value: {}", + String(function_name), col.name); + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + extract_tree = buildJSONExtractTree(result_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + return extract_tree->insertResultToColumn(dest, element, insert_settings, format_settings, error); + } + +protected: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the values' data type, illegal value: {}", + String(function_name), col.name); + + DataTypePtr key_type = std::make_unique(); + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = buildJSONExtractTree(value_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = 
assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + for (const auto & [key, value] : object) + { + if (extract_tree->insertResultToColumn(col_value, value, insert_settings, format_settings, error)) + col_key.insertData(key.data(), key.size()); + } + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String &) + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + return true; + } +}; + + +template +class JSONExtractArrayRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + ColumnArray & col_res = assert_cast(dest); + + for (auto value : array) + JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}, format_settings, error); + + col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); + return true; + } +}; + + +template +class JSONExtractKeysAndValuesRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + DataTypePtr string_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = assert_cast(col_tuple.getColumn(1)); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + JSONExtractRawImpl::insertResultToColumn(col_value, value, {}, format_settings, error); + } + + col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); + return true; + } +}; + +template +class 
JSONExtractKeysImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + ColumnArray & col_res = assert_cast(dest); + auto & col_key = assert_cast(col_res.getData()); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + } + + col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); + return true; + } +}; + REGISTER_FUNCTION(JSON) { factory.registerFunction>(); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 8a2ad457d34..5d44e22300d 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -1,1781 +1,1273 @@ -#pragma once - -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -#include "config.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -template -concept HasIndexOperator = requires (T t) -{ - t[0]; -}; - -/// Functions to parse JSONs and extract values from it. -/// The first argument of all these functions gets a JSON, -/// after that there are any number of arguments specifying path to a desired part from the JSON's root. -/// For example, -/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 - -class FunctionJSONHelpers -{ -public: - template typename Impl, class JSONParser> - class Executor - { - public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) - { - MutableColumnPtr to{result_type->createColumn()}; - to->reserve(input_rows_count); - - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); - - const auto & first_column = arguments[0]; - if (!isString(first_column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing JSON, illegal type: " - "{}", String(Name::name), first_column.type->getName()); - - const ColumnPtr & arg_json = first_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); - - const ColumnString::Chars & chars = col_json_string->getChars(); - const ColumnString::Offsets & offsets = col_json_string->getOffsets(); - - size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); - std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); - - /// Preallocate memory in parser if necessary. - JSONParser parser; - if constexpr (has_member_function_reserve::value) - { - size_t max_size = calculateMaxSize(offsets); - if (max_size) - parser.reserve(max_size); - } - - Impl impl; - - /// prepare() does Impl-specific preparation before handling each row. - if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) - impl.prepare(Name::name, arguments, result_type); - - using Element = typename JSONParser::Element; - - Element document; - bool document_ok = false; - if (col_json_const) - { - std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; - document_ok = parser.parse(json, document); - } - - for (const auto i : collections::range(0, input_rows_count)) - { - if (!col_json_const) - { - std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; - document_ok = parser.parse(json, document); - } - - bool added_to_column = false; - if (document_ok) - { - /// Perform moves. - Element element; - std::string_view last_key; - bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); - - if (moves_ok) - added_to_column = impl.insertResultToColumn(*to, element, last_key); - } - - /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. - if (!added_to_column) - to->insertDefault(); - } - return to; - } - }; - -private: - BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) - BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) - - /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. - /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) - /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. - /// Keys and indices can be nonconst, in this case they are calculated for each row. 
- enum class MoveType : uint8_t - { - Key, - Index, - ConstKey, - ConstIndex, - }; - - struct Move - { - explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} - Move(MoveType type_, const String & key_) : type(type_), key(key_) {} - MoveType type; - size_t index = 0; - String key; - }; - - static std::vector prepareMoves( - const char * function_name, - const ColumnsWithTypeAndName & columns, - size_t first_index_argument, - size_t num_index_arguments) - { - std::vector moves; - moves.reserve(num_index_arguments); - for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) - { - const auto & column = columns[i]; - if (!isString(column.type) && !isNativeInteger(column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The argument {} of function {} should be a string specifying key " - "or an integer specifying index, illegal type: {}", - std::to_string(i + 1), String(function_name), column.type->getName()); - - if (column.column && isColumnConst(*column.column)) - { - const auto & column_const = assert_cast(*column.column); - if (isString(column.type)) - moves.emplace_back(MoveType::ConstKey, column_const.getValue()); - else - moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); - } - else - { - if (isString(column.type)) - moves.emplace_back(MoveType::Key, ""); - else - moves.emplace_back(MoveType::Index, 0); - } - } - return moves; - } - - - /// Performs moves of types MoveType::Index and MoveType::ConstIndex. - template - static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, - const typename JSONParser::Element & document, const std::vector & moves, - typename JSONParser::Element & element, std::string_view & last_key) - { - typename JSONParser::Element res_element = document; - std::string_view key; - - for (size_t j = 0; j != moves.size(); ++j) - { - switch (moves[j].type) - { - case MoveType::ConstIndex: - { - if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) - return false; - break; - } - case MoveType::ConstKey: - { - key = moves[j].key; - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - case MoveType::Index: - { - Int64 index = (*arguments[j + 1].column)[row].get(); - if (!moveToElementByIndex(res_element, static_cast(index), key)) - return false; - break; - } - case MoveType::Key: - { - key = arguments[j + 1].column->getDataAt(row).toView(); - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - } - } - - element = res_element; - last_key = key; - return true; - } - - template - static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) - { - if (element.isArray()) - { - auto array = element.getArray(); - if (index >= 0) - --index; - else - index += array.size(); - - if (static_cast(index) >= array.size()) - return false; - element = array[index]; - out_key = {}; - return true; - } - - if constexpr (HasIndexOperator) - { - if (element.isObject()) - { - auto object = element.getObject(); - if (index >= 0) - --index; - else - index += object.size(); - - if (static_cast(index) >= object.size()) - return false; - std::tie(out_key, element) = object[index]; - return true; - } - } - - return {}; - } - - /// Performs moves of types MoveType::Key and MoveType::ConstKey. 
- template - static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) - { - if (!element.isObject()) - return false; - auto object = element.getObject(); - return object.find(key, element); - } - - static size_t calculateMaxSize(const ColumnString::Offsets & offsets) - { - size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) - { - size_t size = offsets[i] - offsets[i - 1]; - max_size = std::max(max_size, size); - } - if (max_size) - --max_size; - return max_size; - } - -}; - -template -class JSONExtractImpl; - -template -class JSONExtractKeysAndValuesImpl; - -/** -* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -* input arguments. -* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -* if needed. -*/ -template typename Impl> -constexpr bool functionForcesTheReturnType() -{ - return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -} - -template typename Impl> -class ExecutableFunctionJSON : public IExecutableFunction -{ - -public: - explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) - : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) - { - } - - String getName() const override { return Name::name; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - if (null_presence.has_null_constant) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - if constexpr (functionForcesTheReturnType()) - { - ColumnsWithTypeAndName columns_without_low_cardinality = arguments; - - for (auto & column : columns_without_low_cardinality) - { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); - } - - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - else - { - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - } - -private: - - ColumnPtr - chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { -#if USE_SIMDJSON - if (allow_simdjson) - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - -#if USE_RAPIDJSON - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#else - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - } - - NullPresence null_presence; - bool allow_simdjson; - DataTypePtr json_return_type; -}; - - -template typename Impl> -class FunctionBaseFunctionJSON : public IFunctionBase -{ -public: - explicit FunctionBaseFunctionJSON( - const NullPresence & null_presence_, - bool allow_simdjson_, - DataTypes argument_types_, - DataTypePtr return_type_, - DataTypePtr json_return_type_) - : null_presence(null_presence_) - , allow_simdjson(allow_simdjson_) - , argument_types(std::move(argument_types_)) - , return_type(std::move(return_type_)) - , json_return_type(std::move(json_return_type_)) - { - } - - String getName() const override { return Name::name; } - - const DataTypes & getArgumentTypes() const override - { - return argument_types; - } - - const DataTypePtr & getResultType() const override - { - return return_type; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override - { - return std::make_unique>(null_presence, allow_simdjson, json_return_type); - } - -private: - NullPresence null_presence; - bool allow_simdjson; - DataTypes argument_types; - DataTypePtr return_type; - DataTypePtr json_return_type; -}; - -/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
-template typename Impl> -class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -{ -public: - static constexpr auto name = Name::name; - - String getName() const override { return name; } - - static FunctionOverloadResolverPtr create(ContextPtr context_) - { - return std::make_unique(context_); - } - - explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override - { - bool has_nothing_argument = false; - for (const auto & arg : arguments) - has_nothing_argument |= isNothing(arg.type); - - DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); - NullPresence null_presence = getNullPresense(arguments); - DataTypePtr return_type; - if (has_nothing_argument) - return_type = std::make_shared(); - else if (null_presence.has_null_constant) - return_type = makeNullable(std::make_shared()); - else if (null_presence.has_nullable) - return_type = makeNullable(json_return_type); - else - return_type = json_return_type; - - /// Top-level LowCardinality columns are processed outside JSON parser. - json_return_type = removeLowCardinality(json_return_type); - - DataTypes argument_types; - argument_types.reserve(arguments.size()); - for (const auto & argument : arguments) - argument_types.emplace_back(argument.type); - return std::make_unique>( - null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); - } -}; - -struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -struct NameJSONType { static constexpr auto name{"JSONType"}; }; -struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; - - -template -class JSONHasImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - ColumnVector & 
col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class IsValidJSONImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() != 1) - { - /// IsValidJSON() shouldn't get parameters other than JSON. - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", - String(function_name)); - } - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - /// This function is called only if JSON is valid. - /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class JSONLengthImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - size_t size; - if (element.isArray()) - size = element.getArray().size(); - else if (element.isObject()) - size = element.getObject().size(); - else - return false; - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(size); - return true; - } -}; - - -template -class JSONKeyImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) - { - if (last_key.empty()) - return false; - ColumnString & col_str = assert_cast(dest); - col_str.insertData(last_key.data(), last_key.size()); - return true; - } -}; - - -template -class JSONTypeImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - static const std::vector> values = { - {"Array", '['}, - {"Object", '{'}, - {"String", '"'}, - {"Int64", 'i'}, - {"UInt64", 'u'}, - {"Double", 'd'}, - {"Bool", 'b'}, - {"Null", 0}, /// the default value for the column. 
- }; - return std::make_shared>(values); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - UInt8 type; - switch (element.type()) - { - case ElementType::INT64: - type = 'i'; - break; - case ElementType::UINT64: - type = 'u'; - break; - case ElementType::DOUBLE: - type = 'd'; - break; - case ElementType::STRING: - type = '"'; - break; - case ElementType::ARRAY: - type = '['; - break; - case ElementType::OBJECT: - type = '{'; - break; - case ElementType::BOOL: - type = 'b'; - break; - case ElementType::NULL_VALUE: - type = 0; - break; - } - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(type); - return true; - } -}; - - -template -class JSONExtractNumericImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared>(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - NumberType value; - - switch (element.type()) - { - case ElementType::DOUBLE: - if constexpr (std::is_floating_point_v) - { - /// We permit inaccurate conversion of double to float. - /// Example: double 0.1 from JSON is not representable in float. - /// But it will be more convenient for user to perform conversion. - value = static_cast(element.getDouble()); - } - else if (!accurate::convertNumeric(element.getDouble(), value)) - return false; - break; - case ElementType::UINT64: - if (!accurate::convertNumeric(element.getUInt64(), value)) - return false; - break; - case ElementType::INT64: - if (!accurate::convertNumeric(element.getInt64(), value)) - return false; - break; - case ElementType::BOOL: - if constexpr (is_integer && convert_bool_to_integer) - { - value = static_cast(element.getBool()); - break; - } - return false; - case ElementType::STRING: - { - auto rb = ReadBufferFromMemory{element.getString()}; - if constexpr (std::is_floating_point_v) - { - if (!tryReadFloatText(value, rb) || !rb.eof()) - return false; - } - else - { - if (tryReadIntText(value, rb) && rb.eof()) - break; - - /// Try to parse float and convert it to integer. 
- Float64 tmp_float; - rb.position() = rb.buffer().begin(); - if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) - return false; - - if (!accurate::convertNumeric(tmp_float, value)) - return false; - } - break; - } - default: - return false; - } - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } - return true; - } -}; - - -template -using JSONExtractInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractUInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractFloat64Impl = JSONExtractNumericImpl; - - -template -class JSONExtractBoolImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - bool value; - switch (element.type()) - { - case ElementType::BOOL: - value = element.getBool(); - break; - case ElementType::INT64: - value = element.getInt64() != 0; - break; - case ElementType::UINT64: - value = element.getUInt64() != 0; - break; - default: - return false; - } - - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(static_cast(value)); - return true; - } -}; - -template -class JSONExtractRawImpl; - -template -class JSONExtractStringImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); - - auto str = element.getString(); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(str.data(), str.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - col_str.insertData(str.data(), str.size()); - } - return true; - } -}; - -/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. 
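The extract tree described in the comment above is what lets JSONExtract materialise nested return types such as tuples, arrays and nullables. A short usage sketch (illustrative only, not part of this patch; expected results shown as comments):

    SELECT JSONExtract('{"a": [1, 2, 3], "b": "hello"}', 'Tuple(a Array(UInt8), b String)');  -- ([1,2,3],'hello')
    SELECT JSONExtract('{"a": [1, null, 3]}', 'a', 'Array(Nullable(Int64))');                 -- [1,NULL,3]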
-template -struct JSONExtractTree -{ - using Element = typename JSONParser::Element; - - class Node - { - public: - Node() = default; - virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const Element &) = 0; - }; - - template - class NumericNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractNumericImpl::insertResultToColumn(dest, element, {}); - } - }; - - class LowCardinalityFixedStringNode : public Node - { - public: - explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - // If element is an object we delegate the insertion to JSONExtractRawImpl - if (element.isObject()) - return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); - else if (!element.isString()) - return false; - - auto str = element.getString(); - if (str.size() > fixed_length) - return false; - - // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. - // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) - // the data is padded here and written directly to the Low Cardinality Column - if (str.size() == fixed_length) - { - assert_cast(dest).insertData(str.data(), str.size()); - } - else - { - String padded_str(str); - padded_str.resize(fixed_length, '\0'); - - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); - } - return true; - } - - private: - const size_t fixed_length; - }; - - class UUIDNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isString()) - return false; - - auto uuid = parseFromString(element.getString()); - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); - } - else - { - assert_cast(dest).insert(uuid); - } - return true; - } - }; - - template - class DecimalNode : public Node - { - public: - explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - const auto * type = assert_cast *>(data_type.get()); - - DecimalType value{}; - - switch (element.type()) - { - case ElementType::DOUBLE: - value = convertToDecimal, DataTypeDecimal>( - element.getDouble(), type->getScale()); - break; - case ElementType::UINT64: - value = convertToDecimal, DataTypeDecimal>( - element.getUInt64(), type->getScale()); - break; - case ElementType::INT64: - value = convertToDecimal, DataTypeDecimal>( - element.getInt64(), type->getScale()); - break; - case ElementType::STRING: { - auto rb = ReadBufferFromMemory{element.getString()}; - if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) - return false; - break; - } - default: - return false; - } - - assert_cast &>(dest).insertValue(value); - return true; - } - - private: - DataTypePtr data_type; - }; - - class StringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - } - }; - - class FixedStringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element 
& element) override - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); - - auto str = element.getString(); - auto & col_str = assert_cast(dest); - if (str.size() > col_str.getN()) - return false; - col_str.insertData(str.data(), str.size()); - - return true; - } - }; - - template - class EnumNode : public Node - { - public: - explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) - { - for (const auto & name_value_pair : name_value_pairs) - { - name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); - only_values.emplace(name_value_pair.second); - } - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & col_vec = assert_cast &>(dest); - - if (element.isInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isUInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isString()) - { - auto value = name_to_value_map.find(element.getString()); - if (value == name_to_value_map.end()) - return false; - col_vec.insertValue(value->second); - return true; - } - - return false; - } - - private: - std::vector> name_value_pairs; - std::unordered_map name_to_value_map; - std::unordered_set only_values; - }; - - class NullableNode : public Node - { - public: - explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - /// We do not need to handle nullability in that case - /// because nested node handles LowCardinality columns and will call proper overload of `insertData` - return nested->insertResultToColumn(dest, element); - } - - ColumnNullable & col_null = assert_cast(dest); - if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) - return false; - col_null.getNullMapColumn().insertValue(0); - return true; - } - - private: - std::unique_ptr nested; - }; - - class ArrayNode : public Node - { - public: - explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - - ColumnArray & col_arr = assert_cast(dest); - auto & data = col_arr.getData(); - size_t old_size = data.size(); - bool were_valid_elements = false; - - for (auto value : array) - { - if (nested->insertResultToColumn(data, value)) - were_valid_elements = true; - else - data.insertDefault(); - } - - if (!were_valid_elements) - { - data.popBack(data.size() - old_size); - return false; - } - - col_arr.getOffsets().push_back(data.size()); - return true; - } - - private: - std::unique_ptr nested; - }; - - class TupleNode : public Node - { - public: - TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) - { - for (size_t i = 0; i != explicit_names.size(); ++i) - name_to_index_map.emplace(explicit_names[i], i); - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - ColumnTuple & tuple = 
assert_cast(dest); - size_t old_size = dest.size(); - bool were_valid_elements = false; - - auto set_size = [&](size_t size) - { - for (size_t i = 0; i != tuple.tupleSize(); ++i) - { - auto & col = tuple.getColumn(i); - if (col.size() != size) - { - if (col.size() > size) - col.popBack(col.size() - size); - else - while (col.size() < size) - col.insertDefault(); - } - } - }; - - if (element.isArray()) - { - auto array = element.getArray(); - auto it = array.begin(); - - for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - if (element.isObject()) - { - auto object = element.getObject(); - if (name_to_index_map.empty()) - { - auto it = object.begin(); - for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - } - else - { - for (const auto & [key, value] : object) - { - auto index = name_to_index_map.find(key); - if (index != name_to_index_map.end()) - { - if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) - were_valid_elements = true; - } - } - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - return false; - } - - private: - std::vector> nested; - std::vector explicit_names; - std::unordered_map name_to_index_map; - }; - - class MapNode : public Node - { - public: - MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isObject()) - return false; - - ColumnMap & map_col = assert_cast(dest); - auto & offsets = map_col.getNestedColumn().getOffsets(); - auto & tuple_col = map_col.getNestedData(); - auto & key_col = tuple_col.getColumn(0); - auto & value_col = tuple_col.getColumn(1); - size_t old_size = tuple_col.size(); - - auto object = element.getObject(); - auto it = object.begin(); - for (; it != object.end(); ++it) - { - auto pair = *it; - - /// Insert key - key_col.insertData(pair.first.data(), pair.first.size()); - - /// Insert value - if (!value->insertResultToColumn(value_col, pair.second)) - value_col.insertDefault(); - } - - offsets.push_back(old_size + object.size()); - return true; - } - - private: - std::unique_ptr key; - std::unique_ptr value; - }; - - class VariantNode : public Node - { - public: - VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & column_variant = assert_cast(dest); - for (size_t i : order) - { - auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]->insertResultToColumn(variant, element)) - { - column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); - column_variant.getOffsets().push_back(variant.size() - 1); - return true; - } - } - - return false; - } - - private: - std::vector> variant_nodes; - /// Order in which we should try variants nodes. - /// For example, String should be always the last one. 
- std::vector order; - }; - - static std::unique_ptr build(const char * function_name, const DataTypePtr & type) - { - switch (type->getTypeId()) - { - case TypeIndex::UInt8: return std::make_unique>(); - case TypeIndex::UInt16: return std::make_unique>(); - case TypeIndex::UInt32: return std::make_unique>(); - case TypeIndex::UInt64: return std::make_unique>(); - case TypeIndex::UInt128: return std::make_unique>(); - case TypeIndex::UInt256: return std::make_unique>(); - case TypeIndex::Int8: return std::make_unique>(); - case TypeIndex::Int16: return std::make_unique>(); - case TypeIndex::Int32: return std::make_unique>(); - case TypeIndex::Int64: return std::make_unique>(); - case TypeIndex::Int128: return std::make_unique>(); - case TypeIndex::Int256: return std::make_unique>(); - case TypeIndex::Float32: return std::make_unique>(); - case TypeIndex::Float64: return std::make_unique>(); - case TypeIndex::String: return std::make_unique(); - case TypeIndex::FixedString: return std::make_unique(); - case TypeIndex::UUID: return std::make_unique(); - case TypeIndex::LowCardinality: - { - // The low cardinality case is treated in two different ways: - // For FixedString type, an especial class is implemented for inserting the data in the destination column, - // as the string length must be passed in order to check and pad the incoming data. - // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data - // as needed for the insertData function of the ColumnLowCardinality. - auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); - if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) - { - auto fixed_length = typeid_cast(dictionary_type.get())->getN(); - return std::make_unique(fixed_length); - } - return build(function_name, dictionary_type); - } - case TypeIndex::Decimal256: return std::make_unique>(type); - case TypeIndex::Decimal128: return std::make_unique>(type); - case TypeIndex::Decimal64: return std::make_unique>(type); - case TypeIndex::Decimal32: return std::make_unique>(type); - case TypeIndex::Enum8: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Enum16: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Nullable: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Array: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Tuple: - { - const auto & tuple = static_cast(*type); - const auto & tuple_elements = tuple.getElements(); - std::vector> elements; - elements.reserve(tuple_elements.size()); - for (const auto & tuple_element : tuple_elements) - elements.emplace_back(build(function_name, tuple_element)); - return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? 
tuple.getElementNames() : Strings{}); - } - case TypeIndex::Map: - { - const auto & map_type = static_cast(*type); - const auto & key_type = map_type.getKeyType(); - if (!isString(removeLowCardinality(key_type))) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {} with key type not String", - String(function_name), - type->getName()); - - const auto & value_type = map_type.getValueType(); - return std::make_unique(build(function_name, key_type), build(function_name, value_type)); - } - case TypeIndex::Variant: - { - const auto & variant_type = static_cast(*type); - const auto & variants = variant_type.getVariants(); - std::vector> variant_nodes; - variant_nodes.reserve(variants.size()); - for (const auto & variant : variants) - variant_nodes.push_back(build(function_name, variant)); - return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); - } - default: - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {}", - String(function_name), type->getName()); - } - } -}; - - -template -class JSONExtractImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the return data type, illegal value: {}", - String(function_name), col.name); - - return DataTypeFactory::instance().get(col_type_const->getValue()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - extract_tree = JSONExtractTree::build(function_name, result_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - return extract_tree->insertResultToColumn(dest, element); - } - -protected: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractKeysAndValuesImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the values' data type, illegal value: {}", - String(function_name), col.name); - - DataTypePtr key_type = std::make_unique(); - DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); - DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); - const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; - extract_tree = JSONExtractTree::build(function_name, value_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - size_t old_size = col_tuple.size(); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = col_tuple.getColumn(1); - - for (const auto & [key, value] : object) - { - if (extract_tree->insertResultToColumn(col_value, value)) - col_key.insertData(key.data(), key.size()); - } - - if (col_tuple.size() == old_size) - return false; - - col_arr.getOffsets().push_back(col_tuple.size()); - return true; - } - -private: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); - } - return true; - } - - // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column - static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) - { - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - auto & col_str = assert_cast(dest); - - if (chars.size() > col_str.getN()) - return false; - - chars.resize_fill(col_str.getN()); - col_str.insertData(reinterpret_cast(chars.data()), chars.size()); - - - return true; - } - - // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column - static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) - { - if (element.getObject().size() > fixed_length) - return false; - - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - if (chars.size() > fixed_length) - return false; - chars.resize_fill(fixed_length); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - - return true; - } - -private: - static void traverse(const Element & element, WriteBuffer & buf) - { - if (element.isInt64()) - { - writeIntText(element.getInt64(), buf); - return; - } - if (element.isUInt64()) - { - 
writeIntText(element.getUInt64(), buf); - return; - } - if (element.isDouble()) - { - writeFloatText(element.getDouble(), buf); - return; - } - if (element.isBool()) - { - if (element.getBool()) - writeCString("true", buf); - else - writeCString("false", buf); - return; - } - if (element.isString()) - { - writeJSONString(element.getString(), buf, formatSettings()); - return; - } - if (element.isArray()) - { - writeChar('[', buf); - bool need_comma = false; - for (auto value : element.getArray()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - traverse(value, buf); - } - writeChar(']', buf); - return; - } - if (element.isObject()) - { - writeChar('{', buf); - bool need_comma = false; - for (auto [key, value] : element.getObject()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - writeJSONString(key, buf, formatSettings()); - writeChar(':', buf); - traverse(value, buf); - } - writeChar('}', buf); - return; - } - if (element.isNull()) - { - writeCString("null", buf); - return; - } - } - - static const FormatSettings & formatSettings() - { - static const FormatSettings the_instance = [] - { - FormatSettings settings; - settings.json.escape_forward_slashes = false; - return settings; - }(); - return the_instance; - } -}; - - -template -class JSONExtractArrayRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - ColumnArray & col_res = assert_cast(dest); - - for (auto value : array) - JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); - - col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); - return true; - } -}; - - -template -class JSONExtractKeysAndValuesRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - DataTypePtr string_type = std::make_unique(); - DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = assert_cast(col_tuple.getColumn(1)); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); - } - - col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); - return true; - } -}; - -template -class JSONExtractKeysImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_unique(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return 
arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - ColumnArray & col_res = assert_cast(dest); - auto & col_key = assert_cast(col_res.getData()); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - } - - col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); - return true; - } -}; - -} +//#pragma once +// +//#include +//#include +// +//#include +// +//#include +// +//#include +//#include +//#include +// +//#include +////#include +// +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +// +// +//#include "config.h" +// +// +//namespace DB +//{ +// +//namespace ErrorCodes +//{ +// extern const int ILLEGAL_TYPE_OF_ARGUMENT; +// extern const int ILLEGAL_COLUMN; +// extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +//} +// +//template +//concept HasIndexOperator = requires (T t) +//{ +// t[0]; +//}; +// +///// Functions to parse JSONs and extract values from it. +///// The first argument of all these functions gets a JSON, +///// after that there are any number of arguments specifying path to a desired part from the JSON's root. +///// For example, +///// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 +// +//class FunctionJSONHelpers +//{ +//public: +// template typename Impl, class JSONParser> +// class Executor +// { +// public: +// static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) +// { +// MutableColumnPtr to{result_type->createColumn()}; +// to->reserve(input_rows_count); +// +// if (arguments.empty()) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); +// +// const auto & first_column = arguments[0]; +// if (!isString(first_column.type)) +// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, +// "The first argument of function {} should be a string containing JSON, illegal type: " +// "{}", String(Name::name), first_column.type->getName()); +// +// const ColumnPtr & arg_json = first_column.column; +// const auto * col_json_const = typeid_cast(arg_json.get()); +// const auto * col_json_string +// = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); +// +// if (!col_json_string) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); +// +// const ColumnString::Chars & chars = col_json_string->getChars(); +// const ColumnString::Offsets & offsets = col_json_string->getOffsets(); +// +// size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); +// std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); +// +// /// Preallocate memory in parser if necessary. 
+// JSONParser parser; +// if constexpr (has_member_function_reserve::value) +// { +// size_t max_size = calculateMaxSize(offsets); +// if (max_size) +// parser.reserve(max_size); +// } +// +// Impl impl; +// +// /// prepare() does Impl-specific preparation before handling each row. +// if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) +// impl.prepare(Name::name, arguments, result_type); +// +// using Element = typename JSONParser::Element; +// +// Element document; +// bool document_ok = false; +// if (col_json_const) +// { +// std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; +// document_ok = parser.parse(json, document); +// } +// +// for (const auto i : collections::range(0, input_rows_count)) +// { +// if (!col_json_const) +// { +// std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; +// document_ok = parser.parse(json, document); +// } +// +// bool added_to_column = false; +// if (document_ok) +// { +// /// Perform moves. +// Element element; +// std::string_view last_key; +// bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); +// +// if (moves_ok) +// added_to_column = impl.insertResultToColumn(*to, element, last_key); +// } +// +// /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. +// if (!added_to_column) +// to->insertDefault(); +// } +// return to; +// } +// }; +// +//private: +// BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) +// BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) +// +// /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. +// /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) +// /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. +// /// Keys and indices can be nonconst, in this case they are calculated for each row. 
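Each path argument after the JSON string becomes one of the moves described above: constant keys and indices are resolved once, non-constant ones are read per row, and negative indices count from the end of an array. Illustrative queries (not part of this patch; expected results shown as comments):

    SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1);   -- -100 (ConstKey 'b', ConstIndex 1)
    SELECT JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1);  -- 300: negative index counts from the end
    SELECT JSONExtractInt('{"b": [-100, 200.0, 300]}', 'b', idx)
    FROM (SELECT arrayJoin([1, 3]) AS idx);                                     -- -100 and 300: per-row (non-constant) index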
+// enum class MoveType : uint8_t +// { +// Key, +// Index, +// ConstKey, +// ConstIndex, +// }; +// +// struct Move +// { +// explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} +// Move(MoveType type_, const String & key_) : type(type_), key(key_) {} +// MoveType type; +// size_t index = 0; +// String key; +// }; +// +// static std::vector prepareMoves( +// const char * function_name, +// const ColumnsWithTypeAndName & columns, +// size_t first_index_argument, +// size_t num_index_arguments) +// { +// std::vector moves; +// moves.reserve(num_index_arguments); +// for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) +// { +// const auto & column = columns[i]; +// if (!isString(column.type) && !isNativeInteger(column.type)) +// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, +// "The argument {} of function {} should be a string specifying key " +// "or an integer specifying index, illegal type: {}", +// std::to_string(i + 1), String(function_name), column.type->getName()); +// +// if (column.column && isColumnConst(*column.column)) +// { +// const auto & column_const = assert_cast(*column.column); +// if (isString(column.type)) +// moves.emplace_back(MoveType::ConstKey, column_const.getValue()); +// else +// moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); +// } +// else +// { +// if (isString(column.type)) +// moves.emplace_back(MoveType::Key, ""); +// else +// moves.emplace_back(MoveType::Index, 0); +// } +// } +// return moves; +// } +// +// +// /// Performs moves of types MoveType::Index and MoveType::ConstIndex. +// template +// static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, +// const typename JSONParser::Element & document, const std::vector & moves, +// typename JSONParser::Element & element, std::string_view & last_key) +// { +// typename JSONParser::Element res_element = document; +// std::string_view key; +// +// for (size_t j = 0; j != moves.size(); ++j) +// { +// switch (moves[j].type) +// { +// case MoveType::ConstIndex: +// { +// if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) +// return false; +// break; +// } +// case MoveType::ConstKey: +// { +// key = moves[j].key; +// if (!moveToElementByKey(res_element, key)) +// return false; +// break; +// } +// case MoveType::Index: +// { +// Int64 index = (*arguments[j + 1].column)[row].get(); +// if (!moveToElementByIndex(res_element, static_cast(index), key)) +// return false; +// break; +// } +// case MoveType::Key: +// { +// key = arguments[j + 1].column->getDataAt(row).toView(); +// if (!moveToElementByKey(res_element, key)) +// return false; +// break; +// } +// } +// } +// +// element = res_element; +// last_key = key; +// return true; +// } +// +// template +// static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) +// { +// if (element.isArray()) +// { +// auto array = element.getArray(); +// if (index >= 0) +// --index; +// else +// index += array.size(); +// +// if (static_cast(index) >= array.size()) +// return false; +// element = array[index]; +// out_key = {}; +// return true; +// } +// +// if constexpr (HasIndexOperator) +// { +// if (element.isObject()) +// { +// auto object = element.getObject(); +// if (index >= 0) +// --index; +// else +// index += object.size(); +// +// if (static_cast(index) >= object.size()) +// return false; +// std::tie(out_key, element) = object[index]; +// return true; +// 
} +// } +// +// return {}; +// } +// +// /// Performs moves of types MoveType::Key and MoveType::ConstKey. +// template +// static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) +// { +// if (!element.isObject()) +// return false; +// auto object = element.getObject(); +// return object.find(key, element); +// } +// +// static size_t calculateMaxSize(const ColumnString::Offsets & offsets) +// { +// size_t max_size = 0; +// for (const auto i : collections::range(0, offsets.size())) +// { +// size_t size = offsets[i] - offsets[i - 1]; +// max_size = std::max(max_size, size); +// } +// if (max_size) +// --max_size; +// return max_size; +// } +// +//}; +// +//template +//class JSONExtractImpl; +// +//template +//class JSONExtractKeysAndValuesImpl; +// +///** +//* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +//* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +//* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +//* input arguments. +//* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +//* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +//* if needed. +//*/ +//template typename Impl> +//constexpr bool functionForcesTheReturnType() +//{ +// return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +//} +// +//template typename Impl> +//class ExecutableFunctionJSON : public IExecutableFunction +//{ +// +//public: +// explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) +// : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) +// { +// } +// +// String getName() const override { return Name::name; } +// bool useDefaultImplementationForNulls() const override { return false; } +// bool useDefaultImplementationForConstants() const override { return true; } +// bool useDefaultImplementationForLowCardinalityColumns() const override +// { +// return !functionForcesTheReturnType(); +// } +// +// ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override +// { +// if (null_presence.has_null_constant) +// return result_type->createColumnConstWithDefaultValue(input_rows_count); +// +// if constexpr (functionForcesTheReturnType()) +// { +// ColumnsWithTypeAndName columns_without_low_cardinality = arguments; +// +// for (auto & column : columns_without_low_cardinality) +// { +// column.column = recursiveRemoveLowCardinality(column.column); +// column.type = recursiveRemoveLowCardinality(column.type); +// } +// +// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; +// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); +// +// if (null_presence.has_nullable) +// temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); +// +// if (result_type->lowCardinality()) +// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); +// +// return temporary_result; +// } +// else +// { +// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; +// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); +// +// if (null_presence.has_nullable) +// temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); +// +// if (result_type->lowCardinality()) +// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); +// +// return temporary_result; +// } +// } +// +//private: +// +// ColumnPtr +// chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const +// { +//#if USE_SIMDJSON +// if (allow_simdjson) +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#endif +// +//#if USE_RAPIDJSON +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#else +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#endif +// } +// +// NullPresence null_presence; +// bool allow_simdjson; +// DataTypePtr json_return_type; +//}; +// +// +//template typename Impl> +//class FunctionBaseFunctionJSON : public IFunctionBase +//{ +//public: +// explicit FunctionBaseFunctionJSON( +// const NullPresence & null_presence_, +// bool allow_simdjson_, +// DataTypes argument_types_, +// DataTypePtr return_type_, +// DataTypePtr json_return_type_) +// : null_presence(null_presence_) +// , allow_simdjson(allow_simdjson_) +// , argument_types(std::move(argument_types_)) +// , return_type(std::move(return_type_)) +// , json_return_type(std::move(json_return_type_)) +// { +// } +// +// String getName() const override { return Name::name; } +// +// const DataTypes & getArgumentTypes() const override +// { +// return argument_types; +// } +// +// const DataTypePtr & getResultType() const override +// { +// return return_type; +// } +// +// bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } +// +// ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override +// { +// return std::make_unique>(null_presence, allow_simdjson, json_return_type); +// } +// +//private: +// NullPresence null_presence; +// bool allow_simdjson; +// DataTypes argument_types; +// DataTypePtr return_type; +// DataTypePtr json_return_type; +//}; +// +///// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +///// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
+//template typename Impl> +//class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +//{ +//public: +// static constexpr auto name = Name::name; +// +// String getName() const override { return name; } +// +// static FunctionOverloadResolverPtr create(ContextPtr context_) +// { +// return std::make_unique(context_); +// } +// +// explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} +// +// bool isVariadic() const override { return true; } +// size_t getNumberOfArguments() const override { return 0; } +// bool useDefaultImplementationForNulls() const override { return false; } +// bool useDefaultImplementationForLowCardinalityColumns() const override +// { +// return !functionForcesTheReturnType(); +// } +// +// FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override +// { +// bool has_nothing_argument = false; +// for (const auto & arg : arguments) +// has_nothing_argument |= isNothing(arg.type); +// +// DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); +// NullPresence null_presence = getNullPresense(arguments); +// DataTypePtr return_type; +// if (has_nothing_argument) +// return_type = std::make_shared(); +// else if (null_presence.has_null_constant) +// return_type = makeNullable(std::make_shared()); +// else if (null_presence.has_nullable) +// return_type = makeNullable(json_return_type); +// else +// return_type = json_return_type; +// +// /// Top-level LowCardinality columns are processed outside JSON parser. +// json_return_type = removeLowCardinality(json_return_type); +// +// DataTypes argument_types; +// argument_types.reserve(arguments.size()); +// for (const auto & argument : arguments) +// argument_types.emplace_back(argument.type); +// return std::make_unique>( +// null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); +// } +//}; +// +//struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +//struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +//struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +//struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +//struct NameJSONType { static constexpr auto name{"JSONType"}; }; +//struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +//struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +//struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +//struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +//struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +//struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +//struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +//struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +//struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +//struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +//struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; +// +// +//template +//class JSONHasImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) +// { +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(1); +// return true; +// } +//}; +// +// +//template +//class IsValidJSONImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() != 1) +// { +// /// IsValidJSON() shouldn't get parameters other than JSON. +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", +// String(function_name)); +// } +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) +// { +// /// This function is called only if JSON is valid. +// /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(1); +// return true; +// } +//}; +// +// +//template +//class JSONLengthImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// size_t size; +// if (element.isArray()) +// size = element.getArray().size(); +// else if (element.isObject()) +// size = element.getObject().size(); +// else +// return false; +// +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(size); +// return true; +// } +//}; +// +// +//template +//class JSONKeyImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) +// { +// if (last_key.empty()) +// return false; +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(last_key.data(), last_key.size()); +// return true; +// } +//}; +// +// +//template +//class JSONTypeImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// static const std::vector> values = { +// {"Array", '['}, +// {"Object", '{'}, +// {"String", '"'}, +// {"Int64", 'i'}, +// {"UInt64", 'u'}, +// {"Double", 'd'}, +// {"Bool", 'b'}, +// {"Null", 0}, /// the default value for the column. 
+// }; +// return std::make_shared>(values); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// UInt8 type; +// switch (element.type()) +// { +// case ElementType::INT64: +// type = 'i'; +// break; +// case ElementType::UINT64: +// type = 'u'; +// break; +// case ElementType::DOUBLE: +// type = 'd'; +// break; +// case ElementType::STRING: +// type = '"'; +// break; +// case ElementType::ARRAY: +// type = '['; +// break; +// case ElementType::OBJECT: +// type = '{'; +// break; +// case ElementType::BOOL: +// type = 'b'; +// break; +// case ElementType::NULL_VALUE: +// type = 0; +// break; +// } +// +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(type); +// return true; +// } +//}; +// +// +//template +//class JSONExtractNumericImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared>(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// NumberType value; +// +// switch (element.type()) +// { +// case ElementType::DOUBLE: +// if constexpr (std::is_floating_point_v) +// { +// /// We permit inaccurate conversion of double to float. +// /// Example: double 0.1 from JSON is not representable in float. +// /// But it will be more convenient for user to perform conversion. +// value = static_cast(element.getDouble()); +// } +// else if (!accurate::convertNumeric(element.getDouble(), value)) +// return false; +// break; +// case ElementType::UINT64: +// if (!accurate::convertNumeric(element.getUInt64(), value)) +// return false; +// break; +// case ElementType::INT64: +// if (!accurate::convertNumeric(element.getInt64(), value)) +// return false; +// break; +// case ElementType::BOOL: +// if constexpr (is_integer && convert_bool_to_integer) +// { +// value = static_cast(element.getBool()); +// break; +// } +// return false; +// case ElementType::STRING: +// { +// auto rb = ReadBufferFromMemory{element.getString()}; +// if constexpr (std::is_floating_point_v) +// { +// if (!tryReadFloatText(value, rb) || !rb.eof()) +// return false; +// } +// else +// { +// if (tryReadIntText(value, rb) && rb.eof()) +// break; +// +// /// Try to parse float and convert it to integer. 
+// Float64 tmp_float; +// rb.position() = rb.buffer().begin(); +// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) +// return false; +// +// if (!accurate::convertNumeric(tmp_float, value)) +// return false; +// } +// break; +// } +// default: +// return false; +// } +// +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnLowCardinality & col_low = assert_cast(dest); +// col_low.insertData(reinterpret_cast(&value), sizeof(value)); +// } +// else +// { +// auto & col_vec = assert_cast &>(dest); +// col_vec.insertValue(value); +// } +// return true; +// } +//}; +// +// +//template +//using JSONExtractInt64Impl = JSONExtractNumericImpl; +//template +//using JSONExtractUInt64Impl = JSONExtractNumericImpl; +//template +//using JSONExtractFloat64Impl = JSONExtractNumericImpl; +// +// +//template +//class JSONExtractBoolImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// bool value; +// switch (element.type()) +// { +// case ElementType::BOOL: +// value = element.getBool(); +// break; +// case ElementType::INT64: +// value = element.getInt64() != 0; +// break; +// case ElementType::UINT64: +// value = element.getUInt64() != 0; +// break; +// default: +// return false; +// } +// +// auto & col_vec = assert_cast &>(dest); +// col_vec.insertValue(static_cast(value)); +// return true; +// } +//}; +// +//template +//class JSONExtractRawImpl; +// +//template +//class JSONExtractStringImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (element.isNull()) +// return false; +// +// if (!element.isString()) +// return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); +// +// auto str = element.getString(); +// +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnLowCardinality & col_low = assert_cast(dest); +// col_low.insertData(str.data(), str.size()); +// } +// else +// { +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(str.data(), str.size()); +// } +// return true; +// } +//}; +// +// +//static const JSONExtractInsertSettings & getJSONExtractInsertSettings() +//{ +// static const JSONExtractInsertSettings instance = [] +// { +// JSONExtractInsertSettings settings; +// settings.insert_null_as_default = false; +// settings.insert_default_on_invalid_elements_in_complex_types = true; +// return settings; +// }(); +// return instance; +//} +// +//template +//class JSONExtractImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() < 2) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); +// +// const auto & col = arguments.back(); +// const 
auto * col_type_const = typeid_cast(col.column.get()); +// if (!col_type_const || !isString(col.type)) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, +// "The last argument of function {} should " +// "be a constant string specifying the return data type, illegal value: {}", +// String(function_name), col.name); +// +// return DataTypeFactory::instance().get(col_type_const->getValue()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } +// +// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) +// { +// extract_tree = buildJSONExtractTree(result_type, function_name); +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// String error; +// return extract_tree->insertResultToColumn(dest, element, getJSONExtractInsertSettings(), error); +// } +// +//protected: +// std::unique_ptr> extract_tree; +//}; +// +// +//template +//class JSONExtractKeysAndValuesImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() < 2) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); +// +// const auto & col = arguments.back(); +// const auto * col_type_const = typeid_cast(col.column.get()); +// if (!col_type_const || !isString(col.type)) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, +// "The last argument of function {} should " +// "be a constant string specifying the values' data type, illegal value: {}", +// String(function_name), col.name); +// +// DataTypePtr key_type = std::make_unique(); +// DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); +// DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); +// return std::make_unique(tuple_type); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } +// +// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) +// { +// const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); +// const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; +// extract_tree = buildJSONExtractTree(value_type, function_name); +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// auto & col_arr = assert_cast(dest); +// auto & col_tuple = assert_cast(col_arr.getData()); +// size_t old_size = col_tuple.size(); +// auto & col_key = assert_cast(col_tuple.getColumn(0)); +// auto & col_value = col_tuple.getColumn(1); +// +// String error; +// for (const auto & [key, value] : object) +// { +// if (extract_tree->insertResultToColumn(col_value, value, getJSONExtractInsertSettings(), error)) +// col_key.insertData(key.data(), key.size()); +// } +// +// if (col_tuple.size() == old_size) +// return false; +// +// col_arr.getOffsets().push_back(col_tuple.size()); +// return true; +// } +// +//private: +// std::unique_ptr> extract_tree; +//}; +// +// +//template +//class JSONExtractRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const 
ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); +// } +// else +// { +// ColumnString & col_str = assert_cast(dest); +// auto & chars = col_str.getChars(); +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// chars.push_back(0); +// col_str.getOffsets().push_back(chars.size()); +// } +// return true; +// } +// +// // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column +// static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) +// { +// ColumnFixedString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// +// auto & col_str = assert_cast(dest); +// +// if (chars.size() > col_str.getN()) +// return false; +// +// chars.resize_fill(col_str.getN()); +// col_str.insertData(reinterpret_cast(chars.data()), chars.size()); +// +// +// return true; +// } +// +// // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column +// static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) +// { +// if (element.getObject().size() > fixed_length) +// return false; +// +// ColumnFixedString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// +// if (chars.size() > fixed_length) +// return false; +// chars.resize_fill(fixed_length); +// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); +// +// return true; +// } +// +//private: +// static void traverse(const Element & element, WriteBuffer & buf) +// { +// if (element.isInt64()) +// { +// writeIntText(element.getInt64(), buf); +// return; +// } +// if (element.isUInt64()) +// { +// writeIntText(element.getUInt64(), buf); +// return; +// } +// if (element.isDouble()) +// { +// writeFloatText(element.getDouble(), buf); +// return; +// } +// if (element.isBool()) +// { +// if (element.getBool()) +// writeCString("true", buf); +// else +// writeCString("false", buf); +// return; +// } +// if (element.isString()) +// { +// writeJSONString(element.getString(), buf, formatSettings()); +// return; +// } +// if (element.isArray()) +// { +// writeChar('[', buf); +// bool need_comma = false; +// for (auto value : element.getArray()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// traverse(value, buf); +// } +// writeChar(']', buf); +// return; +// } +// if (element.isObject()) +// { +// writeChar('{', buf); +// bool need_comma = false; +// for (auto [key, value] : element.getObject()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// writeJSONString(key, buf, formatSettings()); +// writeChar(':', buf); +// traverse(value, buf); +// } +// writeChar('}', buf); +// return; +// } +// if (element.isNull()) +// { +// writeCString("null", buf); +// return; +// } +// } +// 
+// static const FormatSettings & formatSettings() +// { +// static const FormatSettings the_instance = [] +// { +// FormatSettings settings; +// settings.json.escape_forward_slashes = false; +// return settings; +// }(); +// return the_instance; +// } +//}; +// +// +//template +//class JSONExtractArrayRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(std::make_shared()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isArray()) +// return false; +// +// auto array = element.getArray(); +// ColumnArray & col_res = assert_cast(dest); +// +// for (auto value : array) +// JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); +// +// col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); +// return true; +// } +//}; +// +// +//template +//class JSONExtractKeysAndValuesRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// DataTypePtr string_type = std::make_unique(); +// DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); +// return std::make_unique(tuple_type); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// auto & col_arr = assert_cast(dest); +// auto & col_tuple = assert_cast(col_arr.getData()); +// auto & col_key = assert_cast(col_tuple.getColumn(0)); +// auto & col_value = assert_cast(col_tuple.getColumn(1)); +// +// for (const auto & [key, value] : object) +// { +// col_key.insertData(key.data(), key.size()); +// JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); +// } +// +// col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); +// return true; +// } +//}; +// +//template +//class JSONExtractKeysImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_unique(std::make_shared()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// ColumnArray & col_res = assert_cast(dest); +// auto & col_key = assert_cast(col_res.getData()); +// +// for (const auto & [key, value] : object) +// { +// col_key.insertData(key.data(), key.size()); +// } +// +// col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); +// return true; +// } +//}; +// +//} diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.reference b/tests/queries/0_stateless/03198_json_extract_more_types.reference new file mode 100644 index 00000000000..9a6580ff81b --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.reference @@ -0,0 +1,21 @@ +2020-01-01 
+2020-01-01 +2020-01-01 00:00:00 +2020-01-01 00:00:00.000000 +127.0.0.1 +2001:db8:85a3::8a2e:370:7334 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +Hello +Hello +\0\0\0 +Hello\0\0\0\0\0 +5801c962-1182-458a-89f8-d077da5074f9 diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.sql b/tests/queries/0_stateless/03198_json_extract_more_types.sql new file mode 100644 index 00000000000..28d24bbb271 --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.sql @@ -0,0 +1,29 @@ +set allow_suspicious_low_cardinality_types=1; + +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date'); +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date32'); +select JSONExtract('{"a" : "2020-01-01 00:00:00"}', 'a', 'DateTime'); +select JSONExtract('{"a" : "2020-01-01 00:00:00.000000"}', 'a', 'DateTime64(6)'); +select JSONExtract('{"a" : "127.0.0.1"}', 'a', 'IPv4'); +select JSONExtract('{"a" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}', 'a', 'IPv6'); + + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt64)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int64)'); + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); + +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(String)'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(5))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(3))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(10))'); + +select JSONExtract('{"a" : "5801c962-1182-458a-89f8-d077da5074f9"}', 'a', 'LowCardinality(UUID)'); + diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference new file mode 100644 index 00000000000..759b7763cd1 --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -0,0 +1,30 @@ +true Bool +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +42.42 Float64 +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +Hello String +2020-01-01 Date +2020-01-01 00:00:00.000000000 DateTime64(9) +[1,2,3] Array(Nullable(Int64)) +['str1','str2','str3'] Array(Nullable(String)) +[[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) +['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01','2020-01-01 date'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) +['str','42'] Array(Nullable(String)) +[42,42.42] Array(Nullable(Float64)) +[42,18446744073709552000,42.42] Array(Nullable(Float64)) +[42,42.42] Array(Nullable(Float64)) +[NULL,NULL] Array(Nullable(String)) +[NULL,42] Array(Nullable(Int64)) +[[NULL],[],[42]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],[1,NULL,3],[NULL,2,NULL]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],['1',NULL,'3'],[NULL,'2',NULL],['2020-01-01']] Array(Array(Nullable(String))) +('str',42,[42]) Tuple(Nullable(String), Nullable(Int64), Array(Nullable(Int64))) +[42,18446744073709551615] 
Array(Nullable(UInt64)) +(-42,18446744073709551615) Tuple(Nullable(Int64), Nullable(UInt64)) diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.sql b/tests/queries/0_stateless/03199_json_extract_dynamic.sql new file mode 100644 index 00000000000..286949f4d3e --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.sql @@ -0,0 +1,37 @@ +set input_format_json_try_infer_numbers_from_strings=1; + +select JSONExtract(materialize('{"d" : true}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : -42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 18446744073709551615}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42.42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "-42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "18446744073709551615"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : "Hello"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01 00:00:00.000"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [1, 2, 3]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str1", "str2", "str3"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[[1], [2, 3, 4]], [[5, 6], [7]]]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 date"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "str"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [null, null]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [null, 42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[null], [], [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null]]}'), 'a', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null], ["2020-01-01"]]}'), 'a', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["str", 42, [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [-42, 18446744073709551615]}'), 'd', 'Dynamic') as d, 
dynamicType(d); From 5fe594243a4fc281cf3ee878b2f79b09dfd83970 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 13:53:30 +0000 Subject: [PATCH 039/182] Remove old file --- src/Functions/FunctionsJSON.h | 1273 --------------------------------- 1 file changed, 1273 deletions(-) delete mode 100644 src/Functions/FunctionsJSON.h diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 5d44e22300d..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,1273 +0,0 @@ -//#pragma once -// -//#include -//#include -// -//#include -// -//#include -// -//#include -//#include -//#include -// -//#include -////#include -// -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -// -// -//#include "config.h" -// -// -//namespace DB -//{ -// -//namespace ErrorCodes -//{ -// extern const int ILLEGAL_TYPE_OF_ARGUMENT; -// extern const int ILLEGAL_COLUMN; -// extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -//} -// -//template -//concept HasIndexOperator = requires (T t) -//{ -// t[0]; -//}; -// -///// Functions to parse JSONs and extract values from it. -///// The first argument of all these functions gets a JSON, -///// after that there are any number of arguments specifying path to a desired part from the JSON's root. -///// For example, -///// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 -// -//class FunctionJSONHelpers -//{ -//public: -// template typename Impl, class JSONParser> -// class Executor -// { -// public: -// static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) -// { -// MutableColumnPtr to{result_type->createColumn()}; -// to->reserve(input_rows_count); -// -// if (arguments.empty()) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); -// -// const auto & first_column = arguments[0]; -// if (!isString(first_column.type)) -// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, -// "The first argument of function {} should be a string containing JSON, illegal type: " -// "{}", String(Name::name), first_column.type->getName()); -// -// const ColumnPtr & arg_json = first_column.column; -// const auto * col_json_const = typeid_cast(arg_json.get()); -// const auto * col_json_string -// = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); -// -// if (!col_json_string) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); -// -// const ColumnString::Chars & chars = col_json_string->getChars(); -// const ColumnString::Offsets & offsets = col_json_string->getOffsets(); -// -// size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); -// std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); -// -// /// Preallocate memory in parser if necessary. 
-// JSONParser parser; -// if constexpr (has_member_function_reserve::value) -// { -// size_t max_size = calculateMaxSize(offsets); -// if (max_size) -// parser.reserve(max_size); -// } -// -// Impl impl; -// -// /// prepare() does Impl-specific preparation before handling each row. -// if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) -// impl.prepare(Name::name, arguments, result_type); -// -// using Element = typename JSONParser::Element; -// -// Element document; -// bool document_ok = false; -// if (col_json_const) -// { -// std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; -// document_ok = parser.parse(json, document); -// } -// -// for (const auto i : collections::range(0, input_rows_count)) -// { -// if (!col_json_const) -// { -// std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; -// document_ok = parser.parse(json, document); -// } -// -// bool added_to_column = false; -// if (document_ok) -// { -// /// Perform moves. -// Element element; -// std::string_view last_key; -// bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); -// -// if (moves_ok) -// added_to_column = impl.insertResultToColumn(*to, element, last_key); -// } -// -// /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. -// if (!added_to_column) -// to->insertDefault(); -// } -// return to; -// } -// }; -// -//private: -// BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) -// BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) -// -// /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. -// /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) -// /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. -// /// Keys and indices can be nonconst, in this case they are calculated for each row. 
-// enum class MoveType : uint8_t -// { -// Key, -// Index, -// ConstKey, -// ConstIndex, -// }; -// -// struct Move -// { -// explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} -// Move(MoveType type_, const String & key_) : type(type_), key(key_) {} -// MoveType type; -// size_t index = 0; -// String key; -// }; -// -// static std::vector prepareMoves( -// const char * function_name, -// const ColumnsWithTypeAndName & columns, -// size_t first_index_argument, -// size_t num_index_arguments) -// { -// std::vector moves; -// moves.reserve(num_index_arguments); -// for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) -// { -// const auto & column = columns[i]; -// if (!isString(column.type) && !isNativeInteger(column.type)) -// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, -// "The argument {} of function {} should be a string specifying key " -// "or an integer specifying index, illegal type: {}", -// std::to_string(i + 1), String(function_name), column.type->getName()); -// -// if (column.column && isColumnConst(*column.column)) -// { -// const auto & column_const = assert_cast(*column.column); -// if (isString(column.type)) -// moves.emplace_back(MoveType::ConstKey, column_const.getValue()); -// else -// moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); -// } -// else -// { -// if (isString(column.type)) -// moves.emplace_back(MoveType::Key, ""); -// else -// moves.emplace_back(MoveType::Index, 0); -// } -// } -// return moves; -// } -// -// -// /// Performs moves of types MoveType::Index and MoveType::ConstIndex. -// template -// static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, -// const typename JSONParser::Element & document, const std::vector & moves, -// typename JSONParser::Element & element, std::string_view & last_key) -// { -// typename JSONParser::Element res_element = document; -// std::string_view key; -// -// for (size_t j = 0; j != moves.size(); ++j) -// { -// switch (moves[j].type) -// { -// case MoveType::ConstIndex: -// { -// if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) -// return false; -// break; -// } -// case MoveType::ConstKey: -// { -// key = moves[j].key; -// if (!moveToElementByKey(res_element, key)) -// return false; -// break; -// } -// case MoveType::Index: -// { -// Int64 index = (*arguments[j + 1].column)[row].get(); -// if (!moveToElementByIndex(res_element, static_cast(index), key)) -// return false; -// break; -// } -// case MoveType::Key: -// { -// key = arguments[j + 1].column->getDataAt(row).toView(); -// if (!moveToElementByKey(res_element, key)) -// return false; -// break; -// } -// } -// } -// -// element = res_element; -// last_key = key; -// return true; -// } -// -// template -// static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) -// { -// if (element.isArray()) -// { -// auto array = element.getArray(); -// if (index >= 0) -// --index; -// else -// index += array.size(); -// -// if (static_cast(index) >= array.size()) -// return false; -// element = array[index]; -// out_key = {}; -// return true; -// } -// -// if constexpr (HasIndexOperator) -// { -// if (element.isObject()) -// { -// auto object = element.getObject(); -// if (index >= 0) -// --index; -// else -// index += object.size(); -// -// if (static_cast(index) >= object.size()) -// return false; -// std::tie(out_key, element) = object[index]; -// return true; -// 
} -// } -// -// return {}; -// } -// -// /// Performs moves of types MoveType::Key and MoveType::ConstKey. -// template -// static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) -// { -// if (!element.isObject()) -// return false; -// auto object = element.getObject(); -// return object.find(key, element); -// } -// -// static size_t calculateMaxSize(const ColumnString::Offsets & offsets) -// { -// size_t max_size = 0; -// for (const auto i : collections::range(0, offsets.size())) -// { -// size_t size = offsets[i] - offsets[i - 1]; -// max_size = std::max(max_size, size); -// } -// if (max_size) -// --max_size; -// return max_size; -// } -// -//}; -// -//template -//class JSONExtractImpl; -// -//template -//class JSONExtractKeysAndValuesImpl; -// -///** -//* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -//* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -//* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -//* input arguments. -//* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -//* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -//* if needed. -//*/ -//template typename Impl> -//constexpr bool functionForcesTheReturnType() -//{ -// return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -//} -// -//template typename Impl> -//class ExecutableFunctionJSON : public IExecutableFunction -//{ -// -//public: -// explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) -// : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) -// { -// } -// -// String getName() const override { return Name::name; } -// bool useDefaultImplementationForNulls() const override { return false; } -// bool useDefaultImplementationForConstants() const override { return true; } -// bool useDefaultImplementationForLowCardinalityColumns() const override -// { -// return !functionForcesTheReturnType(); -// } -// -// ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override -// { -// if (null_presence.has_null_constant) -// return result_type->createColumnConstWithDefaultValue(input_rows_count); -// -// if constexpr (functionForcesTheReturnType()) -// { -// ColumnsWithTypeAndName columns_without_low_cardinality = arguments; -// -// for (auto & column : columns_without_low_cardinality) -// { -// column.column = recursiveRemoveLowCardinality(column.column); -// column.type = recursiveRemoveLowCardinality(column.type); -// } -// -// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; -// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); -// -// if (null_presence.has_nullable) -// temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); -// -// if (result_type->lowCardinality()) -// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); -// -// return temporary_result; -// } -// else -// { -// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; -// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); -// -// if (null_presence.has_nullable) -// temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); -// -// if (result_type->lowCardinality()) -// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); -// -// return temporary_result; -// } -// } -// -//private: -// -// ColumnPtr -// chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const -// { -//#if USE_SIMDJSON -// if (allow_simdjson) -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#endif -// -//#if USE_RAPIDJSON -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#else -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#endif -// } -// -// NullPresence null_presence; -// bool allow_simdjson; -// DataTypePtr json_return_type; -//}; -// -// -//template typename Impl> -//class FunctionBaseFunctionJSON : public IFunctionBase -//{ -//public: -// explicit FunctionBaseFunctionJSON( -// const NullPresence & null_presence_, -// bool allow_simdjson_, -// DataTypes argument_types_, -// DataTypePtr return_type_, -// DataTypePtr json_return_type_) -// : null_presence(null_presence_) -// , allow_simdjson(allow_simdjson_) -// , argument_types(std::move(argument_types_)) -// , return_type(std::move(return_type_)) -// , json_return_type(std::move(json_return_type_)) -// { -// } -// -// String getName() const override { return Name::name; } -// -// const DataTypes & getArgumentTypes() const override -// { -// return argument_types; -// } -// -// const DataTypePtr & getResultType() const override -// { -// return return_type; -// } -// -// bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } -// -// ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override -// { -// return std::make_unique>(null_presence, allow_simdjson, json_return_type); -// } -// -//private: -// NullPresence null_presence; -// bool allow_simdjson; -// DataTypes argument_types; -// DataTypePtr return_type; -// DataTypePtr json_return_type; -//}; -// -///// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -///// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
-//template typename Impl> -//class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -//{ -//public: -// static constexpr auto name = Name::name; -// -// String getName() const override { return name; } -// -// static FunctionOverloadResolverPtr create(ContextPtr context_) -// { -// return std::make_unique(context_); -// } -// -// explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} -// -// bool isVariadic() const override { return true; } -// size_t getNumberOfArguments() const override { return 0; } -// bool useDefaultImplementationForNulls() const override { return false; } -// bool useDefaultImplementationForLowCardinalityColumns() const override -// { -// return !functionForcesTheReturnType(); -// } -// -// FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override -// { -// bool has_nothing_argument = false; -// for (const auto & arg : arguments) -// has_nothing_argument |= isNothing(arg.type); -// -// DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); -// NullPresence null_presence = getNullPresense(arguments); -// DataTypePtr return_type; -// if (has_nothing_argument) -// return_type = std::make_shared(); -// else if (null_presence.has_null_constant) -// return_type = makeNullable(std::make_shared()); -// else if (null_presence.has_nullable) -// return_type = makeNullable(json_return_type); -// else -// return_type = json_return_type; -// -// /// Top-level LowCardinality columns are processed outside JSON parser. -// json_return_type = removeLowCardinality(json_return_type); -// -// DataTypes argument_types; -// argument_types.reserve(arguments.size()); -// for (const auto & argument : arguments) -// argument_types.emplace_back(argument.type); -// return std::make_unique>( -// null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); -// } -//}; -// -//struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -//struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -//struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -//struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -//struct NameJSONType { static constexpr auto name{"JSONType"}; }; -//struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -//struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -//struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -//struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -//struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -//struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -//struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -//struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -//struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -//struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -//struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; -// -// -//template -//class JSONHasImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) -// { -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(1); -// return true; -// } -//}; -// -// -//template -//class IsValidJSONImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() != 1) -// { -// /// IsValidJSON() shouldn't get parameters other than JSON. -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", -// String(function_name)); -// } -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) -// { -// /// This function is called only if JSON is valid. -// /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(1); -// return true; -// } -//}; -// -// -//template -//class JSONLengthImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// size_t size; -// if (element.isArray()) -// size = element.getArray().size(); -// else if (element.isObject()) -// size = element.getObject().size(); -// else -// return false; -// -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(size); -// return true; -// } -//}; -// -// -//template -//class JSONKeyImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) -// { -// if (last_key.empty()) -// return false; -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(last_key.data(), last_key.size()); -// return true; -// } -//}; -// -// -//template -//class JSONTypeImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// static const std::vector> values = { -// {"Array", '['}, -// {"Object", '{'}, -// {"String", '"'}, -// {"Int64", 'i'}, -// {"UInt64", 'u'}, -// {"Double", 'd'}, -// {"Bool", 'b'}, -// {"Null", 0}, /// the default value for the column. 
-// }; -// return std::make_shared>(values); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// UInt8 type; -// switch (element.type()) -// { -// case ElementType::INT64: -// type = 'i'; -// break; -// case ElementType::UINT64: -// type = 'u'; -// break; -// case ElementType::DOUBLE: -// type = 'd'; -// break; -// case ElementType::STRING: -// type = '"'; -// break; -// case ElementType::ARRAY: -// type = '['; -// break; -// case ElementType::OBJECT: -// type = '{'; -// break; -// case ElementType::BOOL: -// type = 'b'; -// break; -// case ElementType::NULL_VALUE: -// type = 0; -// break; -// } -// -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(type); -// return true; -// } -//}; -// -// -//template -//class JSONExtractNumericImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared>(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// NumberType value; -// -// switch (element.type()) -// { -// case ElementType::DOUBLE: -// if constexpr (std::is_floating_point_v) -// { -// /// We permit inaccurate conversion of double to float. -// /// Example: double 0.1 from JSON is not representable in float. -// /// But it will be more convenient for user to perform conversion. -// value = static_cast(element.getDouble()); -// } -// else if (!accurate::convertNumeric(element.getDouble(), value)) -// return false; -// break; -// case ElementType::UINT64: -// if (!accurate::convertNumeric(element.getUInt64(), value)) -// return false; -// break; -// case ElementType::INT64: -// if (!accurate::convertNumeric(element.getInt64(), value)) -// return false; -// break; -// case ElementType::BOOL: -// if constexpr (is_integer && convert_bool_to_integer) -// { -// value = static_cast(element.getBool()); -// break; -// } -// return false; -// case ElementType::STRING: -// { -// auto rb = ReadBufferFromMemory{element.getString()}; -// if constexpr (std::is_floating_point_v) -// { -// if (!tryReadFloatText(value, rb) || !rb.eof()) -// return false; -// } -// else -// { -// if (tryReadIntText(value, rb) && rb.eof()) -// break; -// -// /// Try to parse float and convert it to integer. 
-// Float64 tmp_float; -// rb.position() = rb.buffer().begin(); -// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) -// return false; -// -// if (!accurate::convertNumeric(tmp_float, value)) -// return false; -// } -// break; -// } -// default: -// return false; -// } -// -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnLowCardinality & col_low = assert_cast(dest); -// col_low.insertData(reinterpret_cast(&value), sizeof(value)); -// } -// else -// { -// auto & col_vec = assert_cast &>(dest); -// col_vec.insertValue(value); -// } -// return true; -// } -//}; -// -// -//template -//using JSONExtractInt64Impl = JSONExtractNumericImpl; -//template -//using JSONExtractUInt64Impl = JSONExtractNumericImpl; -//template -//using JSONExtractFloat64Impl = JSONExtractNumericImpl; -// -// -//template -//class JSONExtractBoolImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// bool value; -// switch (element.type()) -// { -// case ElementType::BOOL: -// value = element.getBool(); -// break; -// case ElementType::INT64: -// value = element.getInt64() != 0; -// break; -// case ElementType::UINT64: -// value = element.getUInt64() != 0; -// break; -// default: -// return false; -// } -// -// auto & col_vec = assert_cast &>(dest); -// col_vec.insertValue(static_cast(value)); -// return true; -// } -//}; -// -//template -//class JSONExtractRawImpl; -// -//template -//class JSONExtractStringImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (element.isNull()) -// return false; -// -// if (!element.isString()) -// return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); -// -// auto str = element.getString(); -// -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnLowCardinality & col_low = assert_cast(dest); -// col_low.insertData(str.data(), str.size()); -// } -// else -// { -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(str.data(), str.size()); -// } -// return true; -// } -//}; -// -// -//static const JSONExtractInsertSettings & getJSONExtractInsertSettings() -//{ -// static const JSONExtractInsertSettings instance = [] -// { -// JSONExtractInsertSettings settings; -// settings.insert_null_as_default = false; -// settings.insert_default_on_invalid_elements_in_complex_types = true; -// return settings; -// }(); -// return instance; -//} -// -//template -//class JSONExtractImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() < 2) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); -// -// const auto & col = arguments.back(); -// const 
auto * col_type_const = typeid_cast(col.column.get()); -// if (!col_type_const || !isString(col.type)) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, -// "The last argument of function {} should " -// "be a constant string specifying the return data type, illegal value: {}", -// String(function_name), col.name); -// -// return DataTypeFactory::instance().get(col_type_const->getValue()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } -// -// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) -// { -// extract_tree = buildJSONExtractTree(result_type, function_name); -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// String error; -// return extract_tree->insertResultToColumn(dest, element, getJSONExtractInsertSettings(), error); -// } -// -//protected: -// std::unique_ptr> extract_tree; -//}; -// -// -//template -//class JSONExtractKeysAndValuesImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() < 2) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); -// -// const auto & col = arguments.back(); -// const auto * col_type_const = typeid_cast(col.column.get()); -// if (!col_type_const || !isString(col.type)) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, -// "The last argument of function {} should " -// "be a constant string specifying the values' data type, illegal value: {}", -// String(function_name), col.name); -// -// DataTypePtr key_type = std::make_unique(); -// DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); -// DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); -// return std::make_unique(tuple_type); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } -// -// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) -// { -// const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); -// const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; -// extract_tree = buildJSONExtractTree(value_type, function_name); -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// auto & col_arr = assert_cast(dest); -// auto & col_tuple = assert_cast(col_arr.getData()); -// size_t old_size = col_tuple.size(); -// auto & col_key = assert_cast(col_tuple.getColumn(0)); -// auto & col_value = col_tuple.getColumn(1); -// -// String error; -// for (const auto & [key, value] : object) -// { -// if (extract_tree->insertResultToColumn(col_value, value, getJSONExtractInsertSettings(), error)) -// col_key.insertData(key.data(), key.size()); -// } -// -// if (col_tuple.size() == old_size) -// return false; -// -// col_arr.getOffsets().push_back(col_tuple.size()); -// return true; -// } -// -//private: -// std::unique_ptr> extract_tree; -//}; -// -// -//template -//class JSONExtractRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const 
ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); -// } -// else -// { -// ColumnString & col_str = assert_cast(dest); -// auto & chars = col_str.getChars(); -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// chars.push_back(0); -// col_str.getOffsets().push_back(chars.size()); -// } -// return true; -// } -// -// // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column -// static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) -// { -// ColumnFixedString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// -// auto & col_str = assert_cast(dest); -// -// if (chars.size() > col_str.getN()) -// return false; -// -// chars.resize_fill(col_str.getN()); -// col_str.insertData(reinterpret_cast(chars.data()), chars.size()); -// -// -// return true; -// } -// -// // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column -// static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) -// { -// if (element.getObject().size() > fixed_length) -// return false; -// -// ColumnFixedString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// -// if (chars.size() > fixed_length) -// return false; -// chars.resize_fill(fixed_length); -// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); -// -// return true; -// } -// -//private: -// static void traverse(const Element & element, WriteBuffer & buf) -// { -// if (element.isInt64()) -// { -// writeIntText(element.getInt64(), buf); -// return; -// } -// if (element.isUInt64()) -// { -// writeIntText(element.getUInt64(), buf); -// return; -// } -// if (element.isDouble()) -// { -// writeFloatText(element.getDouble(), buf); -// return; -// } -// if (element.isBool()) -// { -// if (element.getBool()) -// writeCString("true", buf); -// else -// writeCString("false", buf); -// return; -// } -// if (element.isString()) -// { -// writeJSONString(element.getString(), buf, formatSettings()); -// return; -// } -// if (element.isArray()) -// { -// writeChar('[', buf); -// bool need_comma = false; -// for (auto value : element.getArray()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// traverse(value, buf); -// } -// writeChar(']', buf); -// return; -// } -// if (element.isObject()) -// { -// writeChar('{', buf); -// bool need_comma = false; -// for (auto [key, value] : element.getObject()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// writeJSONString(key, buf, formatSettings()); -// writeChar(':', buf); -// traverse(value, buf); -// } -// writeChar('}', buf); -// return; -// } -// if (element.isNull()) -// { -// writeCString("null", buf); -// return; -// } -// } -// 
-// static const FormatSettings & formatSettings() -// { -// static const FormatSettings the_instance = [] -// { -// FormatSettings settings; -// settings.json.escape_forward_slashes = false; -// return settings; -// }(); -// return the_instance; -// } -//}; -// -// -//template -//class JSONExtractArrayRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(std::make_shared()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isArray()) -// return false; -// -// auto array = element.getArray(); -// ColumnArray & col_res = assert_cast(dest); -// -// for (auto value : array) -// JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); -// -// col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); -// return true; -// } -//}; -// -// -//template -//class JSONExtractKeysAndValuesRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// DataTypePtr string_type = std::make_unique(); -// DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); -// return std::make_unique(tuple_type); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// auto & col_arr = assert_cast(dest); -// auto & col_tuple = assert_cast(col_arr.getData()); -// auto & col_key = assert_cast(col_tuple.getColumn(0)); -// auto & col_value = assert_cast(col_tuple.getColumn(1)); -// -// for (const auto & [key, value] : object) -// { -// col_key.insertData(key.data(), key.size()); -// JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); -// } -// -// col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); -// return true; -// } -//}; -// -//template -//class JSONExtractKeysImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_unique(std::make_shared()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// ColumnArray & col_res = assert_cast(dest); -// auto & col_key = assert_cast(col_res.getData()); -// -// for (const auto & [key, value] : object) -// { -// col_key.insertData(key.data(), key.size()); -// } -// -// col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); -// return true; -// } -//}; -// -//} From 63303dd79893ace08ce2ed4be6bfee422287d44b Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 14:03:04 +0000 Subject: [PATCH 040/182] Fix style --- src/Formats/JSONExtractTree.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp 
b/src/Formats/JSONExtractTree.cpp index 6d019f96ba6..18437c16bc9 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -50,6 +50,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + template void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) { @@ -207,7 +212,7 @@ namespace { template -String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) { WriteBufferFromOwnString buf; jsonElementToString(element, buf, format_settings); @@ -1440,7 +1445,7 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::Date:; return std::make_unique>(); case TypeIndex::Date32: - return std::make_unique>(); + return std::make_unique>(); case TypeIndex::DateTime: return std::make_unique>(assert_cast(*type)); case TypeIndex::DateTime64: From d03fcb5ff121203f9cd6bf729df98764593328fe Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 14:23:38 +0000 Subject: [PATCH 041/182] Fix --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 6519d54a8c5..f2ad1dc6717 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -271,7 +271,7 @@ namespace { if (WhichDataType(type).isInt64()) { - bool is_negative = json_info->negative_integers.contains(type.get()); + bool is_negative = json_info && json_info->negative_integers.contains(type.get()); have_negative_integers |= is_negative; if (!is_negative) type = std::make_shared(); From 6530ae104d16ffbda51cf849b5f89e3d4080d2af Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 15:23:01 +0000 Subject: [PATCH 042/182] Fix tests --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index f2ad1dc6717..3c374ada9e6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -296,7 +296,7 @@ namespace if (which.isInt64() || which.isUInt64()) { auto new_type = std::make_shared(); - if (json_info->numbers_parsed_from_json_strings.erase(type.get())) + if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get())) json_info->numbers_parsed_from_json_strings.insert(new_type.get()); type = new_type; } From a5adf31b9e4dfa041150fa263ab68f32cb47122c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Jul 2024 19:30:36 +0200 Subject: [PATCH 043/182] Fix special build --- src/Formats/JSONExtractTree.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 18437c16bc9..b94981e7cb4 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1465,9 +1465,9 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::LowCardinality: { /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. 
- auto lc_type = typeid_cast(type.get()); - auto dictionary_type = removeNullable(lc_type->getDictionaryType()); - bool is_nullable = lc_type->isLowCardinalityNullable(); + const auto & lc_type = assert_cast(*type)); + auto dictionary_type = removeNullable(lc_type.getDictionaryType()); + bool is_nullable = lc_type.isLowCardinalityNullable(); switch (dictionary_type->getTypeId()) { From 41b9216dd1d862b46ed72d50e899197e2fec9daa Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 00:22:41 +0200 Subject: [PATCH 044/182] Fix build --- src/Formats/JSONExtractTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index b94981e7cb4..827f276311a 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1465,7 +1465,7 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::LowCardinality: { /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. - const auto & lc_type = assert_cast(*type)); + const auto & lc_type = assert_cast(*type); auto dictionary_type = removeNullable(lc_type.getDictionaryType()); bool is_nullable = lc_type.isLowCardinalityNullable(); From e11b48c7b5ef1bb25369f8a687b99b16b8ab198d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 3 Jul 2024 23:04:13 +0000 Subject: [PATCH 045/182] fix --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 9e4d3fead6c..fc7e56c281f 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4458,10 +4458,8 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table if (auto * scope_query_node = scope.scope_node->as()) { auto left_table_expression = extractLeftTableExpression(scope_query_node->getJoinTree()); - bool is_cte = (query_node && query_node->isCTE()) || (union_node && union_node->isCTE()); if (table_expression_node.get() == left_table_expression.get() && - scope.joins_count && - (scope.context->getSettingsRef().single_join_prefer_left_table || is_cte)) + scope.joins_count && scope.context->getSettingsRef().single_join_prefer_left_table) table_expression_data.should_qualify_columns = false; } From 3776fafc881bf4725bfbb356e6e81df66ad336b6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jul 2024 13:44:44 +0000 Subject: [PATCH 046/182] Print stacktrace in case of about after logical error. 
--- src/Common/Exception.cpp | 9 ++++++--- src/Common/StackTrace.cpp | 2 +- src/Common/StackTrace.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 1f4b0aea8f2..181b4f1488e 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,9 +38,12 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description) +void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); + auto & logger = Poco::Logger::root(); + LOG_FATAL(&logger, "Logical error: '{}'.", description); + if (trace) + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace->data(), 0, trace->size())); abort(); } @@ -58,7 +61,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg); + abortOnFailedAssertion(msg, &trace); } #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 239e957bdfe..34f6f0b7535 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -545,7 +545,7 @@ std::string StackTrace::toString() const return toStringCached(frame_pointers, offset, size); } -std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size) { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ce9a9281f3..2078828f3d7 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -59,7 +59,7 @@ public: const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; - static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static std::string toString(void * const * frame_pointers, size_t offset, size_t size); static void dropCache(); /// @param fatal - if true, will process inline frames (slower) From 4271b2b6e3d6940603ef0d1836fbabf42b092d65 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jul 2024 16:29:32 +0000 Subject: [PATCH 047/182] Add noreturn/ --- src/Common/Exception.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 181b4f1488e..07bda6a75be 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,7 +38,7 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) +[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); From f556f2cd9529acfdf796df91c20abec0ce405a95 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Jul 2024 18:28:22 +0000 Subject: [PATCH 048/182] Try to fix special build --- src/Formats/JSONExtractTree.cpp | 1 + src/Functions/FunctionsJSON.cpp | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 827f276311a..8fe472930d3 100644 --- a/src/Formats/JSONExtractTree.cpp +++ 
b/src/Formats/JSONExtractTree.cpp @@ -1558,6 +1558,7 @@ template std::unique_ptr> buildJSONExtractTr #if USE_RAPIDJSON template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, String & error); #else template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index c6af0674db7..ca233becb63 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -736,17 +736,8 @@ public: NumberType value; tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); return true; } }; From 64ef36dab362094e0bfa4a32e09b830502eb2c56 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 5 Jul 2024 11:19:06 +0000 Subject: [PATCH 049/182] fix deadlock --- src/Databases/DatabaseAtomic.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b30b05bb7a7..a48eb2abce6 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -106,12 +106,17 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { + // it is important to call destructures not_in_use without + // blocking mutex for avoid potential deadlock. 
DetachedTables not_in_use; - std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); - table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleanupDetachedTables(); + StoragePtr table; + { + std::lock_guard lock(mutex); + table = DatabaseOrdinary::detachTableUnlocked(name); + table_name_to_path.erase(name); + detached_tables.emplace(table->getStorageID().uuid, table); + not_in_use = cleanupDetachedTables(); + } if (!not_in_use.empty()) { From 05a027822be6acacc99d8eeccc124a8304edc797 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Jul 2024 15:57:17 +0200 Subject: [PATCH 050/182] Update ZooKeeperImpl.cpp --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 8653af51308..2728f953bea 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -996,6 +996,10 @@ void ZooKeeper::receiveEvent() if (request_info.callback) request_info.callback(*response); + + /// Finalize current session if we receive a hardware error from ZooKeeper + if (err != Error::ZOK && isHardwareError(err)) + finalize(/*error_send*/ false, /*error_receive*/ true, fmt::format("Hardware error: {}", err)); } From 8fded210bc7320d144bdccfa05feabf0b07ce67e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 5 Jul 2024 14:49:19 +0000 Subject: [PATCH 051/182] revert to one join --- src/Analyzer/Resolve/IdentifierResolver.cpp | 2 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp index 59cf82989e7..692a31b66ba 100644 --- a/src/Analyzer/Resolve/IdentifierResolver.cpp +++ b/src/Analyzer/Resolve/IdentifierResolver.cpp @@ -1129,7 +1129,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromJoin(const Identifi resolved_identifier = left_resolved_identifier; } } - else if (scope.joins_count && scope.context->getSettingsRef().single_join_prefer_left_table) + else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) { resolved_side = JoinTableSide::Left; resolved_identifier = left_resolved_identifier; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index fc7e56c281f..134b2759c93 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -4459,7 +4459,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table { auto left_table_expression = extractLeftTableExpression(scope_query_node->getJoinTree()); if (table_expression_node.get() == left_table_expression.get() && - scope.joins_count && scope.context->getSettingsRef().single_join_prefer_left_table) + scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table) table_expression_data.should_qualify_columns = false; } From 1f0c6061556cd64225a58933177df0228b4c185a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 5 Jul 2024 18:48:01 +0000 Subject: [PATCH 052/182] fix tests --- .../02378_analyzer_projection_names.reference | 4 ++-- ...056_analyzer_double_subquery_alias.reference | 1 - .../03056_analyzer_double_subquery_alias.sql | 17 ----------------- 3 files changed, 2 insertions(+), 20 deletions(-) delete mode 100644 
tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference delete mode 100644 tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql diff --git a/tests/queries/0_stateless/02378_analyzer_projection_names.reference b/tests/queries/0_stateless/02378_analyzer_projection_names.reference index 170f3e9e1ad..532414f117c 100644 --- a/tests/queries/0_stateless/02378_analyzer_projection_names.reference +++ b/tests/queries/0_stateless/02378_analyzer_projection_names.reference @@ -447,13 +447,13 @@ SELECT '--'; -- DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1) SELECT * FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); -c1 UInt8 +test_table_in_cte_1.c1 UInt8 test_table_in_cte_2.c1 UInt8 SELECT '--'; -- DESCRIBE (WITH test_table_in_cte_1 AS (SELECT 1 AS c1), test_table_in_cte_2 AS (SELECT 1 AS c1 UNION ALL SELECT 1 AS c1) SELECT * FROM test_table_in_cte_1 INNER JOIN test_table_in_cte_2 as test_table_in_cte_2 ON test_table_in_cte_1.c1 = test_table_in_cte_2.c1); -c1 UInt8 +test_table_in_cte_1.c1 UInt8 test_table_in_cte_2.c1 UInt8 SELECT 'Joins'; Joins diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference deleted file mode 100644 index 72749c905a3..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference +++ /dev/null @@ -1 +0,0 @@ -1 1 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql deleted file mode 100644 index de471c1a091..00000000000 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql +++ /dev/null @@ -1,17 +0,0 @@ --- https://github.com/ClickHouse/ClickHouse/issues/22627 -SET allow_experimental_analyzer=1; -WITH - x AS - ( - SELECT 1 AS a - ), - xx AS - ( - SELECT * - FROM x - , x AS x1 - , x AS x2 - ) -SELECT * -FROM xx -WHERE a = 1; From f00a8a42a0c26f3805ddf10e09b11fd0e4d5325e Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 5 Jul 2024 20:55:46 +0200 Subject: [PATCH 053/182] update partitionId --- .../functions/other-functions.md | 51 +++++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 52 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 58fc1eba02e..8d8b54b64f5 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2984,6 +2984,57 @@ Result: └─────────┘ ``` +## partitionId + +Returns computed [partition](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) IDs of its arguments. + +:::note +This function is slow and should not be called for large amount of rows. +::: + +**Syntax** + +```sql +partitionId(x[, y, ...]); +``` + +**Arguments** + +- `x` — Column for which to return the partition ID. +- `y, ...` — Remaining N columns for which to return the partition ID (optional). + +**Return Type** + +- Partition ID that the row belongs to. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS mt; +CREATE TABLE mt (n UInt64) ENGINE=MergeTree ORDER BY n PARTITION BY n % 2; +INSERT INTO mt SELECT * FROM numbers(10); +SELECT partitionId(*) FROM mt; +``` +Result: + +```response + ┌─partitionId(n)─┐ + 1. │ 0 │ + 2. │ 2 │ + 3. │ 4 │ + 4. │ 6 │ + 5. 
│ 8 │ + 6. │ 10 │ + 7. │ 12 │ + 8. │ 14 │ + 9. │ 16 │ +10. │ 18 │ + └────────────────┘ +``` + + ## shardNum Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 229eccefa48..d6406e2bb76 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2184,6 +2184,7 @@ parseReadableSizeOrZero parseTimeDelta parseable parsers +partitionId pathFull pclmulqdq pcre From e2885b1cfa976e9fe072e6453d8142364c013aa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Jul 2024 02:16:51 +0200 Subject: [PATCH 054/182] Add a test --- .../03201_local_named_collections.reference | 1 + .../03201_local_named_collections.sh | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/03201_local_named_collections.reference create mode 100755 tests/queries/0_stateless/03201_local_named_collections.sh diff --git a/tests/queries/0_stateless/03201_local_named_collections.reference b/tests/queries/0_stateless/03201_local_named_collections.reference new file mode 100644 index 00000000000..af5626b4a11 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.reference @@ -0,0 +1 @@ +Hello, world! diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh new file mode 100755 index 00000000000..54ca76a52d9 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE IF EXISTS test; +CREATE TABLE test (s String) ORDER BY (); +INSERT INTO test VALUES ('Hello, world!'); +" + +${CLICKHOUSE_LOCAL} --multiquery " +CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; +SELECT * FROM remote(mydb, table = 'test'); +" + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE test; +" From 02ff6a70737b77a7711a9427eaa487cf705f7378 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 6 Jul 2024 11:36:13 +0200 Subject: [PATCH 055/182] Update example for partitionId --- .../functions/other-functions.md | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 8d8b54b64f5..b1424c8b19a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3013,25 +3013,24 @@ Query: ```sql DROP TABLE IF EXISTS mt; -CREATE TABLE mt (n UInt64) ENGINE=MergeTree ORDER BY n PARTITION BY n % 2; -INSERT INTO mt SELECT * FROM numbers(10); -SELECT partitionId(*) FROM mt; +CREATE TABLE mt (i int, j int) ENGINE=MergeTree PARTITION BY i ORDER BY j SETTINGS index_granularity = 1; +INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); +SELECT * FROM mt WHERE _partition_id = partitionId(1); +SELECT * FROM mt WHERE _partition_id = partitionId(2); ``` Result: ```response - ┌─partitionId(n)─┐ - 1. │ 0 │ - 2. │ 2 │ - 3. │ 4 │ - 4. │ 6 │ - 5. │ 8 │ - 6. │ 10 │ - 7. │ 12 │ - 8. │ 14 │ - 9. │ 16 │ -10. │ 18 │ - └────────────────┘ + ┌─i─┬─j─┐ +1. 
│ 1 │ 1 │ +2. │ 1 │ 2 │ +3. │ 1 │ 3 │ + └───┴───┘ + ┌─i─┬─j─┐ +1. │ 2 │ 4 │ +2. │ 2 │ 5 │ +3. │ 2 │ 6 │ + └───┴───┘ ``` From f1d373bb1c95a1b852e88b7890b44789e7ae4026 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 6 Jul 2024 11:38:32 +0200 Subject: [PATCH 056/182] update example formatting --- docs/en/sql-reference/functions/other-functions.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b1424c8b19a..c0225f009b7 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3013,8 +3013,18 @@ Query: ```sql DROP TABLE IF EXISTS mt; -CREATE TABLE mt (i int, j int) ENGINE=MergeTree PARTITION BY i ORDER BY j SETTINGS index_granularity = 1; +CREATE TABLE mt +( + `i` int, + `j` int +) + ENGINE = MergeTree +PARTITION BY i +ORDER BY j +SETTINGS index_granularity = 1; + INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); + SELECT * FROM mt WHERE _partition_id = partitionId(1); SELECT * FROM mt WHERE _partition_id = partitionId(2); ``` From b9c662e1da504c4964b67d6408ad06fe9bcc0f95 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 6 Jul 2024 11:39:11 +0200 Subject: [PATCH 057/182] update example formatting --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index c0225f009b7..86a03ed6df8 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3018,7 +3018,7 @@ CREATE TABLE mt `i` int, `j` int ) - ENGINE = MergeTree +ENGINE = MergeTree PARTITION BY i ORDER BY j SETTINGS index_granularity = 1; From 17e089c490efbf1ac4224fa8cf0b74bb3f50739a Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Sun, 7 Jul 2024 18:22:55 +0800 Subject: [PATCH 058/182] Refactor `OptimizeIfWithConstantConditionVisitor` using `InDepthNodeVisitor` --- ...OptimizeIfWithConstantConditionVisitor.cpp | 93 ++++++++----------- .../OptimizeIfWithConstantConditionVisitor.h | 17 ++-- src/Interpreters/TreeOptimizer.cpp | 3 +- 3 files changed, 52 insertions(+), 61 deletions(-) diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 20451fb20ad..48c9988b6fc 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -73,66 +73,55 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v return false; } -void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +void OptimizeIfWithConstantConditionVisitorData::visit(ASTFunction & function_node, ASTPtr & ast) { - if (!current_ast) - return; - checkStackSize(); - for (ASTPtr & child : current_ast->children) + if (function_node.name != "if") + return; + + if (!function_node.arguments) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); + + if (function_node.arguments->children.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node.arguments->children.size()); + + const auto * args = function_node.arguments->as(); + + ASTPtr 
condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) { - auto * function_node = child->as(); - if (!function_node || function_node->name != "if") + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = ast; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = ast->tryGetAlias(); + + if (replace_alias.empty()) { - visit(child); - continue; + replace_ast->setAlias(if_alias); + ast = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + ast = replace_ast_deep_copy; } - if (!function_node->arguments) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); - - if (function_node->arguments->children.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Wrong number of arguments for function 'if' ({} instead of 3)", - function_node->arguments->children.size()); - - visit(function_node->arguments); - const auto * args = function_node->arguments->as(); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) + if (!if_alias.empty()) { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = ast; } } } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index ad98f92bafd..3b46f90f07c 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -1,23 +1,24 @@ #pragma once #include +#include namespace DB { - -/// It removes Function_if node from AST if condition is constant. 
-/// TODO: rewrite with InDepthNodeVisitor -class OptimizeIfWithConstantConditionVisitor +struct OptimizeIfWithConstantConditionVisitorData { -public: - explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + using TypeToVisit = ASTFunction; + + explicit OptimizeIfWithConstantConditionVisitorData(Aliases & aliases_) : aliases(aliases_) {} - void visit(ASTPtr & ast); - + void visit(ASTFunction & function_node, ASTPtr & ast); private: Aliases & aliases; }; +/// It removes Function_if node from AST if condition is constant. +using OptimizeIfWithConstantConditionVisitor = InDepthNodeVisitor, false>; + } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b88d75cd5a2..b872eb94fde 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -577,7 +577,8 @@ void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_ optimizeMultiIfToIf(query); /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. - OptimizeIfWithConstantConditionVisitor(aliases).visit(query); + OptimizeIfWithConstantConditionVisitorData visitor_data(aliases); + OptimizeIfWithConstantConditionVisitor(visitor_data).visit(query); if (if_chain_to_multiif) OptimizeIfChainsVisitor().visit(query); From 5f28c025ce39b1f9e9ea2e48b3cee78ad9a432be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Jul 2024 12:51:29 +0200 Subject: [PATCH 059/182] Fix test (connections use coroutines) --- tests/queries/0_stateless/03201_local_named_collections.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh index 54ca76a52d9..2054a09df06 100755 --- a/tests/queries/0_stateless/03201_local_named_collections.sh +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -13,7 +13,7 @@ INSERT INTO test VALUES ('Hello, world!'); ${CLICKHOUSE_LOCAL} --multiquery " CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; SELECT * FROM remote(mydb, table = 'test'); -" +" | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" ${CLICKHOUSE_CLIENT} --multiquery " DROP TABLE test; From 97c6cbec46f5c93c2c6199576592a9262aff56f0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 7 Jul 2024 22:38:43 +0200 Subject: [PATCH 060/182] ad individual window function pages --- .../window-functions/dense_rank.md | 73 ++++++++++++++++++ .../sql-reference/window-functions/index.md | 35 ++++----- .../en/sql-reference/window-functions/rank.md | 74 +++++++++++++++++++ .../window-functions/row_number.md | 0 4 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/dense_rank.md create mode 100644 docs/en/sql-reference/window-functions/rank.md create mode 100644 docs/en/sql-reference/window-functions/row_number.md diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md new file mode 100644 index 00000000000..17ab894707e --- /dev/null +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -0,0 +1,73 @@ +--- +slug: /en/sql-reference/window-functions/dense_rank +sidebar_label: dense_rank +sidebar_position: 2 +--- + +# dense_rank + +This window function ranks the current row within its partition without gaps. 
In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. + +The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. + +**Syntax** + +```sql +dense_rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + dense_rank() OVER (ORDER BY salary DESC) AS dense_rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─dense_rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 2 │ +4. │ Michael Stanley │ 150000 │ 3 │ +5. │ Douglas Benson │ 150000 │ 3 │ +6. │ Scott Harrison │ 150000 │ 3 │ +7. │ James Henderson │ 140000 │ 4 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..a0246af610f 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,10 +1,11 @@ --- slug: /en/sql-reference/window-functions/ -sidebar_position: 62 sidebar_label: Window Functions -title: Window Functions +sidebar_position: 1 --- +# Window Functions + Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. @@ -12,19 +13,19 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Supported? | -|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Feature | Supported? 
| +|--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | -| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | -| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | -| `ROWS` frame | ✅ | -| `RANGE` frame | ✅ (the default) | -| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | -| `GROUPS` frame | ❌ | -| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | -| `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | +| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | +| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | +| `ROWS` frame | ✅ | +| `RANGE` frame | ✅ (the default) | +| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | +| `GROUPS` frame | ❌ | +| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | +| `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | +| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions @@ -74,12 +75,12 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. -- `row_number()` - Number the current row within its partition starting from 1. +- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. - `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. - `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. +- [`rank()`](./rank.md) - Rank the current row within its partition with gaps. +- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. - `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. - `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md new file mode 100644 index 00000000000..17db889ef92 --- /dev/null +++ b/docs/en/sql-reference/window-functions/rank.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/window-functions/rank +sidebar_label: rank +sidebar_position: 3 +--- + +# rank + +This window function ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. + +The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. + +**Syntax** + +```sql +rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). 
+ +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + rank() OVER (ORDER BY salary DESC) AS rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Douglas Benson │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 4 │ +6. │ Scott Harrison │ 150000 │ 4 │ +7. │ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md new file mode 100644 index 00000000000..e69de29bb2d From 7b3ce3c3b38f698eb80923061c8ec0e309e2cff6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Jul 2024 06:20:10 +0200 Subject: [PATCH 061/182] add leadInFrame, lagInFrame, row_number --- .../window-functions/lagInFrame.md | 79 +++++++++++++++++++ .../window-functions/leadInFrame.md | 60 ++++++++++++++ .../window-functions/row_number.md | 67 ++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 docs/en/sql-reference/window-functions/lagInFrame.md create mode 100644 docs/en/sql-reference/window-functions/leadInFrame.md diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md new file mode 100644 index 00000000000..ea9f6d9dea2 --- /dev/null +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 5 +--- + +# lagInFrame + +Return a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. + +**Syntax** + +```sql +lagInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. 
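
As a minimal sketch of how `offset` and `default` interact (illustrative only — `numbers(5)` is just a stand-in data source, not part of the documented example below): when fewer than `offset` rows precede the current row inside the frame, the `default` is returned instead.

```sql
-- Illustrative sketch: lag the running number by 2 rows within an unbounded frame,
-- falling back to the row's own value where no row exists 2 positions back.
SELECT
    number,
    lagInFrame(number, 2, number) OVER (
        ORDER BY number ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lagged
FROM numbers(5);
```

Here the first two rows keep their own value (the default), while later rows pick up the value from two rows earlier.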
+ +**Example** + +This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock. + +Query: + +```sql +CREATE TABLE stock_prices +( + `date` Date, + `open` Float32, -- opening price + `high` Float32, -- daily high + `low` Float32, -- daily low + `close` Float32, -- closing price + `volume` UInt32 -- trade volume +) +Engine = Memory; + +INSERT INTO stock_prices FORMAT Values + ('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000), + ('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000), + ('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000), + ('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000), + ('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000); +``` + +```sql +SELECT + date, + close, + lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close, + COALESCE(ROUND(close - previous_day_close, 2)) AS delta, + COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change +FROM stock_prices +ORDER BY date DESC; +``` + +Result: + +```response + ┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐ +1. │ 2024-06-07 │ 120.89 │ 121 │ -0.11 │ -0.09 │ +2. │ 2024-06-06 │ 121 │ 122.44 │ -1.44 │ -1.18 │ +3. │ 2024-06-05 │ 122.44 │ 116.44 │ 6 │ 5.15 │ +4. │ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │ +5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │ + └────────────┴────────┴────────────────────┴───────┴────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md new file mode 100644 index 00000000000..e3b65af9a4d --- /dev/null +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -0,0 +1,60 @@ +--- +slug: /en/sql-reference/window-functions/leadInFrame +sidebar_label: leadInFrame +sidebar_position: 6 +--- + +# leadInFrame + +Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Syntax** + +```sql +leadInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Example** + +This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category. 
+ +Query: + +```sql +CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *; +``` + +```sql +FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9; +``` + +Result: + +```response + ┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ + └──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index e69de29bb2d..428bb34a8ba 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -0,0 +1,67 @@ +--- +slug: /en/sql-reference/window-functions/row_number +sidebar_label: row_number +sidebar_position: 4 +--- + +# row_number + +Numbers the current row within its partition starting from 1 + +**Syntax** + +```sql +row_number (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). 
+ +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary DESC) AS row_number +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─row_number─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 2 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Scott Harrison │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 5 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file From 9b1003527dc8aba15729a63a86428390470bff07 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Jul 2024 11:34:39 +0200 Subject: [PATCH 062/182] Fix the order --- .../DataLakes/DeltaLakeMetadata.cpp | 75 ++++++++++--------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index bc64ef15cf1..12341c877e2 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -209,43 +209,6 @@ struct DeltaLakeMetadataImpl // object->stringify(oss); // LOG_TEST(log, "Metadata: {}", oss.str()); - if (object->has("add")) - { - auto add_object = object->get("add").extract(); - auto path = add_object->getValue("path"); - result.insert(fs::path(configuration->getPath()) / path); - - auto filename = fs::path(path).filename().string(); - auto it = file_partition_columns.find(filename); - if (it == file_partition_columns.end()) - { - if (add_object->has("partitionValues")) - { - auto partition_values = add_object->get("partitionValues").extract(); - if (partition_values->size()) - { - auto & current_partition_columns = file_partition_columns[filename]; - for (const auto & partition_name : partition_values->getNames()) - { - const auto value = partition_values->getValue(partition_name); - auto name_and_type = file_schema.tryGetByName(partition_name); - if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); - - auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); - - LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); - } - } - } - } - } - else if (object->has("remove")) - { - auto path = object->get("remove").extract()->getValue("path"); - result.erase(fs::path(configuration->getPath()) / path); - } if (object->has("metaData")) { const auto metadata_object = object->get("metaData").extract(); @@ -289,6 +252,44 @@ struct DeltaLakeMetadataImpl file_schema.toString(), current_schema.toString()); } } + + if (object->has("add")) + { + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + if (add_object->has("partitionValues")) + { + auto partition_values = 
add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + } + } + else if (object->has("remove")) + { + auto path = object->get("remove").extract()->getValue("path"); + result.erase(fs::path(configuration->getPath()) / path); + } } } From 4227447eac30dd77c6ba70d4b1685bbf11a8221f Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Jul 2024 12:53:55 +0200 Subject: [PATCH 063/182] add nth_value and update ordering --- .../sql-reference/window-functions/index.md | 6 +- .../window-functions/lagInFrame.md | 2 +- .../window-functions/leadInFrame.md | 2 +- .../window-functions/nth_value.md | 77 +++++++++++++++++++ .../en/sql-reference/window-functions/rank.md | 2 +- .../window-functions/row_number.md | 2 +- 6 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/nth_value.md diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index a0246af610f..ee54a679ba1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -78,11 +78,11 @@ These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. - `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. - `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. - [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. 
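
A rough combined sketch of several of the calls listed above in a single query (the `score` column and its values are invented purely for illustration; the sections below give fuller, documented examples):

```sql
-- Illustrative toy query: ranking and frame-aware functions over an ad hoc value list.
SELECT
    score,
    row_number() OVER w AS row_num,
    rank()       OVER w AS rnk,
    dense_rank() OVER w AS dense_rnk,
    lagInFrame(score, 1, score) OVER w AS prev_score
FROM VALUES('score UInt32', 90, 90, 75, 60)
WINDOW w AS (ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING);
```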
## Examples diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index ea9f6d9dea2..b67cf252283 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/lagInFrame sidebar_label: lagInFrame -sidebar_position: 5 +sidebar_position: 3 --- # lagInFrame diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index e3b65af9a4d..0cb4eea52b2 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 6 +sidebar_position: 4 --- # leadInFrame diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md new file mode 100644 index 00000000000..26c90110aaa --- /dev/null +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -0,0 +1,77 @@ +--- +slug: /en/sql-reference/window-functions/leadInFrame +sidebar_label: leadInFrame +sidebar_position: 5 +--- + +# nth_value + +Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. + +**Syntax** + +```sql +nth_value (x, offset) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** + +- `x` — Column name. +- `offset` — nth row to evaluate current row against. + +**Returned value** + +- The first non-NULL value evaluated against the nth row (offset) in its ordered frame. + +**Example** + +In this example the `nth-value` function is used to find the third-highest salary from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 10000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT salary, nth_value(salary,3) OVER(ORDER BY salary DESC) FROM salaries GROUP BY salary; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Douglas Benson │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 4 │ +6. │ Scott Harrison │ 150000 │ 4 │ +7. 
│ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index 17db889ef92..9ac99dde6df 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 3 +sidebar_position: 6 --- # rank diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index 428bb34a8ba..e7165d60169 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 4 +sidebar_position: 7 --- # row_number From c3b72386a2f26ad40431eca0c1985b14663b43ca Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Jul 2024 14:14:13 +0200 Subject: [PATCH 064/182] remove partitionID function, update tests and remove from documentation --- .../functions/other-functions.md | 60 ---------------- src/Functions/partitionId.cpp | 71 ------------------- tests/integration/test_lost_part/test.py | 2 +- .../01748_partition_id_pruning.sql | 6 +- 4 files changed, 4 insertions(+), 135 deletions(-) delete mode 100644 src/Functions/partitionId.cpp diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 86a03ed6df8..58fc1eba02e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2984,66 +2984,6 @@ Result: └─────────┘ ``` -## partitionId - -Returns computed [partition](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) IDs of its arguments. - -:::note -This function is slow and should not be called for large amount of rows. -::: - -**Syntax** - -```sql -partitionId(x[, y, ...]); -``` - -**Arguments** - -- `x` — Column for which to return the partition ID. -- `y, ...` — Remaining N columns for which to return the partition ID (optional). - -**Return Type** - -- Partition ID that the row belongs to. [String](../data-types/string.md). - -**Example** - -Query: - -```sql -DROP TABLE IF EXISTS mt; -CREATE TABLE mt -( - `i` int, - `j` int -) -ENGINE = MergeTree -PARTITION BY i -ORDER BY j -SETTINGS index_granularity = 1; - -INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); - -SELECT * FROM mt WHERE _partition_id = partitionId(1); -SELECT * FROM mt WHERE _partition_id = partitionId(2); -``` -Result: - -```response - ┌─i─┬─j─┐ -1. │ 1 │ 1 │ -2. │ 1 │ 2 │ -3. │ 1 │ 3 │ - └───┴───┘ - ┌─i─┬─j─┐ -1. │ 2 │ 4 │ -2. │ 2 │ 5 │ -3. │ 2 │ 6 │ - └───┴───┘ -``` - - ## shardNum Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp deleted file mode 100644 index e2e9038cd8b..00000000000 --- a/src/Functions/partitionId.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - - -/** partitionId(x, y, ...) is a function that computes partition ids of arguments. - * The function is slow and should not be called for large amount of rows. 
- */ -class FunctionPartitionId : public IFunction -{ -public: - static constexpr auto name = "partitionId"; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - bool isVariadic() const override { return true; } - - size_t getNumberOfArguments() const override { return 0; } - - bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } - - bool useDefaultImplementationForNulls() const override { return true; } - bool useDefaultImplementationForConstants() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - Block sample_block(arguments); - size_t size = arguments.size(); - - auto result_column = ColumnString::create(); - for (size_t j = 0; j < input_rows_count; ++j) - { - Row row(size); - for (size_t i = 0; i < size; ++i) - arguments[i].column->get(j, row[i]); - MergeTreePartition partition(std::move(row)); - result_column->insert(partition.getID(sample_block)); - } - return result_column; - } -}; - -REGISTER_FUNCTION(PartitionId) -{ - factory.registerFunction(); -} - -} diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index b8e67551d79..5d0ec383a39 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select partitionId('x')").strip() + partition_id = node1.query("select _partition_id").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index b637528bc6c..0e942573652 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set max_rows_to_read = 3; -select * from x where _partition_id = partitionId(1); +select * from x where _partition_id = '1'; set max_rows_to_read = 5; -- one row for subquery + subquery -select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select * from x where _partition_id in (select number + 1 from numbers(1)); -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection set max_rows_to_read = 3; -select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select count() from x where _partition_id in (select number + 1 from numbers(1)); drop table x; From ee4162f5ec3c34e2e35eec5fca012d75aee84401 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Mon, 8 Jul 2024 15:23:58 +0200 Subject: [PATCH 065/182] 
Update 02415_all_new_functions_must_be_documented.reference --- .../02415_all_new_functions_must_be_documented.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..eb5ae5b4f44 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -534,7 +534,6 @@ parseDateTimeInJodaSyntaxOrZero parseDateTimeOrNull parseDateTimeOrZero parseTimeDelta -partitionId path pathFull pi From 6098bc20d90fc3ab45128f62db70624efa0c05dc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 8 Jul 2024 10:49:46 -0300 Subject: [PATCH 066/182] remove test --- .../__init__.py | 0 .../configs/config.xml | 4 - .../configs/host_regexp.xml | 11 --- .../configs/listen_host.xml | 5 -- .../coredns_config/Corefile | 8 -- .../coredns_config/example.com | 1 - .../scripts/stress_test.py | 62 ------------- .../test.py | 88 ------------------- 8 files changed, 179 deletions(-) delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml deleted file mode 100644 index 42a1f962705..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 250 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml deleted file mode 100644 index 9329c8dbde2..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - test1\.example\.com$ - - default - - - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml deleted file mode 100644 index 9c27c612f63..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ /dev/null @@ -1,5 +0,0 @@ - - :: - 0.0.0.0 - 1 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile 
b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile deleted file mode 100644 index 3edf37dafa5..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile +++ /dev/null @@ -1,8 +0,0 @@ -. { - hosts /example.com { - reload "20ms" - fallthrough - } - forward . 127.0.0.11 - log -} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com deleted file mode 100644 index 9beb415c290..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com +++ /dev/null @@ -1 +0,0 @@ -filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py deleted file mode 100644 index 70419f95dd3..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pycurl -import threading -from io import BytesIO -import sys - -client_ip = sys.argv[1] -server_ip = sys.argv[2] - -mutex = threading.Lock() -success_counter = 0 -number_of_threads = 100 -number_of_iterations = 50 - - -def perform_request(): - buffer = BytesIO() - crl = pycurl.Curl() - crl.setopt(pycurl.INTERFACE, client_ip) - crl.setopt(crl.WRITEDATA, buffer) - crl.setopt(crl.URL, f"http://{server_ip}:8123/?query=select+1&user=test_dns") - - crl.perform() - - # End curl session - crl.close() - - str_response = buffer.getvalue().decode("iso-8859-1") - expected_response = "1\n" - - mutex.acquire() - - global success_counter - - if str_response == expected_response: - success_counter += 1 - - mutex.release() - - -def perform_multiple_requests(n): - for request_number in range(n): - perform_request() - - -threads = [] - - -for i in range(number_of_threads): - thread = threading.Thread( - target=perform_multiple_requests, args=(number_of_iterations,) - ) - thread.start() - threads.append(thread) - -for thread in threads: - thread.join() - - -if success_counter == number_of_threads * number_of_iterations: - exit(0) - -exit(1) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py deleted file mode 100644 index d73e8813e79..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py +++ /dev/null @@ -1,88 +0,0 @@ -import pytest -import socket -from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check -from time import sleep -import os - -DOCKER_COMPOSE_PATH = get_docker_compose_path() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -cluster = ClickHouseCluster(__file__) - -ch_server = cluster.add_instance( - "clickhouse-server", - with_coredns=True, - main_configs=["configs/config.xml", "configs/listen_host.xml"], - user_configs=["configs/host_regexp.xml"], -) - -client = cluster.add_instance( - "clickhouse-client", -) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def check_ptr_record(ip, hostname): - try: - host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) - if 
hostname.lower() == host.lower(): - return True - except socket.herror: - pass - return False - - -def setup_dns_server(ip): - domains_string = "test3.example.com test2.example.com test1.example.com" - example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' - run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) - - # DNS server takes time to reload the configuration. - for try_num in range(10): - if all(check_ptr_record(ip, host) for host in domains_string.split()): - break - sleep(1) - - -def setup_ch_server(dns_server_ip): - ch_server.exec_in_container( - (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) - ) - ch_server.exec_in_container( - (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) - ) - ch_server.query("SYSTEM DROP DNS CACHE") - - -def build_endpoint_v4(ip): - return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" - - -def build_endpoint_v6(ip): - return build_endpoint_v4(f"[{ip}]") - - -def test_host_regexp_multiple_ptr_v4(started_cluster): - server_ip = cluster.get_instance_ip("clickhouse-server") - client_ip = cluster.get_instance_ip("clickhouse-client") - dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - - setup_dns_server(client_ip) - setup_ch_server(dns_server_ip) - - current_dir = os.path.dirname(__file__) - client.copy_file_to_container( - os.path.join(current_dir, "scripts", "stress_test.py"), "stress_test.py" - ) - - client.exec_in_container(["python3", f"stress_test.py", client_ip, server_ip]) From 9ba10ca604ad6705ad46a60b9d03569c4729afcc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 30 Jun 2024 17:22:42 +0200 Subject: [PATCH 067/182] Remove mysqlxx::Pool::Entry assignment operator v2: fix tidy https://s3.amazonaws.com/clickhouse-builds/PRs/65920/86789491be1a945602f6ebf0b3b93bf5272e52ab/binary_tidy/build_log.log Signed-off-by: Azat Khuzhin --- src/Common/mysqlxx/Pool.cpp | 1 - src/Common/mysqlxx/mysqlxx/Pool.h | 11 ----------- src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp | 4 +--- src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 12 +++++++----- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index cc5b18214c8..546e9e91dc7 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet() for (auto connection_it = connections.cbegin(); connection_it != connections.cend();) { Connection * connection_ptr = *connection_it; - /// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator if (connection_ptr->ref_count == 0) { { diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index 6e509d8bdd6..f1ef81e28dd 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -64,17 +64,6 @@ public: decrementRefCount(); } - Entry & operator= (const Entry & src) /// NOLINT - { - pool = src.pool; - if (data) - decrementRefCount(); - data = src.data; - if (data) - incrementRefCount(); - return * this; - } - bool isNull() const { return data == nullptr; diff --git a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 61d6a117285..121767edc84 100644 --- a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -13,13 +13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) constexpr size_t max_tries = 3; - 
mysqlxx::Pool::Entry worker_connection; - for (size_t try_no = 1; try_no <= max_tries; ++try_no) { try { - worker_connection = connections_pool.tryGet(); + mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet(); if (!worker_connection.isNull()) { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 7ab4235feeb..27ebe0b6d21 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -532,13 +533,17 @@ static inline void dumpDataForTables( bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata) { bool opened_transaction = false; - mysqlxx::PoolWithFailover::Entry connection; while (!isCancelled()) { try { - connection = pool.tryGet(); + mysqlxx::PoolWithFailover::Entry connection = pool.tryGet(); + SCOPE_EXIT({ + if (opened_transaction) + connection->query("ROLLBACK").execute(); + }); + if (connection.isNull()) { if (settings->max_wait_time_when_mysql_unavailable < 0) @@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { tryLogCurrentException(log); - if (opened_transaction) - connection->query("ROLLBACK").execute(); - if (settings->max_wait_time_when_mysql_unavailable < 0) throw; From db1817a633d1fc1dca13eefa75303f0cb78145b7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 19:33:52 +0000 Subject: [PATCH 068/182] Some minor fixups --- .../settings/merge-tree-settings.md | 2 +- .../functions/date-time-functions.md | 434 +++++++++--------- src/Functions/changeDate.cpp | 110 ++--- .../aspell-ignore/en/aspell-dict.txt | 3 + 4 files changed, 285 insertions(+), 264 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 22c8c704ba2..7278b91f90d 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1030,7 +1030,7 @@ A table with no primary key represents the extreme case of a single equivalence The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows. -The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. +The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemire, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. It performs three steps: 1. Find all equivalence classes based on the row values in primary key columns. 2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns. 
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1edd8d407eb..4f5e5a5d716 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -2698,6 +2698,204 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types Accepts an additional, optional `precision` parameter after the `timezone` parameter. +## changeYear + +Changes the year component of a date or date time. + +**Syntax** +``` sql + +changeYear(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- The same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: + +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Changes the month component of a date or date time. + +**Syntax** + +``` sql +changeMonth(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); +``` + +Result: + +``` +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. 
+ +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeSecond + +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + ## addYears Adds a specified number of years to a date, a date with time or a string-encoded date / date with time. @@ -2714,6 +2912,7 @@ addYears(date, num) - `num`: Number of years to add. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2751,6 +2950,7 @@ addQuarters(date, num) - `num`: Number of quarters to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2788,6 +2988,7 @@ addMonths(date, num) - `num`: Number of months to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2825,6 +3026,7 @@ addWeeks(date, num) - `num`: Number of weeks to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2862,6 +3064,7 @@ addDays(date, num) - `num`: Number of days to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2899,6 +3102,7 @@ addHours(date, num) - `num`: Number of hours to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2936,6 +3140,7 @@ addMinutes(date, num) - `num`: Number of minutes to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2973,6 +3178,7 @@ addSeconds(date, num) - `num`: Number of seconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3010,6 +3216,7 @@ addMilliseconds(date_time, num) - `num`: Number of milliseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3045,6 +3252,7 @@ addMicroseconds(date_time, num) - `num`: Number of microseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3080,6 +3288,7 @@ addNanoseconds(date_time, num) - `num`: Number of nanoseconds to add. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3115,6 +3324,7 @@ addInterval(interval_1, interval_2) - `interval_2`: Second interval to be added. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3161,6 +3371,7 @@ addTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to add to `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with added `intervals`. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md). **Example** @@ -3195,6 +3406,7 @@ subtractYears(date, num) - `num`: Number of years to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3232,6 +3444,7 @@ subtractQuarters(date, num) - `num`: Number of quarters to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3269,6 +3482,7 @@ subtractMonths(date, num) - `num`: Number of months to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3306,6 +3520,7 @@ subtractWeeks(date, num) - `num`: Number of weeks to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3343,6 +3558,7 @@ subtractDays(date, num) - `num`: Number of days to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3380,6 +3596,7 @@ subtractHours(date, num) - `num`: Number of hours to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3417,6 +3634,7 @@ subtractMinutes(date, num) - `num`: Number of minutes to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). 
**Example** @@ -3454,6 +3672,7 @@ subtractSeconds(date, num) - `num`: Number of seconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3491,6 +3710,7 @@ subtractMilliseconds(date_time, num) - `num`: Number of milliseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3526,6 +3746,7 @@ subtractMicroseconds(date_time, num) - `num`: Number of microseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3561,6 +3782,7 @@ subtractNanoseconds(date_time, num) - `num`: Number of nanoseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3596,6 +3818,7 @@ subtractInterval(interval_1, interval_2) - `interval_2`: Second interval to be negated. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3642,6 +3865,7 @@ subtractTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to subtract from `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with subtracted `intervals`. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3660,216 +3884,6 @@ Result: └───────────────────────────────────────────────────────────────────────┘ ``` -## changeYear - -Changes the year component of a date or date time. - -**Syntax** - -``` sql -changeYear(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); -``` - -Result: - -``` -┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ -│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeMonth - -Changes the month component of a date or date time. 
- -**Syntax** - -``` sql -changeMonth(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); -``` - -Result: - -``` -┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ -│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeDay - -Changes the day component of a date or date time. - -**Syntax** - -``` sql -changeDay(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); -``` - -Result: - -``` -┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ -│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ -└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ -``` - -## changeHour - -Changes the hour component of a date or date time. - -**Syntax** - -``` sql -changeHour(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); -``` - -Result: - -``` -┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ -│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeMinute - -Changes the minute component of a date or date time. 
- -**Syntax** - -``` sql -changeMinute(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeSecond - -Changes the second component of a date or date time. - -**Syntax** - -``` sql -changeSecond(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. 
diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index e24391afe12..5965f3d1d00 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -41,11 +41,6 @@ enum class Component Second }; -bool isTimeComponentChange(Component type) -{ - return type == Component::Hour || type == Component::Minute || type == Component::Second; -} - } template @@ -65,11 +60,11 @@ public: {"date_or_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"}, {"value", static_cast(&isNativeInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const auto & input_type = arguments[0].type; - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) { if (isDate(input_type)) return std::make_shared(); @@ -85,13 +80,13 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } @@ -109,83 +104,92 @@ public: bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); size_t result_rows_count = (is_const ? 
1 : input_rows_count); - typename ResultDataType::ColumnType::MutablePtr result_column; + typename ResultDataType::ColumnType::MutablePtr result_col; if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); - result_column = ResultDataType::ColumnType::create(result_rows_count, scale); + result_col = ResultDataType::ColumnType::create(result_rows_count, scale); } else - result_column = ResultDataType::ColumnType::create(result_rows_count); + result_col = ResultDataType::ColumnType::create(result_rows_count); - auto input_column = arguments[0].column->convertToFullIfNeeded(); - const auto & input_column_data = typeid_cast(*input_column).getData(); + auto date_time_col = arguments[0].column->convertToFullIfNeeded(); + const auto & date_time_col_data = typeid_cast(*date_time_col).getData(); - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullIfNeeded(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + auto value_col = castColumn(arguments[1], std::make_shared()); + value_col = value_col->convertToFullIfNeeded(); + const auto & value_col_data = typeid_cast(*value_col).getData(); - auto & result_data = result_column->getData(); + auto & result_col_data = result_col->getData(); - for (size_t i = 0; i < result_rows_count; ++i) + if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + + Int64 deg = 1; + for (size_t j = 0; j < scale; ++j) + deg *= 10; + + for (size_t i = 0; i < result_rows_count; ++i) { - const auto scale = typeid_cast(*result_type).getScale(); - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg); + Int64 fraction = date_time_col_data[i] % deg; - Int64 deg = 1; - for (size_t j = 0; j < scale; ++j) - deg *= 10; - - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); - Int64 fraction = input_column_data[i] % deg; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, scale, fraction); } - else if constexpr (std::is_same_v && std::is_same_v) + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0); } - else if constexpr (std::is_same_v && std::is_same_v) + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - - result_data[i] = 
static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } - else if constexpr (std::is_same_v) + } + else if constexpr (std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } - else + } + else + { + const auto & date_lut = DateLUT::instance(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = DateLUT::instance(); Int64 time; if (isDate(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(date_time_col_data[i]))) * 1'000'000; else - time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } } if (is_const) - return ColumnConst::create(std::move(result_column), input_rows_count); + return ColumnConst::create(std::move(result_col), input_rows_count); - return result_column; + return result_col; } Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 5689f94d2ae..5e31a09effb 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -467,6 +467,7 @@ LOCALTIME LOCALTIMESTAMP LONGLONG LOONGARCH +Lemire Levenshtein Liao LibFuzzer @@ -1962,6 +1963,8 @@ loghouse london lookups loongarch +lowCardinalityIndices +lowCardinalityKeys lowcardinality lowerUTF lowercased From 85f5fe832a23fa25e9784728b9f26d712057148a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 20:46:56 +0000 Subject: [PATCH 069/182] Fix integration tests --- tests/integration/test_backward_compatibility/test_functions.py | 2 ++ tests/integration/test_lost_part/test.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fc03a77030e..551a0550c0c 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -168,6 +168,8 @@ def test_string_functions(start_cluster): "filesystemAvailable", # Exclude it for 
now. Looks like the result depends on the build type. "farmHash64", + # Removed in 24.7 + "partitionId", ] functions = filter(lambda x: x not in excludes, functions) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 5d0ec383a39..5070b3c5b68 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select _partition_id").strip() + partition_id = node1.query("select distinct _partition_id from mt3 where p = 'x'").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts From 595a12ee884840da76e2b50ae723511e00915b85 Mon Sep 17 00:00:00 2001 From: nauu Date: Tue, 9 Jul 2024 09:48:51 +0800 Subject: [PATCH 070/182] to avoid ambiguity, replace FilesystemCacheFailToReserveSpaceBecauseOfLockContention with FilesystemCacheFailToReserveSpaceBecauseOfCacheResize. to avoid ambiguity, replace FilesystemCacheFailToReserveSpaceBecauseOfLockContention with FilesystemCacheResize. --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 439965a92fb..e80afc95e8d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,6 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 0d33e39ffa3..a3848fa3a75 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -30,6 +30,7 @@ namespace ProfileEvents extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; extern const Event FilesystemCacheFreeSpaceKeepingThreadRun; extern const Event FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfCacheResize; } namespace DB @@ -813,7 +814,7 @@ bool FileCache::tryReserve( /// ok compared to the number of cases this check will help. 
if (cache_is_being_resized.load(std::memory_order_relaxed)) { - ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize); return false; } From 9153e65456dd3a90d9be85c1a8a52592ce054e77 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:15:41 +0200 Subject: [PATCH 071/182] Remove unneded include --- src/DataTypes/DataTypeDynamic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index 6826c46a1a7..c920e69c13b 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -12,7 +12,6 @@ #include #include #include -#include namespace DB { From 0eb081f899bd0fa9c185add84c9c59e212dfd6ca Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 9 Jul 2024 10:30:07 +0000 Subject: [PATCH 072/182] Fix style --- tests/integration/test_lost_part/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 5070b3c5b68..e88b84775b4 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,9 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select distinct _partition_id from mt3 where p = 'x'").strip() + partition_id = node1.query( + "select distinct _partition_id from mt3 where p = 'x'" + ).strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts From 6baa52d10176369fefc6249dbb256a26eb7b1bdc Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 14:01:28 +0200 Subject: [PATCH 073/182] Fix null insertion into dynamic column --- src/Formats/JSONExtractTree.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 8fe472930d3..9efb1392583 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1265,9 +1265,16 @@ public: bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { auto & column_dynamic = assert_cast(column); + /// First, check if element is NULL. + if (element.isNull()) + { + column_dynamic.insertDefault(); + return true; + } + auto & variant_column = column_dynamic.getVariantColumn(); auto variant_info = column_dynamic.getVariantInfo(); - /// First, infer ClickHouse type for this element and add it as a new variant. + /// Second, infer ClickHouse type for this element and add it as a new variant. 
auto element_type = elementToDataType(element, format_settings); if (column_dynamic.addNewVariant(element_type)) { From 61f827b5698754af84e7d75a2b83bd0191139820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 9 Jul 2024 15:26:33 +0200 Subject: [PATCH 074/182] Update src/Databases/DatabaseAtomic.cpp --- src/Databases/DatabaseAtomic.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index a48eb2abce6..5b816e4f282 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -106,8 +106,8 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { - // it is important to call destructures not_in_use without - // blocking mutex for avoid potential deadlock. + // it is important to call the destructors of not_in_use without + // locked mutex to avoid potential deadlock. DetachedTables not_in_use; StoragePtr table; { From 8a202d91ad745089adaff4ebf2cde5e6754503ce Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 9 Jul 2024 16:24:35 +0200 Subject: [PATCH 075/182] Properly read schema and partition columns from checkpoint file --- .../DataLakes/DeltaLakeMetadata.cpp | 169 ++++++++++++------ .../DataLakes/IStorageDataLake.h | 10 +- .../StorageObjectStorageSource.cpp | 28 +-- tests/integration/test_storage_delta/test.py | 129 +++++++++++-- 4 files changed, 255 insertions(+), 81 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 12341c877e2..d37bffc42c4 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -111,7 +113,7 @@ struct DeltaLakeMetadataImpl std::set result_files; NamesAndTypesList current_schema; DataLakePartitionColumns current_partition_columns; - const auto checkpoint_version = getCheckpointIfExists(result_files); + const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns); if (checkpoint_version) { @@ -205,9 +207,9 @@ struct DeltaLakeMetadataImpl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - // object->stringify(oss); - // LOG_TEST(log, "Metadata: {}", oss.str()); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); if (object->has("metaData")) { @@ -216,30 +218,9 @@ struct DeltaLakeMetadataImpl Poco::JSON::Parser p; Poco::Dynamic::Var fields_json = parser.parse(schema_object); - Poco::JSON::Object::Ptr fields_object = fields_json.extract(); - - const auto fields = fields_object->get("fields").extract(); - NamesAndTypesList current_schema; - for (size_t i = 0; i < fields->size(); ++i) - { - const auto field = fields->getObject(static_cast(i)); - auto column_name = field->getValue("name"); - auto type = field->getValue("type"); - auto is_nullable = field->getValue("nullable"); - - std::string physical_name; - auto schema_metadata_object = field->get("metadata").extract(); - if (schema_metadata_object->has("delta.columnMapping.physicalName")) - 
physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); - else - physical_name = column_name; - - LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", - column_name, type, is_nullable, physical_name); - - current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); - } + const Poco::JSON::Object::Ptr & fields_object = fields_json.extract(); + auto current_schema = parseMetadata(fields_object); if (file_schema.empty()) { file_schema = current_schema; @@ -274,7 +255,12 @@ struct DeltaLakeMetadataImpl const auto value = partition_values->getValue(partition_name); auto name_and_type = file_schema.tryGetByName(partition_name); if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toNamesAndTypesDescription()); + } auto field = getFieldValue(value, name_and_type->type); current_partition_columns.emplace_back(*name_and_type, field); @@ -293,6 +279,32 @@ struct DeltaLakeMetadataImpl } } + NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json) + { + NamesAndTypesList schema; + const auto fields = metadata_json->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + return schema; + } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) { if (field->isObject(type_key)) @@ -506,7 +518,10 @@ struct DeltaLakeMetadataImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result) + size_t getCheckpointIfExists( + std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns) { const auto version = readLastCheckpointIfExists(); if (!version) @@ -527,7 +542,8 @@ struct DeltaLakeMetadataImpl auto columns = ParquetSchemaReader(*buf, format_settings).readSchema(); /// Read only columns that we need. 
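        /// A Delta Lake checkpoint Parquet file stores both "add" entries (data file paths together with
        /// their partitionValues) and a "metaData" entry whose schemaString holds the table schema, which
        /// is why both top-level columns are read below while "remove" is no longer needed here.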
- columns.filterColumns(NameSet{"add", "remove"}); + auto filter_column_names = NameSet{"add", "metaData"}; + columns.filterColumns(filter_column_names); Block header; for (const auto & column : columns) header.insert({column.type->createColumn(), column.type, column.name}); @@ -541,9 +557,6 @@ struct DeltaLakeMetadataImpl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr file_schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); - ArrowColumnToCHColumn column_reader( header, "Parquet", format_settings.parquet.allow_missing_columns, @@ -554,29 +567,85 @@ struct DeltaLakeMetadataImpl std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); - const auto & res_columns = res.getColumns(); + Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); + auto res_block = header.cloneWithColumns(chunk.detachColumns()); + res_block = Nested::flatten(res_block); - if (res_columns.size() != 2) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Unexpected number of columns: {} (having: {}, expected: {})", - res_columns.size(), res.dumpStructure(), header.dumpStructure()); - } + const auto * nullable_path_column = assert_cast(res_block.getByName("add.path").column.get()); + const auto & path_column = assert_cast(nullable_path_column->getNestedColumn()); + + const auto * nullable_schema_column = assert_cast(res_block.getByName("metaData.schemaString").column.get()); + const auto & schema_column = assert_cast(nullable_schema_column->getNestedColumn()); + + auto partition_values_column_raw = res_block.getByName("add.partitionValues").column; + const auto & partition_values_column = assert_cast(*partition_values_column_raw); - const auto * tuple_column = assert_cast(res_columns[0].get()); - const auto & nullable_column = assert_cast(tuple_column->getColumn(0)); - const auto & path_column = assert_cast(nullable_column.getNestedColumn()); for (size_t i = 0; i < path_column.size(); ++i) { - const auto filename = String(path_column.getDataAt(i)); - if (filename.empty()) + const auto metadata = String(schema_column.getDataAt(i)); + if (!metadata.empty()) + { + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(metadata); + const Poco::JSON::Object::Ptr & object = json.extract(); + + auto current_schema = parseMetadata(object); + if (file_schema.empty()) + { + file_schema = current_schema; + LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString()); + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } + } + + for (size_t i = 0; i < path_column.size(); ++i) + { + const auto path = String(path_column.getDataAt(i)); + if (path.empty()) continue; - LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + Field map; + partition_values_column.get(i, map); + auto partition_values_map = map.safeGet(); + if (!partition_values_map.empty()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & map_value : partition_values_map) + { + 
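                        /// Each entry is a (partition column name, raw string value) tuple; the name is
                        /// resolved against the schema parsed from metaData.schemaString and the string is
                        /// converted to that column's type, mirroring the JSON metadata path above.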
const auto tuple = map_value.safeGet(); + const auto partition_name = tuple[0].safeGet(); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toString()); + } + const auto value = tuple[1].safeGet(); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + + LOG_TEST(log, "Adding {}", path); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / path); if (!inserted) - throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); + throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", path); } return version; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index f1217bc9729..d6935c706d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -17,6 +17,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support @@ -41,6 +45,7 @@ public: auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + const bool use_schema_from_metadata = columns_.empty(); if (base_configuration->format == "auto") base_configuration->format = "Parquet"; @@ -50,8 +55,9 @@ public: try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); - schema_from_metadata = metadata->getTableSchema(); configuration->setPaths(metadata->getDataFiles()); + if (use_schema_from_metadata) + schema_from_metadata = metadata->getTableSchema(); } catch (...) { @@ -66,7 +72,7 @@ public: return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, context, table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + use_schema_from_metadata ? 
ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6940f10cb91..a9a7e062076 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -206,23 +206,25 @@ Chunk StorageObjectStorageSource::generate() if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) { auto partition_values = partition_columns.find(filename); - - for (const auto & [name_and_type, value] : partition_values->second) + if (partition_values != partition_columns.end()) { - if (!read_from_format_info.source_header.has(name_and_type.name)) - continue; + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; - const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); - auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); - /// This column is filled with default value now, remove it. - chunk.erase(column_pos); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); - /// Add correct values. - if (chunk.hasColumns()) - chunk.addColumn(column_pos, std::move(partition_column)); - else - chunk.addColumn(std::move(partition_column)); + /// Add correct values. + if (column_pos < chunk.getNumColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } } } return chunk; diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 4cb71895881..d3dd7cfe52a 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -596,19 +596,116 @@ def test_partition_columns(started_cluster): ) assert result == 1 - # instance.query( - # f""" - # DROP TABLE IF EXISTS {TABLE_NAME}; - # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) - # ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" - # ) - # assert ( - # int( - # instance.query( - # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" - # ) - # ) - # == num_rows - 1 - # ) - # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") - # assert False + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == 
toDateTime('2000/01/05')" + ) + ) + == 1 + ) + + # Subset of columns should work. + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """test1 2000-01-01 1 +test2 2000-01-02 2 +test3 2000-01-03 3 +test4 2000-01-04 4 +test5 2000-01-05 5 +test6 2000-01-06 6 +test7 2000-01-07 7 +test8 2000-01-08 8 +test9 2000-01-09 9""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + for i in range(num_rows + 1, 2 * num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + ok = False + for file in files: + if file.endswith("last_checkpoint"): + ok = True + assert ok + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows * 2 + + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false +10 test10 2000-01-10 10 false +11 test11 2000-01-11 11 false +12 test12 2000-01-12 12 false +13 test13 2000-01-13 13 false +14 test14 2000-01-14 14 false +15 test15 2000-01-15 15 false +16 test16 2000-01-16 16 false +17 test17 2000-01-17 17 false +18 test18 2000-01-18 18 false""" + == instance.query( + f""" +SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c + """ + ).strip() + ) + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')" + ) + ) + == 1 + ) From b4f59b96c274fcde50050e172a91d455eddcb17f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 19:30:15 +0200 Subject: [PATCH 076/182] Update IStorageDataLake.h --- src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index d6935c706d9..c8603fccb86 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -17,10 +17,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support From 9fc557ad65ab0a306e417d01ea0b4636a0569824 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 9 Jul 2024 17:36:09 +0000 Subject: [PATCH 077/182] Ignore ON CLUSTER clause in queries for management of replicated named collections --- .../NamedCollectionsFactory.cpp | 9 +++++- .../NamedCollectionsFactory.h | 2 ++ .../NamedCollectionsMetadataStorage.cpp | 12 ++++---- 
.../NamedCollectionsMetadataStorage.h | 2 +- src/Core/Settings.h | 1 + .../InterpreterAlterNamedCollectionQuery.cpp | 7 +++-- .../InterpreterCreateNamedCollectionQuery.cpp | 7 +++-- .../InterpreterDropNamedCollectionQuery.cpp | 7 +++-- .../removeOnClusterClauseIfNeeded.cpp | 16 ++++++++++- .../named_collections_with_zookeeper.xml | 17 +++++++++++ .../configs/users.d/users.xml | 5 ++++ .../test_named_collections/test.py | 28 +++++++++++++++++++ 12 files changed, 98 insertions(+), 15 deletions(-) diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 14105a8651d..2faea1957ba 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) loadFromConfig(context->getConfigRef(), lock); loadFromSQL(lock); - if (metadata_storage->supportsPeriodicUpdate()) + if (metadata_storage->isReplicated()) { update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); update_task->activate(); @@ -357,6 +357,13 @@ void NamedCollectionFactory::reloadFromSQL() add(std::move(collections), lock); } +bool NamedCollectionFactory::usesReplicatedStorage() +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + return metadata_storage->isReplicated(); +} + void NamedCollectionFactory::updateFunc() { LOG_TRACE(log, "Named collections background updating thread started"); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 6ee5940e686..a0721ad8a50 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -34,6 +34,8 @@ public: void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + bool usesReplicatedStorage(); + void loadIfNot(); void shutdown(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 32fdb25abd3..b3671350f92 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -67,7 +67,7 @@ public: virtual bool removeIfExists(const std::string & path) = 0; - virtual bool supportsPeriodicUpdate() const = 0; + virtual bool isReplicated() const = 0; virtual bool waitUpdate(size_t /* timeout */) { return false; } }; @@ -89,7 +89,7 @@ public: ~LocalStorage() override = default; - bool supportsPeriodicUpdate() const override { return false; } + bool isReplicated() const override { return false; } std::vector list() const override { @@ -221,7 +221,7 @@ public: ~ZooKeeperStorage() override = default; - bool supportsPeriodicUpdate() const override { return true; } + bool isReplicated() const override { return true; } /// Return true if children changed. 
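    /// (Only meaningful for this ZooKeeper-backed, i.e. replicated, storage; the base-class stub above
    /// keeps returning false for the local storage.)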
bool waitUpdate(size_t timeout) override @@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); } -bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +bool NamedCollectionsMetadataStorage::isReplicated() const { - return storage->supportsPeriodicUpdate(); + return storage->isReplicated(); } bool NamedCollectionsMetadataStorage::waitUpdate() { - if (!storage->supportsPeriodicUpdate()) + if (!storage->isReplicated()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); const auto & config = Context::getGlobalContextInstance()->getConfigRef(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index 3c089fe2fa2..c3468fbc468 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -30,7 +30,7 @@ public: /// Return true if update was made bool waitUpdate(); - bool supportsPeriodicUpdate() const; + bool isReplicated() const; private: class INamedCollectionsStorage; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d84e5b149f6..6c53837138b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -364,6 +364,7 @@ class IColumn; \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 79a17fd1844..0e83e2039f6 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterAlterNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().updateFromSQL(query); diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c71441daa8c..b4920b1729f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO 
InterpreterCreateNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().createFromSQL(query); diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index 2edaef1b2f2..6233d21b439 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterDropNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().removeFromSQL(query); diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 44167fe7242..dd20164925c 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -38,6 +42,13 @@ static bool isAccessControlQuery(const ASTPtr & query) || query->as(); } +static bool isNamedCollectionQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as(); +} + ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) { auto * query_on_cluster = dynamic_cast(query.get()); @@ -50,7 +61,10 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries - && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + || (isNamedCollectionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries + && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml index 2d7946d1587..43d80ee6f69 100644 --- 
a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml +++ b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml @@ -9,4 +9,21 @@ value1 + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml index 15da914f666..7d4f0543ff1 100644 --- a/tests/integration/test_named_collections/configs/users.d/users.xml +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -1,4 +1,9 @@ + + + 0 + + diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index dbc502236c0..5d38047e885 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -3,6 +3,8 @@ import pytest import os import time from helpers.cluster import ClickHouseCluster +from contextlib import nullcontext as does_not_raise +from helpers.client import QueryRuntimeException SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NAMED_COLLECTIONS_CONFIG = os.path.join( @@ -761,3 +763,29 @@ def test_keeper_storage(cluster): check_dropped(node1) check_dropped(node2) + + +@pytest.mark.parametrize( + "ignore, expected_raise", + [(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))], +) +def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): + node = cluster.instances["node_with_keeper"] + + replace_in_users_config( + node, + "ignore_on_cluster_for_replicated_named_collections_queries>.", + f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}", + ) + node.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node.query( + f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" + ) + node.query( + f"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3" + ) + node.query( + f"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) From 5a12659f43f74aa501610404c4b2ee6b1b4a02c9 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:18:03 +0200 Subject: [PATCH 078/182] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 43d3c698d8a..637d277e6f8 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s KILL --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From 0d54151cb81421b8eaa99df0c8abb224b776570b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Jul 2024 19:55:37 +0000 Subject: [PATCH 079/182] Make the pocketfft to point to the upstream/master branch --- contrib/pocketfft | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/contrib/pocketfft b/contrib/pocketfft index 9efd4da52cf..f4c1aa8aa9c 160000 --- a/contrib/pocketfft +++ b/contrib/pocketfft @@ -1 +1 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 +Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3 From 2b091983e8df97a5a103be8aa03ad2c0a836ff46 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Jul 2024 19:59:49 +0000 Subject: [PATCH 080/182] Bump Azure to https://github.com/ClickHouse/azure-sdk-for-cpp/commit/ea3e19a7be08519134c643177d56c7484dfec884 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 92c94d7f37a..ea3e19a7be0 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf +Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 From b53e58c501109c81d57a746cc4b3b8c45a6840ef Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 9 Jul 2024 22:19:47 +0200 Subject: [PATCH 081/182] Fix error reporting while copying to S3. --- src/IO/S3/copyS3File.cpp | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index bb654c3f5c9..0b3e5e50f3d 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -98,7 +98,6 @@ namespace size_t part_size; String tag; bool is_finished = false; - std::exception_ptr exception; }; size_t num_parts; @@ -111,6 +110,7 @@ namespace size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_parts TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -273,7 +273,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of {}", dest_key)); // Multipart upload failed because it wasn't possible to schedule all the tasks. // To avoid execution of already scheduled tasks we abort MultipartUpload. abortMultipartUpload(); @@ -385,7 +385,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, fmt::format("While writing part #{}", task->part_number)); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. + } } task_finish_notify(); }, Priority{}); @@ -435,22 +440,21 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) { - if (task.exception) - { - /// abortMultipartUpload() might be called already, see processUploadPartRequest(). - /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. - /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free - /// all storage consumed by all parts. 
- abortMultipartUpload(); + /// abortMultipartUpload() might be called already, see processUploadPartRequest(). + /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. + /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free + /// all storage consumed by all parts. + abortMultipartUpload(); - std::rethrow_exception(task.exception); - } - - part_tags.push_back(task.tag); + std::rethrow_exception(exception); } + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) + part_tags.push_back(task.tag); } }; From ebc87d0c702e9bb26814718fec97e4c938735dec Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 22:58:06 +0200 Subject: [PATCH 082/182] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 637d277e6f8..1c03f5107b0 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s KILL --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m -k 60m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From bc02d8e66ecc82bee3c8d0402b01816c5005ece9 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 10 Jul 2024 08:01:36 +0000 Subject: [PATCH 083/182] Fix settings changelog --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..3ccc7321088 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list Date: Wed, 10 Jul 2024 11:56:43 +0200 Subject: [PATCH 084/182] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 1c03f5107b0..8e66d2667f1 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m -k 60m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -k 60m -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From a32795d116903c66c18263f47e5d1e622d83a362 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 10 Jul 2024 10:07:02 +0000 Subject: [PATCH 085/182] Fix review comments --- src/Formats/JSONExtractTree.cpp | 174 ++++++++++++++++++++++++-------- src/Formats/JSONExtractTree.h | 6 ++ 
src/Functions/FunctionsJSON.cpp | 3 + 3 files changed, 139 insertions(+), 44 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 9efb1392583..242d2dc9f80 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,8 @@ #include #include #include -#include #include -#include +#include #include #include @@ -123,10 +123,7 @@ void jsonElementToString(const typename JSONParser::Element & element, WriteBuff template bool tryGetNumericValueFromJSONElement( - NumberType & value, - const typename JSONParser::Element & element, - bool convert_bool_to_integer, - String & error) + NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error) { switch (element.type()) { @@ -226,7 +223,11 @@ public: explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -270,7 +271,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -309,7 +314,11 @@ class StringNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -349,7 +358,11 @@ public: explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -387,7 +400,11 @@ class FixedStringNode : public JSONExtractTreeNode public: explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -431,7 +448,11 @@ public: } bool insertResultToColumn( - IColumn & column, const 
typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -484,7 +505,11 @@ class UUIDNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -525,7 +550,11 @@ public: explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && (is_nullable || format_settings.null_as_default)) { @@ -560,7 +589,11 @@ class DateNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -595,7 +628,11 @@ public: explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -656,7 +693,11 @@ public: explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { DecimalType value{}; @@ -688,7 +729,8 @@ public: } break; } - default: { + default: + { error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); return false; } @@ -707,10 +749,16 @@ template class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin { public: - explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) { } + explicit DateTime64Node(const DataTypeDateTime64 & 
datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) + { + } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -790,7 +838,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -857,7 +909,11 @@ class IPv4Node : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -895,7 +951,11 @@ class IPv6Node : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -936,7 +996,11 @@ public: explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -945,7 +1009,7 @@ public: } auto & col_null = assert_cast(column); - if (!nested-> insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + if (!nested->insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) return false; col_null.getNullMapColumn().insertValue(0); return true; @@ -965,7 +1029,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && (is_nullable || format_settings.null_as_default)) { @@ -975,7 +1043,7 @@ public: auto & col_lc = assert_cast(column); auto tmp_nested = 
col_lc.getDictionary().getNestedColumn()->cloneEmpty(); - if (!nested-> insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + if (!nested->insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) return false; col_lc.insertFromFullColumn(*tmp_nested, 0); @@ -994,7 +1062,11 @@ public: explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -1017,7 +1089,7 @@ public: for (auto value : array) { - if (nested-> insertResultToColumn(data, value, insert_settings, format_settings, error)) + if (nested->insertResultToColumn(data, value, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1058,7 +1130,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & tuple = assert_cast(column); size_t old_size = column.size(); @@ -1087,7 +1163,7 @@ public: for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) { - if (nested[index]-> insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1115,7 +1191,7 @@ public: auto it = object.begin(); for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) { - if (nested[index]-> insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1138,7 +1214,7 @@ public: auto index = name_to_index_map.find(key); if (index != name_to_index_map.end()) { - if (nested[index->second]-> insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1173,7 +1249,11 @@ public: explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (!element.isObject()) { @@ -1198,7 +1278,7 @@ public: key_col.insertData(pair.first.data(), pair.first.size()); /// Insert value - if (!value-> 
insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + if (!value->insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) { if (insert_settings.insert_default_on_invalid_elements_in_complex_types) { @@ -1232,13 +1312,17 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & column_variant = assert_cast(column); for (size_t i : order) { auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + if (variant_nodes[i]->insertResultToColumn(variant, element, insert_settings, format_settings, error)) { column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); column_variant.getOffsets().push_back(variant.size() - 1); @@ -1262,7 +1346,12 @@ template class DynamicNode : public JSONExtractTreeNode { public: - bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & column_dynamic = assert_cast(column); /// First, check if element is NULL. @@ -1281,7 +1370,7 @@ public: auto node = buildJSONExtractTree(element_type, "Dynamic inference"); auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); - if (!node-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) return false; variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); variant_column.getOffsets().push_back(variant.size() - 1); @@ -1290,14 +1379,14 @@ public: /// We couldn't add new variant. Try to insert element into current variants. auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); - if (variant_node-> insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) return true; /// We couldn't insert element into any existing variant, add String variant and read value as String. 
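        /// Last-resort path: the raw element is preserved as a String value instead of failing the whole
        /// insert -- e.g. (hypothetical case) an element that no existing variant accepts and for which
        /// no new variant can be added any more.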
column_dynamic.addStringVariant(); auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); - if (!getStringNode()-> insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) return false; variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); variant_column.getOffsets().push_back(string_column.size() - 1); @@ -1348,12 +1437,9 @@ private: if (format_settings.json.try_infer_numbers_from_strings) { - bool is_negative = false; if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) { json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); - if (is_negative) - json_inference_info.negative_integers.insert(type.get()); return type; } } diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h index 4735f568b1c..b5e82506548 100644 --- a/src/Formats/JSONExtractTree.h +++ b/src/Formats/JSONExtractTree.h @@ -9,7 +9,13 @@ namespace DB struct JSONExtractInsertSettings { + /// If false, JSON boolean values won't be inserted into columns with integer types + /// It's used in JSONExtractInt64/JSONExtractUInt64/... functions. bool convert_bool_to_integer = true; + /// If true, when complex type like Array/Map has both valid and invalid elements, + /// the default value will be inserted on invalid elements. + /// For example, if we have [1, "hello", 2] and type Array(UInt32), + /// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions. bool insert_default_on_invalid_elements_in_complex_types = false; }; diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index ca233becb63..db1602b1939 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -354,7 +354,10 @@ public: explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) { + /// Don't escape forward slashes during converting JSON elements to raw string. format_settings.json.escape_forward_slashes = false; + /// Don't insert default values on null during traversing the JSON element. + /// We allow to insert null only to Nullable columns in JSONExtract functions. 
format_settings.null_as_default = false; } From 6ffac4034a4ca1b18a2ad5aa44f2c7cce696c246 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 10 Jul 2024 14:20:12 +0200 Subject: [PATCH 086/182] Enable checks in assert_cast under sanitizers --- src/Common/assert_cast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index 0b73ba1cc12..f9d0bf0e595 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifndef NDEBUG +#ifdef ABORT_ON_LOGICAL_ERROR try { if constexpr (std::is_pointer_v) From f8b9fe621a9b249764538d08dbf361e3ba4a1d49 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 10 Jul 2024 12:47:19 +0000 Subject: [PATCH 087/182] Fix test --- tests/integration/test_named_collections/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index 5d38047e885..32846c79d23 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -780,6 +780,9 @@ def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): node.query("SYSTEM RELOAD CONFIG") with expected_raise: + node.query( + "DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) node.query( f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" ) From 4729161f4187e887e10757731b2381673234b43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 18:41:48 +0000 Subject: [PATCH 088/182] Collect logs from minio --- docker/test/stateful/run.sh | 5 +++++ docker/test/stateless/run.sh | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 2215ac2b37c..ce637b9a146 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -20,7 +20,10 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test /usr/share/clickhouse-test/config/install.sh azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & + ./setup_minio.sh stateful +mc admin trace clickminio > /test_output/rubbish.log & +MC_ADMIN_PID=$! config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -251,6 +254,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst ||: diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 029c5a03151..a01abe5929f 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -46,6 +46,9 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless +mc admin trace clickminio > /test_output/rubbish.log & +MC_ADMIN_PID=$! 
+ ./setup_hdfs_minicluster.sh config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -325,6 +328,9 @@ if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID + rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & From 9c0610ec2bf454ba4740a2117fb5b0d03510607f Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 21:27:15 +0200 Subject: [PATCH 089/182] add remaining window functions --- .../window-functions/first_value.md | 72 +++++++++++++++++++ .../sql-reference/window-functions/index.md | 4 +- .../window-functions/last_value.md | 72 +++++++++++++++++++ .../window-functions/leadInFrame.md | 2 +- .../window-functions/nth_value.md | 24 +++---- .../en/sql-reference/window-functions/rank.md | 2 +- .../window-functions/row_number.md | 2 +- 7 files changed, 161 insertions(+), 17 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/first_value.md create mode 100644 docs/en/sql-reference/window-functions/last_value.md diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md new file mode 100644 index 00000000000..575a6fc3f48 --- /dev/null +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -0,0 +1,72 @@ +--- +slug: /en/sql-reference/window-functions/first_value +sidebar_label: first_value +sidebar_position: 3 +--- + +# first_value + +Returns the first non-NULL value evaluated within its ordered frame. + +**Syntax** + +```sql +first_value (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The first non-NULL value evaluated within its ordered frame. + +**Example** + +In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─highest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Gary Chen │ +2. │ Robert George │ 195000 │ Gary Chen │ +3. │ Charles Juarez │ 190000 │ Gary Chen │ +4. │ Scott Harrison │ 180000 │ Gary Chen │ +5. │ Douglas Benson │ 150000 │ Gary Chen │ +6. 
│ James Henderson │ 140000 │ Gary Chen │ +7. │ Michael Stanley │ 100000 │ Gary Chen │ + └─────────────────┴────────┴─────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index ee54a679ba1..d18dbcc189d 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -76,8 +76,8 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- [`first_value(x)`](./first_value.md) - Return the first non-NULL value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last non-NULL value evaluated within its ordered frame. - [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. - [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md new file mode 100644 index 00000000000..098ee81ceb3 --- /dev/null +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -0,0 +1,72 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 4 +--- + +# first_value + +Return the last non-NULL value evaluated within its ordered frame. + +**Syntax** + +```sql +first_value (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The last non-NULL value evaluated within its ordered frame. + +**Example** + +In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─lowest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Michael Stanley │ +2. │ Robert George │ 195000 │ Michael Stanley │ +3. 
│ Charles Juarez │ 190000 │ Michael Stanley │ +4. │ Scott Harrison │ 180000 │ Michael Stanley │ +5. │ Douglas Benson │ 150000 │ Michael Stanley │ +6. │ James Henderson │ 140000 │ Michael Stanley │ +7. │ Michael Stanley │ 100000 │ Michael Stanley │ + └─────────────────┴────────┴────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index 0cb4eea52b2..33f69c0dcae 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 4 +sidebar_position: 5 --- # leadInFrame diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md index 26c90110aaa..5c430707009 100644 --- a/docs/en/sql-reference/window-functions/nth_value.md +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 5 +sidebar_position: 6 --- # nth_value @@ -51,7 +51,7 @@ Engine = Memory; INSERT INTO salaries FORMAT Values ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), - ('Port Elizabeth Barbarians', 'Michael Stanley', 10000, 'D'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), @@ -59,19 +59,19 @@ INSERT INTO salaries FORMAT Values ``` ```sql -SELECT salary, nth_value(salary,3) OVER(ORDER BY salary DESC) FROM salaries GROUP BY salary; +SELECT player, salary, nth_value(player,3) OVER(ORDER BY salary DESC) AS third_highest_salary FROM salaries; ``` Result: ```response - ┌─player──────────┬─salary─┬─rank─┐ -1. │ Gary Chen │ 195000 │ 1 │ -2. │ Robert George │ 195000 │ 1 │ -3. │ Charles Juarez │ 190000 │ 3 │ -4. │ Douglas Benson │ 150000 │ 4 │ -5. │ Michael Stanley │ 150000 │ 4 │ -6. │ Scott Harrison │ 150000 │ 4 │ -7. │ James Henderson │ 140000 │ 7 │ - └─────────────────┴────────┴──────┘ + ┌─player──────────┬─salary─┬─third_highest_salary─┐ +1. │ Gary Chen │ 195000 │ │ +2. │ Robert George │ 195000 │ │ +3. │ Charles Juarez │ 190000 │ Charles Juarez │ +4. │ Scott Harrison │ 180000 │ Charles Juarez │ +5. │ Douglas Benson │ 150000 │ Charles Juarez │ +6. │ James Henderson │ 140000 │ Charles Juarez │ +7. 
│ Michael Stanley │ 100000 │ Charles Juarez │ + └─────────────────┴────────┴──────────────────────┘ ``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index 9ac99dde6df..d7ed8d79c35 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 6 +sidebar_position: 7 --- # rank diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index e7165d60169..485ca355f12 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 7 +sidebar_position: 8 --- # row_number From 4d60ff6a91b9d17744a8522e3da0b850215a76d2 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 21:51:14 +0200 Subject: [PATCH 090/182] small updates --- docs/en/sql-reference/window-functions/dense_rank.md | 4 ++-- docs/en/sql-reference/window-functions/lagInFrame.md | 4 ++-- docs/en/sql-reference/window-functions/last_value.md | 10 +++++----- docs/en/sql-reference/window-functions/leadInFrame.md | 4 ++-- docs/en/sql-reference/window-functions/nth_value.md | 10 ++++------ docs/en/sql-reference/window-functions/rank.md | 4 ++-- docs/en/sql-reference/window-functions/row_number.md | 4 ++-- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md index 17ab894707e..d6445b68c55 100644 --- a/docs/en/sql-reference/window-functions/dense_rank.md +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/dense_rank sidebar_label: dense_rank -sidebar_position: 2 +sidebar_position: 7 --- # dense_rank -This window function ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. +Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index b67cf252283..049e095c10f 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/lagInFrame sidebar_label: lagInFrame -sidebar_position: 3 +sidebar_position: 8 --- # lagInFrame -Return a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. 
**Syntax** diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 098ee81ceb3..99b7ca4f75a 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -1,17 +1,17 @@ --- -slug: /en/sql-reference/window-functions/lagInFrame -sidebar_label: lagInFrame +slug: /en/sql-reference/window-functions/last_value +sidebar_label: last_value sidebar_position: 4 --- -# first_value +# last_value -Return the last non-NULL value evaluated within its ordered frame. +Returns the last non-NULL value evaluated within its ordered frame. **Syntax** ```sql -first_value (column_name) +last_value (column_name) OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index 33f69c0dcae..fc1b92cc266 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 5 +sidebar_position: 9 --- # leadInFrame -Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +Returns a value evaluated at the row that is offset rows after the current row within the ordered frame. **Syntax** diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md index 5c430707009..aa5baf651a8 100644 --- a/docs/en/sql-reference/window-functions/nth_value.md +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -1,14 +1,12 @@ --- -slug: /en/sql-reference/window-functions/leadInFrame -sidebar_label: leadInFrame -sidebar_position: 6 +slug: /en/sql-reference/window-functions/nth_value +sidebar_label: nth_value +sidebar_position: 5 --- # nth_value -Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - -The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. +Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame. **Syntax** diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index d7ed8d79c35..dff5e154151 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 7 +sidebar_position: 6 --- # rank -This window function ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. 
diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index 485ca355f12..f1c331f89a3 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 8 +sidebar_position: 2 --- # row_number -Numbers the current row within its partition starting from 1 +Numbers the current row within its partition starting from 1. **Syntax** From 41633cabb2e055a42db9e8899947358111470cf3 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 22:16:03 +0200 Subject: [PATCH 091/182] Update first_value, last_value with possibility to use RESPECT NULLS --- docs/en/sql-reference/window-functions/first_value.md | 11 +++++++++-- docs/en/sql-reference/window-functions/index.md | 4 ++-- docs/en/sql-reference/window-functions/last_value.md | 11 +++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 575a6fc3f48..17ca1cacda8 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -6,18 +6,25 @@ sidebar_position: 3 # first_value -Returns the first non-NULL value evaluated within its ordered frame. +Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. **Syntax** ```sql -first_value (column_name) +first_value (column_name) [RESPECT NULLS] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) ``` +Alias: `any`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). **Returned value** diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 712b99992ea..0c3e2ea1cb6 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -77,8 +77,8 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. -- [`first_value(x)`](./first_value.md) - Return the first non-NULL value evaluated within its ordered frame. -- [`last_value(x)`](./last_value.md) - Return the last non-NULL value evaluated within its ordered frame. +- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame. - [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. 
- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps.
diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md
index 99b7ca4f75a..9d1ce81cc57 100644
--- a/docs/en/sql-reference/window-functions/last_value.md
+++ b/docs/en/sql-reference/window-functions/last_value.md
@@ -6,18 +6,25 @@

 # last_value

-Returns the last non-NULL value evaluated within its ordered frame.
+Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour.

 **Syntax**

 ```sql
-last_value (column_name)
+last_value (column_name) [RESPECT NULLS]
   OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
        [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
 FROM table_name
 WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
 ```

+Alias: `anyLast`.
+
+:::note
+Using the optional modifier `RESPECT NULLS` after `last_value(column_name)` will ensure that `NULL` arguments are not skipped.
+See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
+:::
+
 For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).

 **Returned value**

From 22706b89b9927045e463286c53d82c6369f68bf2 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Thu, 11 Jul 2024 00:10:59 +0200
Subject: [PATCH 092/182] Try to fix links in docs

---
 docs/en/sql-reference/data-types/dynamic.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md
index e50f7e6ddaa..8b3c7479f4f 100644
--- a/docs/en/sql-reference/data-types/dynamic.md
+++ b/docs/en/sql-reference/data-types/dynamic.md
@@ -529,10 +529,10 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var

 ### Binary output format

-In [RowBinary](../../interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format:
+In [RowBinary](/docs/en/interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format:

 ```text

 ```

-See the [data types binary encoding specification](../../sql-reference/data-types/data-types-binary-encoding.md)
+See the [data types binary encoding specification](/docs/en/sql-reference/data-types/data-types-binary-encoding.md)

From a9a227ddc614834eb2943a20bf0f2fa722549fec Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 11 Jul 2024 07:53:54 +0200
Subject: [PATCH 093/182] Fix "Sending a batch of X files to Y (0.00 rows, 0.00
 B bytes)." in case of batch restoring

Previously it was always zeros.
Signed-off-by: Azat Khuzhin --- .../Distributed/DistributedAsyncInsertBatch.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 06d4c185840..e1facec5b40 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -196,6 +196,16 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in) UInt64 idx; in >> idx >> "\n"; files.push_back(std::filesystem::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); + + ReadBufferFromFile header_buffer(files.back()); + const DistributedAsyncInsertHeader & header = DistributedAsyncInsertHeader::read(header_buffer, parent.log); + total_bytes += total_bytes; + + if (header.rows) + { + total_rows += header.rows; + total_bytes += header.bytes; + } } recovered = true; From 37c66c8976da45e1984e22e55a3932e02e7937cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jul 2024 08:04:44 +0200 Subject: [PATCH 094/182] Fix 03030_system_flush_distributed_settings flakiness Signed-off-by: Azat Khuzhin --- .../03030_system_flush_distributed_settings.reference | 2 +- .../03030_system_flush_distributed_settings.sql | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference index 5caff40c4a0..3a05c8b3ee8 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference @@ -1 +1 @@ -10000 +30000 diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index ac64135b593..fac673a4fe4 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -6,15 +6,17 @@ drop table if exists dist_out; create table ephemeral (key Int, value Int) engine=Null(); create table dist_in as ephemeral engine=Distributed(test_shard_localhost, currentDatabase(), ephemeral, key) settings background_insert_batch=1; -create table data (key Int, uniq_values Int) engine=Memory(); -create materialized view mv to data as select key, uniqExact(value) uniq_values from ephemeral group by key; +create table data (key Int, uniq_values Int) engine=TinyLog(); +create materialized view mv to data as select key, uniqExact(value::String) uniq_values from ephemeral group by key; system stop distributed sends dist_in; create table dist_out as data engine=Distributed(test_shard_localhost, currentDatabase(), data); set prefer_localhost_replica=0; SET optimize_trivial_insert_select = 1; -insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; +-- due to pushing to MV with aggregation the query needs ~300MiB +-- but it will be done in background via "system flush distributed" +insert into dist_in select number/100, number from system.numbers limit 3e6 settings max_block_size=3e6, max_memory_usage='100Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } system flush distributed dist_in settings max_memory_usage=0; select count() from dist_out; From 3c52651b5580034e5d42433320c0d3de70a15b4e Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 10 Jul 
2024 11:58:45 +0000
Subject: [PATCH 095/182] s3_off_fix: initial (proper ifdef for registerStorageAzureQueue)

---
 src/Storages/registerStorages.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp
index 9f849052071..adc1074b1fe 100644
--- a/src/Storages/registerStorages.cpp
+++ b/src/Storages/registerStorages.cpp
@@ -35,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory);
 void registerStorageS3(StorageFactory & factory);
 void registerStorageHudi(StorageFactory & factory);
 void registerStorageS3Queue(StorageFactory & factory);
-void registerStorageAzureQueue(StorageFactory & factory);

 #if USE_PARQUET
 void registerStorageDeltaLake(StorageFactory & factory);
@@ -45,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory);
 #endif
 #endif

+#if USE_AZURE_BLOB_STORAGE
+void registerStorageAzureQueue(StorageFactory & factory);
+#endif
+
 #if USE_HDFS
 #if USE_HIVE
 void registerStorageHive(StorageFactory & factory);

From 321a58c51931e8d39b8ba111108fed20ba0541cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Thu, 11 Jul 2024 08:33:10 +0000
Subject: [PATCH 096/182] Fix relative path to `mc`

---
 docker/test/stateful/run.sh  | 2 +-
 docker/test/stateless/run.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index ce637b9a146..2efbed6aa4f 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -22,7 +22,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
 azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence &

 ./setup_minio.sh stateful
-mc admin trace clickminio > /test_output/rubbish.log &
+./mc admin trace clickminio > /test_output/rubbish.log &
 MC_ADMIN_PID=$!

 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index a01abe5929f..d47728cddb9 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -46,7 +46,7 @@ source /utils.lib
 /usr/share/clickhouse-test/config/install.sh

 ./setup_minio.sh stateless
-mc admin trace clickminio > /test_output/rubbish.log &
+./mc admin trace clickminio > /test_output/rubbish.log &
 MC_ADMIN_PID=$!
./setup_hdfs_minicluster.sh From 35850da12e0dc775f117ed060f839671306cc26b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:03:03 +0200 Subject: [PATCH 097/182] Update dynamic.md --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 8b3c7479f4f..b5781a7dd62 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/dynamic -sidebar_position: 56 +sidebar_position: 62 sidebar_label: Dynamic --- From 595bce4945cef4ef1822e610e5352a13e654e45f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:04 +0200 Subject: [PATCH 098/182] Update docs/en/sql-reference/window-functions/first_value.md --- docs/en/sql-reference/window-functions/first_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 17ca1cacda8..4f8a9d393b1 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -11,7 +11,7 @@ Returns the first value evaluated within its ordered frame. By default, NULL arg **Syntax** ```sql -first_value (column_name) [RESPECT NULLS] +first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name From 366ed8701e0e60bc6ca54258663987d3342d7763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:09 +0200 Subject: [PATCH 099/182] Update docs/en/sql-reference/window-functions/first_value.md --- docs/en/sql-reference/window-functions/first_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 4f8a9d393b1..30c3b1f99dc 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -29,7 +29,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Returned value** -- The first non-NULL value evaluated within its ordered frame. +- The first value evaluated within its ordered frame. **Example** From 3d96bf298ceaf030c3e863ea2fabd0a6ebe90e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:13 +0200 Subject: [PATCH 100/182] Update docs/en/sql-reference/window-functions/last_value.md --- docs/en/sql-reference/window-functions/last_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 9d1ce81cc57..34170226cdd 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -29,7 +29,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Returned value** -- The last non-NULL value evaluated within its ordered frame. +- The last value evaluated within its ordered frame. 
**Example** From 330082c3d4d1ff075e66f2eaf72c1515ad64ffdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:33 +0200 Subject: [PATCH 101/182] Update docs/en/sql-reference/window-functions/last_value.md --- docs/en/sql-reference/window-functions/last_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 34170226cdd..dd7f5fa078a 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -11,7 +11,7 @@ Returns the last value evaluated within its ordered frame. By default, NULL argu **Syntax** ```sql -last_value (column_name) [RESPECT NULLS] +last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name From 050240d89071f750516f2a38fea1909d58095aaa Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jul 2024 12:08:16 +0200 Subject: [PATCH 102/182] Review fix --- src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index d37bffc42c4..c896a760597 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -635,7 +635,7 @@ struct DeltaLakeMetadataImpl } const auto value = tuple[1].safeGet(); auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); + current_partition_columns.emplace_back(std::move(name_and_type.value()), std::move(field)); LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); } From 80ceb63f5f194c4c99aa7502f64f7770933ae18f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 10:45:36 +0000 Subject: [PATCH 103/182] Fixing build. --- src/Common/Exception.cpp | 2 +- src/Common/Exception.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 07bda6a75be..09ba664baef 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,7 +38,7 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) +void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 87ef7101cdc..68cc305e67e 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,8 +25,6 @@ namespace DB class AtomicLogger; -[[noreturn]] void abortOnFailedAssertion(const String & description); - /// This flag can be set for testing purposes - to check that no exceptions are thrown. 
extern bool terminate_on_any_exception; @@ -167,6 +165,7 @@ protected: mutable std::vector capture_thread_frame_pointers; }; +[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr); std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); From 1bc02fb71d6a27a2bc83484dec667edb48b0ab84 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 12:39:53 +0000 Subject: [PATCH 104/182] Ignore subquery for IN in DDLLoadingDependencyVisitor --- src/Databases/DDLLoadingDependencyVisitor.cpp | 8 +++++++ .../02841_not_ready_set_constraints.reference | 1 + .../02841_not_ready_set_constraints.sql | 24 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 40234abb20f..67bce915168 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } + else if (arg->as()) + { + /// Allow IN subquery. + /// Do not add tables from the subquery into dependencies, + /// because CREATE will succeed anyway. + return; + } else { assert(false); diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference index d81cc0710eb..daaac9e3030 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -1 +1,2 @@ 42 +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql index ecdf4d50635..274940f50a3 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -17,3 +17,27 @@ ENGINE = MergeTree ORDER BY conversation; INSERT INTO t2(conversation) VALUES (42); select * from t2; + +drop table t1; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +drop table t2; + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +INSERT INTO t2(conversation) VALUES (42); +select * from t2; From 355a56d1b0025b6ba85c7b63a4ce7356d5de792c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 8 Jul 2024 15:21:11 +0200 Subject: [PATCH 105/182] Add a stateless test for gRPC protocol. 
--- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/requirements.txt | 2 + tests/ci/functional_test_check.py | 1 + tests/config/config.d/grpc_protocol.xml | 3 ++ tests/config/install.sh | 1 + .../0_stateless/03203_grpc_protocol.reference | 1 + .../0_stateless/03203_grpc_protocol.sh | 14 +++++ utils/grpc-client/generate_pb2.py | 52 +++++++++++++++++++ utils/grpc-client/pb2/generate.py | 29 ----------- 9 files changed, 75 insertions(+), 29 deletions(-) create mode 100644 tests/config/config.d/grpc_protocol.xml create mode 100644 tests/queries/0_stateless/03203_grpc_protocol.reference create mode 100755 tests/queries/0_stateless/03203_grpc_protocol.sh create mode 100755 utils/grpc-client/generate_pb2.py delete mode 100755 utils/grpc-client/pb2/generate.py diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 5a655a3fd2b..a0e5513a3a2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -86,6 +86,7 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 +ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py" RUN npm install -g azurite@3.30.0 \ && npm install -g tslib && npm install -g node diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt index 3284107e24e..74860d5fec3 100644 --- a/docker/test/stateless/requirements.txt +++ b/docker/test/stateless/requirements.txt @@ -8,6 +8,7 @@ cryptography==3.4.8 dbus-python==1.2.18 distro==1.7.0 docutils==0.17.1 +grpcio==1.47.0 gyp==0.1 httplib2==0.20.2 idna==3.3 @@ -28,6 +29,7 @@ packaging==24.1 pandas==1.5.3 pip==24.1.1 pipdeptree==2.23.0 +protobuf==4.25.3 pyarrow==15.0.0 pyasn1==0.4.8 PyJWT==2.3.0 diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index d8e5a7fa27f..c48a5d91bf5 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -106,6 +106,7 @@ def get_run_command( f"docker run --volume={builds_path}:/package_folder " f"{ci_logs_args}" f"--volume={repo_path}/tests:/usr/share/clickhouse-test " + f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client " f"{volume_with_broken_test}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " diff --git a/tests/config/config.d/grpc_protocol.xml b/tests/config/config.d/grpc_protocol.xml new file mode 100644 index 00000000000..b957618120d --- /dev/null +++ b/tests/config/config.d/grpc_protocol.xml @@ -0,0 +1,3 @@ + + 9100 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 08ee11a7407..9f8730bb91e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/grpc_protocol.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/03203_grpc_protocol.reference 
b/tests/queries/0_stateless/03203_grpc_protocol.reference
new file mode 100644
index 00000000000..9766475a418
--- /dev/null
+++ b/tests/queries/0_stateless/03203_grpc_protocol.reference
@@ -0,0 +1 @@
+ok
diff --git a/tests/queries/0_stateless/03203_grpc_protocol.sh b/tests/queries/0_stateless/03203_grpc_protocol.sh
new file mode 100755
index 00000000000..d51d6382f67
--- /dev/null
+++ b/tests/queries/0_stateless/03203_grpc_protocol.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag no-fasttest: In fasttest, ENABLE_LIBRARIES=0, so the grpc library is not built
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+if [[ -z "$CLICKHOUSE_GRPC_CLIENT" ]]; then
+    CLICKHOUSE_GRPC_CLIENT="$CURDIR/../../../utils/grpc-client/clickhouse-grpc-client.py"
+fi
+
+# Simple test.
+$CLICKHOUSE_GRPC_CLIENT --query "SELECT 'ok'"
diff --git a/utils/grpc-client/generate_pb2.py b/utils/grpc-client/generate_pb2.py
new file mode 100755
index 00000000000..95a39023ed7
--- /dev/null
+++ b/utils/grpc-client/generate_pb2.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+# This is a helper utility.
+# It generates files in the "pb2" folder using the protocol buffer compiler.
+# This script must be called manually after any change of "clickhouse_grpc.proto"
+
+import grpc_tools  # pip3 install grpcio-tools
+
+import os, shutil, subprocess
+
+
+# Settings.
+script_path = os.path.realpath(__file__)
+script_name = os.path.basename(script_path)
+script_dir = os.path.dirname(script_path)
+root_dir = os.path.abspath(os.path.join(script_dir, "../.."))
+
+grpc_proto_dir = os.path.abspath(os.path.join(root_dir, "src/Server/grpc_protos"))
+grpc_proto_filename = "clickhouse_grpc.proto"
+
+# Files in the "pb2" folder which will be generated by this script.
+pb2_filenames = ["clickhouse_grpc_pb2.py", "clickhouse_grpc_pb2_grpc.py"]
+pb2_dir = os.path.join(script_dir, "pb2")
+
+
+# Processes the protobuf schema with the protocol buffer compiler and generates the "pb2" folder.
+def generate_pb2():
+    print(f"Generating files:")
+    for pb2_filename in pb2_filenames:
+        print(os.path.join(pb2_dir, pb2_filename))
+
+    os.makedirs(pb2_dir, exist_ok=True)
+
+    cmd = [
+        "python3",
+        "-m",
+        "grpc_tools.protoc",
+        "-I" + grpc_proto_dir,
+        "--python_out=" + pb2_dir,
+        "--grpc_python_out=" + pb2_dir,
+        os.path.join(grpc_proto_dir, grpc_proto_filename),
+    ]
+    subprocess.run(cmd)
+
+    for pb2_filename in pb2_filenames:
+        assert os.path.exists(os.path.join(pb2_dir, pb2_filename))
+    print("Done! 
(generate_pb2)") + + +# MAIN +if __name__ == "__main__": + generate_pb2() diff --git a/utils/grpc-client/pb2/generate.py b/utils/grpc-client/pb2/generate.py deleted file mode 100755 index 2f4b3bf5af7..00000000000 --- a/utils/grpc-client/pb2/generate.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import grpc_tools # pip3 install grpcio-tools - -import os -import subprocess - - -script_dir = os.path.dirname(os.path.realpath(__file__)) -dest_dir = script_dir -src_dir = os.path.abspath(os.path.join(script_dir, "../../../src/Server/grpc_protos")) -src_filename = "clickhouse_grpc.proto" - - -def generate(): - cmd = [ - "python3", - "-m", - "grpc_tools.protoc", - "-I" + src_dir, - "--python_out=" + dest_dir, - "--grpc_python_out=" + dest_dir, - os.path.join(src_dir, src_filename), - ] - subprocess.run(cmd) - - -if __name__ == "__main__": - generate() From 3e9f6265195ee6952d4963409de3bd4a1f344730 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:59:34 +0200 Subject: [PATCH 106/182] Update test.py --- tests/integration/test_storage_rabbitmq/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee81..f885a3507ac 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): def kill_rabbitmq(rabbitmq_id): p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 def revive_rabbitmq(rabbitmq_id, cookie): p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) wait_rabbitmq_to_start(rabbitmq_id, cookie) From cdb3b3f2aaec64b8d5b3ec1ee6e38545e3a2b186 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jul 2024 16:42:29 +0200 Subject: [PATCH 107/182] Add query elapsed time for non-default format in play UI Signed-off-by: Azat Khuzhin --- programs/server/play.html | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 507a96382a7..b5bcc687c27 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -516,6 +516,9 @@ /// Save query in history only if it is different. let previous_query = ''; + /// Start of the last query + let last_query_start = 0; + const current_url = new URL(window.location); const opened_locally = location.protocol == 'file:'; @@ -567,6 +570,8 @@ '&password=' + encodeURIComponent(password) } + last_query_start = performance.now(); + const xhr = new XMLHttpRequest; xhr.open('POST', url, true); @@ -579,7 +584,8 @@ if (posted_request_num != request_num) { return; } else if (this.readyState === XMLHttpRequest.DONE) { - renderResponse(this.status, this.response); + const elapsed_msec = performance.now() - last_query_start; + renderResponse(this.status, this.response, elapsed_msec); /// The query is saved in browser history (in state JSON object) /// as well as in URL fragment identifier. @@ -587,7 +593,8 @@ const state = { query: query, status: this.status, - response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit. + response: this.response.length > 100000 ? null : this.response, /// Lower than the browser's limit. 
+ elapsed_msec: elapsed_msec, }; const title = "ClickHouse Query: " + query; @@ -617,7 +624,7 @@ xhr.send(query); } - function renderResponse(status, response) { + function renderResponse(status, response, elapsed_msec) { document.getElementById('hourglass').style.display = 'none'; if (status === 200) { @@ -632,6 +639,7 @@ renderChart(json); } else { renderUnparsedResult(response); + stats.innerText = `Elapsed: ${(elapsed_msec/1000).toFixed(3)} sec.`; } document.getElementById('check-mark').style.display = 'inline'; } else { @@ -651,7 +659,7 @@ clear(); return; } - renderResponse(event.state.status, event.state.response); + renderResponse(event.state.status, event.state.response, event.state.elapsed_msec); }; if (window.location.hash) { From 5f302e539dcf728174b30819594c69b4cc85543b Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 9 Jul 2024 22:49:41 +0200 Subject: [PATCH 108/182] Fix error reporting while copying to Azure Blob Storage. --- .../copyAzureBlobStorageFile.cpp | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 128df415197..c10a7cd017a 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -49,7 +49,7 @@ namespace const String & dest_blob_, std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, - const Poco::Logger * log_) + LoggerPtr log_) : create_read_buffer(create_read_buffer_) , client(client_) , offset (offset_) @@ -74,7 +74,7 @@ namespace const String & dest_blob; std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; - const Poco::Logger * log; + const LoggerPtr log; size_t max_single_part_upload_size; struct UploadPartTask @@ -83,7 +83,6 @@ namespace size_t part_size; std::vector block_ids; bool is_finished = false; - std::exception_ptr exception; }; size_t normal_part_size; @@ -92,6 +91,7 @@ namespace std::list TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks; int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -186,7 +186,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of blob {} in container {}", dest_blob, dest_container_for_logging)); waitForAllBackgroundTasks(); throw; } @@ -242,7 +242,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, "While writing part"); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. 
+ } } task_finish_notify(); }, Priority{}); @@ -299,13 +304,13 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) - { - if (task.exception) - std::rethrow_exception(task.exception); + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) + std::rethrow_exception(exception); + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end()); - } } }; } @@ -321,7 +326,8 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; + auto log = getLogger("copyDataToAzureBlobStorageFile"); + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } @@ -339,9 +345,11 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { + auto log = getLogger("copyAzureBlobStorageFile"); + if (settings->use_native_copy) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); + LOG_TRACE(log, "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); @@ -352,7 +360,7 @@ void copyAzureBlobStorageFile( if (size < settings->max_single_part_copy_size) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob); + LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob); block_blob_client_dest.CopyFromUri(source_uri); } else @@ -368,7 +376,7 @@ void copyAzureBlobStorageFile( if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); } else { @@ -382,14 +390,14 @@ void copyAzureBlobStorageFile( } else { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); + LOG_TRACE(log, "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { return std::make_unique( src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } } From 262c1f9e77add2349c76777d9ddabfe06895c6d3 
Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 17:03:13 +0200 Subject: [PATCH 109/182] Update dynamic.md --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index b5781a7dd62..d0116e7158c 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -529,7 +529,7 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var ### Binary output format -In [RowBinary](/docs/en/interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format: +In [RowBinary](/docs/en/interfaces/formats.md#rowbinary) format values of `Dynamic` type are serialized in the following format: ```text From 6868708a58f39fac83382695864459eb5fcffe5b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Jul 2024 11:37:26 +0000 Subject: [PATCH 110/182] CI Buddy bot to notify about CI events --- pyproject.toml | 3 +- tests/ci/.mypy.ini | 1 + tests/ci/ci.py | 11 +++++- tests/ci/ci_buddy.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ tests/ci/ci_utils.py | 41 +++++++++++++++++++++ 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 tests/ci/ci_buddy.py diff --git a/pyproject.toml b/pyproject.toml index 279d077a695..90f089afa41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,10 +35,9 @@ disable = ''' broad-except, bare-except, no-else-return, - global-statement + global-statement, ''' [tool.pylint.SIMILARITIES] # due to SQL min-similarity-lines=1000 - diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index 9bc44025826..f12d27979ce 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,3 +15,4 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True +ignore_missing_imports = True \ No newline at end of file diff --git a/tests/ci/ci.py b/tests/ci/ci.py index af2f4c0a1fc..b4a3c7ec849 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string +from ci_utils import GHActions, normalize_string, Shell from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -53,6 +53,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings +from ci_buddy import CIBuddy from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -262,6 +263,8 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): + print("Clear dmesg") + Shell.run("sudo dmesg --clear ||:") CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1118,6 +1121,12 @@ def main() -> int: ### POST action: start elif args.post: + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + print("WARNING: OOM while job execution") + CIBuddy().post_error("Out Of Memory") + job_report = JobReport.load() if JobReport.exist() else None if job_report: ch_helper = ClickHouseHelper() diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py new file mode 100644 index 00000000000..d03f5d819ec --- /dev/null +++ b/tests/ci/ci_buddy.py @@ -0,0 +1,88 @@ 
+import json +import os + +import boto3 +import requests +from botocore.exceptions import ClientError + +from pr_info import PRInfo +from ci_utils import Shell + + +class CIBuddy: + _HEADERS = {"Content-Type": "application/json"} + + def __init__(self, dry_run=False): + self.repo = os.getenv("GITHUB_REPOSITORY", "") + self.dry_run = dry_run + res = self._get_webhooks() + self.test_channel = "" + self.dev_ci_channel = "" + if res: + self.test_channel = json.loads(res)["test_channel"] + self.dev_ci_channel = json.loads(res)["ci_channel"] + self.job_name = os.getenv("CHECK_NAME", "unknown") + pr_info = PRInfo() + self.pr_number = pr_info.number + self.head_ref = pr_info.head_ref + self.commit_url = pr_info.commit_html_url + + @staticmethod + def _get_webhooks(): + name = "ci_buddy_web_hooks" + + session = boto3.Session(region_name="us-east-1") # Replace with your region + ssm_client = session.client("ssm") + json_string = None + try: + response = ssm_client.get_parameter( + Name=name, + WithDecryption=True, # Set to True if the parameter is a SecureString + ) + json_string = response["Parameter"]["Value"] + except ClientError as e: + print(f"An error occurred: {e}") + + return json_string + + def post(self, message, dry_run=None): + if dry_run is None: + dry_run = self.dry_run + print(f"Posting slack message, dry_run [{dry_run}]") + if dry_run: + url = self.test_channel + else: + url = self.dev_ci_channel + data = {"text": message} + try: + requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10) + except Exception as e: + print(f"ERROR: Failed to post message, ex {e}") + + def post_error(self, error_description, job_name="", with_instance_info=True): + instance_id, instance_type = "unknown", "unknown" + if with_instance_info: + instance_id = Shell.run("ec2metadata --instance-id") or instance_id + instance_type = Shell.run("ec2metadata --instance-type") or instance_type + if not job_name: + job_name = os.getenv("CHECK_NAME", "unknown") + line_err = f":red_circle: {error_description} :red_circle:\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + message = line_err + message += line_job + if with_instance_info: + message += line_ghr + if self.pr_number > 0: + message += line_pr_ + else: + message += line_br_ + self.post(message) + + +if __name__ == "__main__": + # test + buddy = CIBuddy(dry_run=True) + buddy.post_error("Out of memory") diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index e7034d0b104..629f37289a9 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,4 +1,5 @@ import os +import subprocess from contextlib import contextmanager from pathlib import Path from typing import Any, Iterator, List, Union @@ -42,3 +43,43 @@ class GHActions: for line in lines: print(line) print("::endgroup::") + + +class Shell: + @classmethod + def run_strict(cls, command): + subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + + @classmethod + def run(cls, command): + res = "" + result = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if result.returncode == 0: + res = result.stdout + return res.strip() + + @classmethod + def check(cls, command): + result = subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + 
stderr=subprocess.PIPE, + text=True, + check=False, + ) + return result.returncode == 0 From 808d875a760d81792691f4cd7c465ec2823aefa9 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:57:54 +0200 Subject: [PATCH 111/182] Remove links at all --- docs/en/sql-reference/data-types/dynamic.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index d0116e7158c..8be81471377 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -529,10 +529,8 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var ### Binary output format -In [RowBinary](/docs/en/interfaces/formats.md#rowbinary) format values of `Dynamic` type are serialized in the following format: +In RowBinary format values of `Dynamic` type are serialized in the following format: ```text ``` - -See the [data types binary encoding specification](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) From fe451ec25a3baa269cb47722e38dfd90f01b3734 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 17:34:33 +0000 Subject: [PATCH 112/182] Fixing build. --- src/Common/Exception.cpp | 12 +++++++++--- src/Common/Exception.h | 3 ++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 09ba664baef..111280074dd 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,15 +38,21 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace) +void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); if (trace) - LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace->data(), 0, trace->size())); + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size)); abort(); } +void abortOnFailedAssertion(const String & description) +{ + StackTrace st; + abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize()); +} + bool terminate_on_any_exception = false; static int terminate_status_code = 128 + SIGABRT; thread_local bool update_error_statistics = true; @@ -61,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg, &trace); + abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 68cc305e67e..a4774a89f6a 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -165,7 +165,8 @@ protected: mutable std::vector capture_thread_frame_pointers; }; -[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr); +[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size); +[[noreturn]] void abortOnFailedAssertion(const String & description); std::string getExceptionStackTraceString(const std::exception & e); 
std::string getExceptionStackTraceString(std::exception_ptr e); From 966e4d2d6327987b05668ce625e9f145c28e264a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jul 2024 21:46:08 +0200 Subject: [PATCH 113/182] Remove noisy message --- src/Interpreters/DatabaseCatalog.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 964baea1891..bb2dd158710 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1281,10 +1281,6 @@ void DatabaseCatalog::rescheduleDropTableTask() auto min_drop_time = getMinDropTime(); time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0; - LOG_TRACE( - log, - "Have {} tables in queue to drop. Schedule background task in {} seconds", - tables_marked_dropped.size(), schedule_after_ms / 1000); (*drop_task)->scheduleAfter(schedule_after_ms); } From 364686731f1437e708f12d6e0f44d371b977dc9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jul 2024 21:51:09 +0200 Subject: [PATCH 114/182] Fix test --- tests/queries/0_stateless/03201_local_named_collections.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh index 2054a09df06..809b4b52f41 100755 --- a/tests/queries/0_stateless/03201_local_named_collections.sh +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -13,7 +13,7 @@ INSERT INTO test VALUES ('Hello, world!'); ${CLICKHOUSE_LOCAL} --multiquery " CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; SELECT * FROM remote(mydb, table = 'test'); -" | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" +" 2>&1 | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" ${CLICKHOUSE_CLIENT} --multiquery " DROP TABLE test; From 4a56c601b2b4ec364f808b25d6e9d9adfd4d3ce2 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 01:11:13 +0200 Subject: [PATCH 115/182] Stateless tests: decrease CI timeout --- docker/test/stateless/run.sh | 6 +++--- tests/ci/ci_definitions.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5747ead7986..cb699926cbb 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,8 +6,8 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-10800} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) +MAX_RUN_TIME=${MAX_RUN_TIME:-7200} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -320,7 +320,7 @@ export -f run_tests # This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 600)) +TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48e1280d939..4ae252560e9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -378,7 +378,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=10800, + timeout=7200, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", From 8ef3fbf32333dea0be9ee3ebbd9a3c9529cb9fb6 Mon Sep 17 00:00:00 2001 From: Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> Date: Fri, 12 Jul 2024 07:50:35 +0700 Subject: [PATCH 116/182] docs(clickhouse-local): intended section link --- docs/en/operations/utilities/clickhouse-local.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f19643a3fa5..c20e4fc3b09 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -16,7 +16,7 @@ sidebar_label: clickhouse-local While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. -Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). 
## Download clickhouse-local From 08b6dd604a4673628d0496808a7109f87897d1b5 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 01:24:07 +0200 Subject: [PATCH 117/182] Stateless tests: deal with hang-ups more roughly --- tests/clickhouse-test | 123 +++++++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 31 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 958dde0606f..ffb3dcf4d9e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1750,7 +1750,7 @@ class TestCase: return TestResult( self.name, TestStatus.FAIL, - FailureReason.INTERNAL_QUERY_FAIL, + FailureReason.TIMEOUT, total_time, self.add_info_about_settings( self.get_description_from_exception_info(sys.exc_info()) @@ -2189,11 +2189,26 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool sys.stdout.flush() while True: - test_result = test_case.run( - args, test_suite, client_options, server_logs_level - ) - test_result = test_case.process_result(test_result, MESSAGES) - if not test_result.need_retry: + # This is the upper level timeout + # This helps with completely frozen processes, like in case of gdb errors + def timeout_handler(signum, frame): + raise TimeoutError("Test execution timed out") + + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(int(args.timeout * 1.1)) + test_result = None + try: + test_result = test_case.run( + args, test_suite, client_options, server_logs_level + ) + test_result = test_case.process_result(test_result, MESSAGES) + break + except TimeoutError: + break + finally: + signal.alarm(0) + + if not test_result or not test_result.need_retry: break restarted_tests.append(test_result) @@ -2452,6 +2467,10 @@ def override_envs(*args_, **kwargs): run_tests_array(*args_, **kwargs) +def run_tests_process(*args, **kwargs): + return run_tests_array(*args, **kwargs) + + def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( @@ -2475,39 +2494,70 @@ def do_run_tests(jobs, test_suite: TestSuite): # of failures will be nearly the same for all tests from the group. 
random.shuffle(test_suite.parallel_tests) - batch_size = max(1, len(test_suite.parallel_tests) // jobs) + batch_size = max(1, (len(test_suite.parallel_tests) // jobs) + 1) parallel_tests_array = [] for job in range(jobs): range_ = job * batch_size, job * batch_size + batch_size batch = test_suite.parallel_tests[range_[0] : range_[1]] parallel_tests_array.append((batch, batch_size, test_suite, True)) - try: - with multiprocessing.Pool(processes=jobs + 1) as pool: - future = pool.map_async(run_tests_array, parallel_tests_array) + processes = [] - if args.run_sequential_tests_in_parallel: - # Run parallel tests and sequential tests at the same time - # Sequential tests will use different ClickHouse instance - # In this process we can safely override values in `args` and `os.environ` - future_seq = pool.map_async( - override_envs, - [ - ( - test_suite.sequential_tests, - len(test_suite.sequential_tests), - test_suite, - False, - ) - ], - ) - future_seq.wait() + for test_batch in parallel_tests_array: + process = multiprocessing.Process( + target=run_tests_process, args=(test_batch,) + ) + processes.append(process) + process.start() - future.wait() - finally: - pool.terminate() - pool.close() - pool.join() + if args.run_sequential_tests_in_parallel: + # Run parallel tests and sequential tests at the same time + # Sequential tests will use different ClickHouse instance + # In this process we can safely override values in `args` and `os.environ` + process = multiprocessing.Process( + target=override_envs, + args=( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ), + ), + ) + processes.append(process) + process.start() + + while processes: + sys.stdout.flush() + # Periodically check the server for hangs + # and stop all processes in this case + try: + clickhouse_execute( + args, + query="SELECT 1 /*hang up check*/", + max_http_retries=5, + timeout=20, + ) + except Exception: + print("Hang up check failed") + server_died.set() + + if server_died.is_set(): + print("Server died, terminating all processes...") + kill_gdb_if_any() + # Wait for test results + sleep(args.timeout) + for p in processes: + if p.is_alive(): + p.terminate() + break + + for p in processes[:]: + if not p.is_alive(): + processes.remove(p) + + sleep(5) if not args.run_sequential_tests_in_parallel: run_tests_array( @@ -3358,6 +3408,14 @@ def parse_args(): return parser.parse_args() +class Terminated(KeyboardInterrupt): + pass + + +def signal_handler(sig, frame): + raise Terminated(f"Terminated with {sig} signal") + + if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) @@ -3369,6 +3427,9 @@ if __name__ == "__main__": # infinite tests processes left # (new process group is required to avoid killing some parent processes) os.setpgid(0, 0) + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGHUP, signal_handler) try: args = parse_args() From 5f8358942c9de0380728a0e1e7a4ba749e8d7856 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 03:06:07 +0200 Subject: [PATCH 118/182] Stateless tests: push CI --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ffb3dcf4d9e..79f6b5d71d3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2190,7 +2190,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool while True: # This is the upper level timeout 
- # This helps with completely frozen processes, like in case of gdb errors + # It helps with completely frozen processes, like in case of gdb errors def timeout_handler(signum, frame): raise TimeoutError("Test execution timed out") From f30cd1243495265f54bd6cbcbd721c4f77cebe37 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 08:38:22 +0200 Subject: [PATCH 119/182] Stateless tests: add "Server died" check --- docker/test/util/process_functional_tests_result.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index fd4cc9f4bf7..8b2fd46c973 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -11,6 +11,7 @@ TIMEOUT_SIGN = "[ Timeout! " UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" +SERVER_DIED_SIGN = "Server died, terminating all processes" DATABASE_SIGN = "Database: " SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] @@ -25,6 +26,7 @@ def process_test_log(log_path, broken_tests): failed = 0 success = 0 hung = False + server_died = False retries = False success_finish = False test_results = [] @@ -41,6 +43,8 @@ def process_test_log(log_path, broken_tests): if HUNG_SIGN in line: hung = True break + if SERVER_DIED_SIGN in line: + server_died = True if RETRIES_SIGN in line: retries = True if any( @@ -123,6 +127,7 @@ def process_test_log(log_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -150,6 +155,7 @@ def process_result(result_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -165,6 +171,10 @@ def process_result(result_path, broken_tests): description = "Some queries hung, " state = "failure" test_results.append(("Some queries hung", "FAIL", "0", "")) + elif server_died: + description = "Server died, " + state = "failure" + test_results.append(("Server died", "FAIL", "0", "")) elif not success_finish: description = "Tests are not finished, " state = "failure" From b024bb736f6008de741bf6392d0b8432e8cdf16c Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 10:14:41 +0200 Subject: [PATCH 120/182] CI: CiBuddy to post to salck channel from release branches only --- tests/ci/ci.py | 4 +++- tests/ci/ci_buddy.py | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index b4a3c7ec849..fac50d30022 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1125,7 +1125,9 @@ def main() -> int: "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" ): print("WARNING: OOM while job execution") - CIBuddy().post_error("Out Of Memory") + CIBuddy(dry_run=not pr_info.is_release).post_error( + "Out Of Memory", job_name=_get_ext_check_name(args.job_name) + ) job_report = JobReport.load() if JobReport.exist() else None if job_report: diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index d03f5d819ec..ea690bb602c 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -66,11 +66,11 @@ class CIBuddy: instance_type = Shell.run("ec2metadata --instance-type") or instance_type if not job_name: job_name = os.getenv("CHECK_NAME", "unknown") - line_err = f":red_circle: {error_description} :red_circle:\n\n" - line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" - line_job = 
f" *Job:* `{job_name}`\n" - line_pr_ = f" *PR:* \n" - line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + line_err = f":red_circle: *Error: {error_description}*\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" message = line_err message += line_job if with_instance_info: From 7d9e1700d2a85330380c19777b788ce5a6c2f605 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:24:24 +0200 Subject: [PATCH 121/182] update intHash32, intHash64 --- .../functions/array-functions.md | 40 ++++++++++++ .../sql-reference/functions/hash-functions.md | 63 ++++++++++++++++++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d87ca4a0fe7..4080dce883f 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3081,3 +3081,43 @@ Result: ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). + +## kql_array_sort_asc + +Sorts an array from lowest to highest value. For use with [Kusto Query Language (KQL)](https://clickhouse.com/docs/en/guides/developer/alternative-query-languages#kusto-query-language-kql). + +:::note +For this function to work you should have Kusto enabled. To enable Kusto: + +```sql +SET dialect = 'kusto' +``` + +::: + +**Syntax** + +``` sql +kql_array_sort_asc(arr1 [, arr2 ... arrN]) +``` + +**Arguments** + +- `arr1` — [Array](../data-types/array.md) of numeric values. +- `arr1` — [Array](../data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). + +**Example** + +``` sql +SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res +``` + +``` text +┌─res───────┐ +│ [1,2,0,1] │ +└───────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e431ed75465..d2ed4516fce 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table Calculates a 32-bit hash code from any type of integer. This is a relatively fast non-cryptographic hash function of average quality for numbers. +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 32-bit hash code. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash32(42); +``` + +Result: + +```response +┌─intHash32(42)─┐ +│ 1228623923 │ +└───────────────┘ +``` + ## intHash64 Calculates a 64-bit hash code from any type of integer. -It works faster than intHash32. Average quality. +This is a relatively fast non-cryptographic hash function of average quality for numbers. +It works faster than [intHash32](#inthash32). + +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). 
+ +**Returned value** + +- 64-bit hash code. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash64(42); +``` + +Result: + +```response +┌────────intHash64(42)─┐ +│ 11490350930367293593 │ +└──────────────────────┘ +``` ## SHA1, SHA224, SHA256, SHA512, SHA512_256 From 18e411d35366a82e3a2c9a725ccfabfa6a9170b6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:28:41 +0200 Subject: [PATCH 122/182] remove unwanted change --- .../functions/array-functions.md | 42 +------------------ 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 4080dce883f..1b52440903d 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3080,44 +3080,4 @@ Result: ## Distance functions -All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). - -## kql_array_sort_asc - -Sorts an array from lowest to highest value. For use with [Kusto Query Language (KQL)](https://clickhouse.com/docs/en/guides/developer/alternative-query-languages#kusto-query-language-kql). - -:::note -For this function to work you should have Kusto enabled. To enable Kusto: - -```sql -SET dialect = 'kusto' -``` - -::: - -**Syntax** - -``` sql -kql_array_sort_asc(arr1 [, arr2 ... arrN]) -``` - -**Arguments** - -- `arr1` — [Array](../data-types/array.md) of numeric values. -- `arr1` — [Array](../data-types/array.md) of numeric values. - -**Returned value** - -- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). - -**Example** - -``` sql -SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res -``` - -``` text -┌─res───────┐ -│ [1,2,0,1] │ -└───────────┘ -``` \ No newline at end of file +All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). 
\ No newline at end of file From 7c6db58eec7d06ae216774b957df992c05e94454 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:34:04 +0200 Subject: [PATCH 123/182] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 78c4b6bde95..ca2c4ec4192 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1900,11 +1900,13 @@ kurtosis kurtpop kurtsamp laion +lagInFrame lang laravel largestTriangleThreeBuckets latencies ldap +leadInFrame leftPad leftPadUTF leftUTF From d9a05bca89f3578bb2cf965b1ce373657de0474a Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:43:04 +0200 Subject: [PATCH 124/182] add alias to anyLast_respect_nulls --- .../aggregate-functions/reference/anylast_respect_nulls.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md index 8f093cfdb61..a28b965f7ea 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -13,6 +13,8 @@ Selects the last value encountered, irregardless of whether it is `NULL` or not. anyLast_respect_nulls(column) ``` +Alias: `last_value_respect_nulls`. + **Parameters** - `column`: The column name. From 713546e5102829f71b2c2f27021478c63da0ea3e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jul 2024 11:44:34 +0200 Subject: [PATCH 125/182] Update src/Common/ProfileEvents.cpp --- src/Common/ProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index e80afc95e8d..ba9c4cbfdb2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,7 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ - M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized" ) \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ From b4959b25dfc1a0cf590734bf2b9bc16bc5728188 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jul 2024 11:44:44 +0200 Subject: [PATCH 126/182] Update src/Common/ProfileEvents.cpp --- src/Common/ProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp 
b/src/Common/ProfileEvents.cpp index ba9c4cbfdb2..e80afc95e8d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,7 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ - M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized" ) \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ From dd6dac6c5a0d5057e1927e3230d887af86f1d9c3 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 12:15:59 +0200 Subject: [PATCH 127/182] Stateless tests: better sort checks in test report --- .../test/util/process_functional_tests_result.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 8b2fd46c973..4442c9d7d9e 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -228,5 +228,20 @@ if __name__ == "__main__": state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) + + def test_result_comparator(item): + # sort by status then by check name + order = { + "FAIL": 0, + "Timeout": 1, + "NOT_FAILED": 2, + "BROKEN": 3, + "OK": 4, + "SKIPPED": 5, + } + return order.get(item[1], 10), str(item[0]), item[1] + + test_results.sort(key=test_result_comparator) + write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") From 3806ab7ef1ed8f298cbe0a1d3b186a0d29e7d3a6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 12:50:19 +0200 Subject: [PATCH 128/182] remove *_respect_nulls and modify any, anyLast to reflect that they can use modifier RESPECT NULLS --- .../aggregate-functions/index.md | 2 +- .../aggregate-functions/reference/any.md | 8 ++-- .../reference/any_respect_nulls.md | 44 ------------------- .../aggregate-functions/reference/anylast.md | 8 +++- .../reference/anylast_respect_nulls.md | 41 ----------------- .../aggregate-functions/reference/index.md | 3 +- 6 files changed, 13 insertions(+), 93 deletions(-) delete mode 100644 docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md delete mode 100644 docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 96bf0c5d93b..5056ef2c7aa 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -18,7 +18,7 @@ ClickHouse also 
supports: During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. -There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. +There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`. **Examples:** diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index cdff7dde4a9..972263585f2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -5,12 +5,12 @@ sidebar_position: 102 # any -Selects the first encountered value of a column. +Selects the first encountered value of a column, ignoring any `NULL` values. **Syntax** ```sql -any(column) +any(column) [RESPECT NULLS] ``` Aliases: `any_value`, [`first_value`](../reference/first_value.md). @@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md). **Returned value** -By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: :::note The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. diff --git a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md deleted file mode 100644 index 99104a9b8c7..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls -sidebar_position: 103 ---- - -# any_respect_nulls - -Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not. - -Alias: `any_value_respect_nulls`, `first_value_repect_nulls`. - -**Syntax** - -```sql -any_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is a `NULL` value or not. 
- -**Example** - -Query: - -```sql -CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); - -SELECT any(city), any_respect_nulls(city) FROM any_nulls; -``` - -```response -┌─any(city)─┬─any_respect_nulls(city)─┐ -│ Amsterdam │ ᴺᵁᴸᴸ │ -└───────────┴─────────────────────────┘ -``` - -**See Also** -- [any](../reference/any.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index e43bc07fbdc..202d2e9fb10 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -5,17 +5,21 @@ sidebar_position: 105 # anyLast -Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. +Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. **Syntax** ```sql -anyLast(column) +anyLast(column) [RESPECT NULLS] ``` **Parameters** - `column`: The column name. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: + **Returned value** - The last value encountered. diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md deleted file mode 100644 index a28b965f7ea..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 106 ---- - -# anyLast_respect_nulls - -Selects the last value encountered, irregardless of whether it is `NULL` or not. - -**Syntax** - -```sql -anyLast_respect_nulls(column) -``` - -Alias: `last_value_respect_nulls`. - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is `NULL` or not. 
- -**Example** - -Query: - -```sql -CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); - -SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls; -``` - -```response -┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐ -│ Valencia │ ᴺᵁᴸᴸ │ -└───────────────┴─────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..323a99d276f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -44,10 +44,9 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: - [analysisOfVariance](../reference/analysis_of_variance.md) -- [any](../reference/any_respect_nulls.md) +- [any](../reference/any.md) - [anyHeavy](../reference/anyheavy.md) - [anyLast](../reference/anylast.md) -- [anyLast](../reference/anylast_respect_nulls.md) - [boundingRatio](../reference/boundrat.md) - [first_value](../reference/first_value.md) - [last_value](../reference/last_value.md) From 633db10d397a030b9e0f5aa4435fe5d6c002b54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 12 Jul 2024 12:58:14 +0200 Subject: [PATCH 129/182] Update docs/en/sql-reference/functions/hash-functions.md --- docs/en/sql-reference/functions/hash-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index d2ed4516fce..7c977e7d6dc 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -353,7 +353,7 @@ It works faster than [intHash32](#inthash32). 
**Syntax** ```sql -intHash32(int) +intHash64(int) ``` **Arguments** From c7180e67bde8fa7924c77b999a0dafdc2b4b283d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 12 Jul 2024 13:08:53 +0100 Subject: [PATCH 130/182] impl --- cmake/limit_jobs.cmake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 8e48fc9b9d8..3a759b90fe3 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,9 +42,14 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) + if (ARCH_AARCH64) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") + set (PARALLEL_LINK_JOBS 1) + elseif (PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) + endif () endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") From a4591a4dc44a4d8488721125e107ddbe03384c95 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 11 Jul 2024 22:19:50 +0200 Subject: [PATCH 131/182] CI: Skip pending and not affected jobs from PR workflow run --- tests/ci/ci.py | 3 +++ tests/ci/ci_cache.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index fac50d30022..4774f65b062 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -997,6 +997,9 @@ def main() -> int: ) ci_cache.print_status() + if IS_CI and pr_info.is_pr: + ci_cache.filter_out_not_affected_jobs() + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 8ee0ae54385..594654ce168 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -674,6 +674,47 @@ class CiCache: bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path ) + def filter_out_not_affected_jobs(self): + """ + removes the following jobs from to_do and to_wait lists: + test jobs - as not affected by the change + build jobs which are not required by left test jobs + :return: + """ + remove_from_await_list = [] + for job_name, job_config in self.jobs_to_wait.items(): + if CI.is_test_job(job_name): + remove_from_await_list.append(job_name) + for job in remove_from_await_list: + print(f"Filter job [{job}] - test job and not affected by the change") + del self.jobs_to_wait[job] + del self.jobs_to_do[job] + + required_builds = list() + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_test_job(job_name) and job_config.required_builds: + required_builds += job_config.required_builds + required_builds = list(set(required_builds)) + + remove_builds = [] + has_builds_to_do = False + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_build_job(job_name): + if job_name not in required_builds: + remove_builds += job_name + else: + 
has_builds_to_do = True + + for build_job in remove_builds: + print(f"Filter build job [{build_job}] - not affected and not required by test jobs") + del self.jobs_to_do[build_job] + if build_job in self.jobs_to_wait: + del self.jobs_to_wait[build_job] + + if not has_builds_to_do and CI.JobNames.BUILD_CHECK in self.jobs_to_do: + print(f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds to do") + del self.jobs_to_do[CI.JobNames.BUILD_CHECK] + def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ await pending jobs to be finished From f9eb0f9efd6d5ad8cb80831a960d7cd313a71d24 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 12:29:34 +0200 Subject: [PATCH 132/182] ci unit test --- tests/ci/ci.py | 3 +- tests/ci/ci_cache.py | 47 ++++++++-- .../lambda_shared/token.py | 1 + tests/ci/ssh.py | 6 +- tests/ci/test_ci_config.py | 86 ++++++++++++++++++- tests/ci/test_ci_options.py | 14 ++- 6 files changed, 135 insertions(+), 22 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 4774f65b062..32b87698395 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -995,11 +995,12 @@ def main() -> int: ci_settings, args.skip_jobs, ) - ci_cache.print_status() if IS_CI and pr_info.is_pr: ci_cache.filter_out_not_affected_jobs() + ci_cache.print_status() + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 594654ce168..07dc362428c 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -676,37 +676,68 @@ class CiCache: def filter_out_not_affected_jobs(self): """ - removes the following jobs from to_do and to_wait lists: - test jobs - as not affected by the change - build jobs which are not required by left test jobs + Filter is to be applied in PRs to remove jobs that are not affected by the change + It removes jobs from @jobs_to_do if it is a: + 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) + 2. test job and it has finished on release branch (even if failed) + 2. build job which is not required by any test job that is left in @jobs_to_do + :return: """ + # 1. remove_from_await_list = [] for job_name, job_config in self.jobs_to_wait.items(): - if CI.is_test_job(job_name): + if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: remove_from_await_list.append(job_name) for job in remove_from_await_list: print(f"Filter job [{job}] - test job and not affected by the change") del self.jobs_to_wait[job] del self.jobs_to_do[job] - required_builds = list() + # 2. + remove_from_to_do = [] + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_test_job(job_name): + batches_to_remove = [] + if job_config.batches is not None: + for batch in job_config.batches: + if self.is_failed( + job_name, batch, job_config.num_batches, release_branch=True + ): + print( + f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" + ) + batches_to_remove.append(batch) + for batch in batches_to_remove: + job_config.batches.remove(batch) + if not job_config.batches: + print( + f"Filter [{job_name}] - not affected by the change (failed on release branch)" + ) + remove_from_to_do.append(job_name) + for job in remove_from_to_do: + del self.jobs_to_do[job] + + # 3. 
+ required_builds = [] # type: List[str] for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_config.required_builds: required_builds += job_config.required_builds required_builds = list(set(required_builds)) - remove_builds = [] + remove_builds = [] # type: List[str] has_builds_to_do = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_build_job(job_name): if job_name not in required_builds: - remove_builds += job_name + remove_builds.append(job_name) else: has_builds_to_do = True for build_job in remove_builds: - print(f"Filter build job [{build_job}] - not affected and not required by test jobs") + print( + f"Filter build job [{build_job}] - not affected and not required by test jobs" + ) del self.jobs_to_do[build_job] if build_job in self.jobs_to_wait: del self.jobs_to_wait[build_job] diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index 9749122bd39..3fb8f10c0e2 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -1,4 +1,5 @@ """Module to get the token for GitHub""" + from dataclasses import dataclass import json import time diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 321826fcf44..89d90d724d2 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ[ - "SSH_OPTIONS" - ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + os.environ["SSH_OPTIONS"] = ( + f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + ) def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 47247b91858..558faca915e 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -417,7 +417,7 @@ class TestCIConfig(unittest.TestCase): assert not ci_cache.jobs_to_skip assert not ci_cache.jobs_to_wait - # pretend there are pending jobs that we neet to wait + # pretend there are pending jobs that we need to wait ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) for job, config in ci_cache.jobs_to_wait.items(): assert not config.pending_batches @@ -489,3 +489,87 @@ class TestCIConfig(unittest.TestCase): self.assertCountEqual( list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf ) + + def test_ci_py_filters_not_affected_jobs_in_prs(self): + """ + checks ci.py filters not affected jobs in PRs + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + pr_info.number = 0 + assert pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + all_jobs_in_wf = list(ci_cache.jobs_to_do) + assert not ci_cache.jobs_to_wait + assert not ci_cache.jobs_to_skip + + # pretend there are pending jobs that we need to wait + for job, job_config in ci_cache.jobs_to_do.items(): + ci_cache.jobs_to_wait[job] = job_config + + # remove couple tests from to_wait and + # expect they are preserved in @jobs_to_to along with required package_asan + del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_ASAN] + del ci_cache.jobs_to_wait[CI.JobNames.INTEGRATION_TEST_TSAN] + del 
ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_MSAN] + + # pretend we have some batches failed for one of the job from the to_do list + failed_job = CI.JobNames.INTEGRATION_TEST_TSAN + failed_job_config = ci_cache.jobs_to_do[failed_job] + FAILED_BATCHES = [0, 3] + for batch in FAILED_BATCHES: + assert batch < failed_job_config.num_batches + record = CiCache.Record( + record_type=CiCache.RecordType.FAILED, + job_name=failed_job, + job_digest=ci_cache.job_digests[failed_job], + batch=batch, + num_batches=failed_job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + # pretend we have all batches failed for one of the job from the to_do list + failed_job = CI.JobNames.STATELESS_TEST_MSAN + failed_job_config = ci_cache.jobs_to_do[failed_job] + assert failed_job_config.num_batches > 1 + for batch in range(failed_job_config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.FAILED, + job_name=failed_job, + job_digest=ci_cache.job_digests[failed_job], + batch=batch, + num_batches=failed_job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + ci_cache.filter_out_not_affected_jobs() + expected_to_do = [ + CI.JobNames.STATELESS_TEST_ASAN, + CI.BuildNames.PACKAGE_ASAN, + CI.JobNames.INTEGRATION_TEST_TSAN, + CI.BuildNames.PACKAGE_TSAN, + CI.JobNames.BUILD_CHECK, + ] + self.assertCountEqual( + list(ci_cache.jobs_to_wait), + [ + CI.BuildNames.PACKAGE_ASAN, + CI.BuildNames.PACKAGE_TSAN, + CI.JobNames.BUILD_CHECK, + ], + ) + self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) + self.assertTrue(ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches) + for batch in ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches: + self.assertTrue(batch not in FAILED_BATCHES) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 3f158e79f30..f4d14a17512 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appears in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( From ed693da2b0937f708fe3c37d73821e49e8f2314f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 12 Jul 2024 12:34:16 +0000 Subject: [PATCH 133/182] Automatic style fix --- tests/ci/ssh.py | 6 +++--- tests/ci/test_ci_options.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py 
index 89d90d724d2..321826fcf44 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ["SSH_OPTIONS"] = ( - f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" - ) + os.environ[ + "SSH_OPTIONS" + ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index f4d14a17512..3f158e79f30 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,10 +172,14 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs["fuzzers"].run_by_label = ( + jobs_configs[ + "fuzzers" + ].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs["Integration tests (asan)"].release_only = ( + jobs_configs[ + "Integration tests (asan)" + ].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -307,9 +311,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs["fuzzers"].run_by_label = ( - "TEST_LABEL" # check "fuzzers" does not appears in the result - ) + jobs_configs[ + "fuzzers" + ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( From a3ab1ab5ca707c0e40b4ad738413dd868f2db606 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 12 Jul 2024 13:10:13 +0000 Subject: [PATCH 134/182] CI: Do not block on few number of test failures --- tests/ci/ci_config.py | 3 ++ tests/ci/ci_utils.py | 15 ++++++++- tests/ci/merge_pr.py | 78 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8eda6e6b96f..9a9aa553e1b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,6 +13,9 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel from ci_definitions import BuildConfig as BuildConfig diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 629f37289a9..abc4a88989d 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,8 +1,9 @@ import os +import re import subprocess from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union +from typing import Any, Iterator, List, Union, Optional class WithIter(type): @@ -83,3 +84,15 @@ class Shell: check=False, ) return result.returncode == 0 + + +class Utils: + @staticmethod + def get_failed_tests_number(description: str) -> Optional[int]: + description = description.lower() + + pattern = r"fail:\s*(\d+)\s*(?=,|$)" + match = re.search(pattern, description) + if match: + return int(match.group(1)) + return None diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 37c08fc4efe..6b437731561 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -26,6 +26,8 @@ from pr_info import PRInfo from 
report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX +from ci_config import CI +from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -251,23 +253,69 @@ def main(): # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check( - commit, - statuses, - workflow_failed=(args.wf_status != "success"), - ) - # Process upstream StatusNames.SYNC - pr_info = PRInfo() - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - print("Updating upstream statuses") - update_upstream_sync_status(pr_info, state) + max_failed_tests_per_job = 0 + job_name_with_max_failures = None + total_failed_tests = 0 + failed_to_get_info = False + has_failed_statuses = False + for status in statuses: + if not CI.is_required(status.context): + continue + if status.state == FAILURE: + has_failed_statuses = True + failed_cnt = Utils.get_failed_tests_number(status.description) + if failed_cnt is None: + failed_to_get_info = True + else: + if failed_cnt > max_failed_tests_per_job: + job_name_with_max_failures = status.context + max_failed_tests_per_job = failed_cnt + total_failed_tests += failed_cnt + elif status.state != SUCCESS: + has_failed_statuses = True + print( + f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" + ) + failed_to_get_info = True - if args.wf_status != "success": - # exit with 1 to rerun on workflow failed job restart + can_continue = True + if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: + print( + f"Required check has [{total_failed_tests}] failed - block further testing" + ) + can_continue = False + if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: + print( + f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" + ) + can_continue = False + if failed_to_get_info: + print(f"Unexpected commit status state - block further testing") + can_continue = False + if args.wf_status != SUCCESS: + can_continue = False + print("Workflow has failures - block further testing") + + if args.wf_status == "success" or has_failed_statuses: + state = trigger_mergeable_check( + commit, + statuses, + ) + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + else: + print( + "Workflow failed but no failed statuses found (died runner?) 
- cannot set Mergeable Check status" + ) + + if not can_continue: sys.exit(1) sys.exit(0) From 666f5ffaf9591ae70484930cf6e381a7ab812381 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 15:17:51 +0200 Subject: [PATCH 135/182] mypy fix --- tests/ci/ci_cache.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 07dc362428c..fe54634039d 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -699,15 +699,15 @@ class CiCache: for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name): batches_to_remove = [] - if job_config.batches is not None: - for batch in job_config.batches: - if self.is_failed( - job_name, batch, job_config.num_batches, release_branch=True - ): - print( - f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" - ) - batches_to_remove.append(batch) + assert job_config.batches is not None + for batch in job_config.batches: + if self.is_failed( + job_name, batch, job_config.num_batches, release_branch=True + ): + print( + f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" + ) + batches_to_remove.append(batch) for batch in batches_to_remove: job_config.batches.remove(batch) if not job_config.batches: From 05810ec76fc8e811296daabee97cccc625204941 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 15:40:06 +0200 Subject: [PATCH 136/182] do not skip Build_report --- tests/ci/ci_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index fe54634039d..291ed56aeea 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -697,7 +697,7 @@ class CiCache: # 2. remove_from_to_do = [] for job_name, job_config in self.jobs_to_do.items(): - if CI.is_test_job(job_name): + if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: batches_to_remove = [] assert job_config.batches is not None for batch in job_config.batches: From c06589392b866bf4c799b1f5053197f7027f3db3 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 17:21:11 +0200 Subject: [PATCH 137/182] Stateless tests: fix flaky tests --- .../01037_polygon_dicts_correctness_all.sh | 14 +- .../01037_polygon_dicts_correctness_fast.sh | 14 +- .../01037_polygon_dicts_simple_functions.ans | 208 +++++++++--------- .../01037_polygon_dicts_simple_functions.sh | 127 +++++------ 4 files changed, 181 insertions(+), 182 deletions(-) diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index fff786d6c06..39f235d9966 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -5,20 +5,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${CURDIR}" +DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} +tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS points; CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS polygons_array; @@ -32,9 +34,9 @@ CREATE TABLE polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index c9cd151a2d9..3e461abcefe 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -5,19 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${CURDIR}" +DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} +tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS polygons_array; @@ -31,9 +33,9 @@ CREATE TABLE polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans index dfad14fb113..937539643ec 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans @@ -1,104 +1,104 @@ -dictGet test_01037.dict_array (-100,-42) qqq 101 -dictGet test_01037.dict_array (-1,0) Click South 423 -dictGet test_01037.dict_array (-0.1,0) Click South 423 -dictGet test_01037.dict_array (0,-2) Click West 424 -dictGet test_01037.dict_array (0,-1.1) Click West 424 -dictGet test_01037.dict_array (0,1.1) Click North 422 -dictGet test_01037.dict_array (0,2) Click North 422 -dictGet test_01037.dict_array (0.1,0) Click East 421 -dictGet test_01037.dict_array (0.99,2.99) Click North 422 -dictGet test_01037.dict_array (1,0) Click East 421 -dictGet test_01037.dict_array (3,3) House 314159 -dictGet test_01037.dict_array (5,6) Click 42 -dictGet test_01037.dict_array (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_array (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_array (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array 
(0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55 -dictGet test_01037.dict_tuple (-100,-42) qqq 101 -dictGet test_01037.dict_tuple (-1,0) Click South 423 -dictGet test_01037.dict_tuple (-0.1,0) Click South 423 -dictGet test_01037.dict_tuple (0,-2) Click West 424 -dictGet test_01037.dict_tuple (0,-1.1) Click West 424 -dictGet test_01037.dict_tuple (0,1.1) Click North 422 -dictGet test_01037.dict_tuple (0,2) Click North 422 -dictGet test_01037.dict_tuple (0.1,0) Click East 421 -dictGet test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGet test_01037.dict_tuple (1,0) Click East 421 -dictGet test_01037.dict_tuple (3,3) House 314159 -dictGet test_01037.dict_tuple (5,6) Click 42 -dictGet test_01037.dict_tuple (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55 -dictHas test_01037.dict_array (-100,-42) 0 -dictHas test_01037.dict_array (-1,0) 1 -dictHas test_01037.dict_array (-0.1,0) 1 -dictHas test_01037.dict_array (0,-2) 1 -dictHas test_01037.dict_array (0,-1.1) 1 -dictHas test_01037.dict_array (0,1.1) 1 -dictHas test_01037.dict_array (0,2) 1 -dictHas test_01037.dict_array (0.1,0) 1 -dictHas test_01037.dict_array (0.99,2.99) 1 -dictHas test_01037.dict_array (1,0) 1 -dictHas test_01037.dict_array (3,3) 1 -dictHas test_01037.dict_array (5,6) 1 -dictHas test_01037.dict_array (7.01,7.01) 0 -dictHas test_01037.dict_tuple (-100,-42) 0 -dictHas test_01037.dict_tuple (-1,0) 1 -dictHas test_01037.dict_tuple (-0.1,0) 1 -dictHas 
test_01037.dict_tuple (0,-2) 1 -dictHas test_01037.dict_tuple (0,-1.1) 1 -dictHas test_01037.dict_tuple (0,1.1) 1 -dictHas test_01037.dict_tuple (0,2) 1 -dictHas test_01037.dict_tuple (0.1,0) 1 -dictHas test_01037.dict_tuple (0.99,2.99) 1 -dictHas test_01037.dict_tuple (1,0) 1 -dictHas test_01037.dict_tuple (3,3) 1 -dictHas test_01037.dict_tuple (5,6) 1 -dictHas test_01037.dict_tuple (7.01,7.01) 0 +dictGet dict_array (-100,-42) qqq 101 +dictGet dict_array (-1,0) Click South 423 +dictGet dict_array (-0.1,0) Click South 423 +dictGet dict_array (0,-2) Click West 424 +dictGet dict_array (0,-1.1) Click West 424 +dictGet dict_array (0,1.1) Click North 422 +dictGet dict_array (0,2) Click North 422 +dictGet dict_array (0.1,0) Click East 421 +dictGet dict_array (0.99,2.99) Click North 422 +dictGet dict_array (1,0) Click East 421 +dictGet dict_array (3,3) House 314159 +dictGet dict_array (5,6) Click 42 +dictGet dict_array (7.01,7.01) qqq 101 +dictGetOrDefault dict_array (-100,-42) www 1234 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) www 1234 +dictGetOrDefault dict_array (-100,-42) dd 44 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) ee 55 +dictGet dict_tuple (-100,-42) qqq 101 +dictGet dict_tuple (-1,0) Click South 423 +dictGet dict_tuple (-0.1,0) Click South 423 +dictGet dict_tuple (0,-2) Click West 424 +dictGet dict_tuple (0,-1.1) Click West 424 +dictGet dict_tuple (0,1.1) Click North 422 +dictGet dict_tuple (0,2) Click North 422 +dictGet dict_tuple (0.1,0) Click East 421 +dictGet dict_tuple (0.99,2.99) Click North 422 +dictGet dict_tuple (1,0) Click East 421 +dictGet dict_tuple (3,3) House 314159 +dictGet dict_tuple (5,6) Click 42 +dictGet dict_tuple (7.01,7.01) qqq 101 +dictGetOrDefault dict_tuple (-100,-42) www 1234 +dictGetOrDefault dict_tuple (-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) www 1234 +dictGetOrDefault dict_tuple (-100,-42) dd 44 +dictGetOrDefault dict_tuple 
(-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) ee 55 +dictHas dict_array (-100,-42) 0 +dictHas dict_array (-1,0) 1 +dictHas dict_array (-0.1,0) 1 +dictHas dict_array (0,-2) 1 +dictHas dict_array (0,-1.1) 1 +dictHas dict_array (0,1.1) 1 +dictHas dict_array (0,2) 1 +dictHas dict_array (0.1,0) 1 +dictHas dict_array (0.99,2.99) 1 +dictHas dict_array (1,0) 1 +dictHas dict_array (3,3) 1 +dictHas dict_array (5,6) 1 +dictHas dict_array (7.01,7.01) 0 +dictHas dict_tuple (-100,-42) 0 +dictHas dict_tuple (-1,0) 1 +dictHas dict_tuple (-0.1,0) 1 +dictHas dict_tuple (0,-2) 1 +dictHas dict_tuple (0,-1.1) 1 +dictHas dict_tuple (0,1.1) 1 +dictHas dict_tuple (0,2) 1 +dictHas dict_tuple (0.1,0) 1 +dictHas dict_tuple (0.99,2.99) 1 +dictHas dict_tuple (1,0) 1 +dictHas dict_tuple (3,3) 1 +dictHas dict_tuple (5,6) 1 +dictHas dict_tuple (7.01,7.01) 0 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index be983ec1be4..efc66783d62 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -1,56 +1,52 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; +DROP TABLE IF EXISTS polygons_array; -CREATE DATABASE test_01037; +CREATE TABLE polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +INSERT INTO polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); +INSERT INTO polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); +INSERT INTO polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); +INSERT INTO polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_tuple; -CREATE TABLE test_01037.polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -INSERT INTO test_01037.polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); -INSERT INTO test_01037.polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); -INSERT INTO test_01037.polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); +CREATE TABLE polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); +INSERT INTO polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); +INSERT INTO polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); +INSERT INTO polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); +INSERT INTO polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); +INSERT INTO polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_tuple; +DROP TABLE IF EXISTS points; -CREATE TABLE test_01037.polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), 
(-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); - -DROP TABLE IF EXISTS test_01037.points; - -CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); -INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); -INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); -INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); -INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); -INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); -INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); -INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee'); -INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); -INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); -INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); -INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); -INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); +CREATE TABLE points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO points VALUES (3.0, 3.0, 22, 'bb'); +INSERT INTO points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO points VALUES (7.01, 7.01, 55, 'ee'); +INSERT INTO points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO points VALUES (0.0, -2.0, 774, 'ffd'); " - declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") for type in "${SearchTypes[@]}"; @@ -58,63 +54,62 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; - CREATE DICTIONARY test_01037.dict_array + DROP DICTIONARY IF EXISTS dict_array; + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - DROP DICTIONARY IF EXISTS test_01037.dict_tuple; + DROP DICTIONARY IF EXISTS dict_tuple; - CREATE DICTIONARY test_01037.dict_tuple + CREATE DICTIONARY dict_tuple ( key Array(Array(Array(Tuple(Float64, Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 
'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictGet', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictHas', 'dict_array' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; + select 'dictHas', 'dict_tuple' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_simple_functions.ans" "$outputFile" done $CLICKHOUSE_CLIENT -n --query=" -DROP DICTIONARY test_01037.dict_array; -DROP DICTIONARY test_01037.dict_tuple; -DROP TABLE test_01037.polygons_array; -DROP TABLE test_01037.polygons_tuple; -DROP TABLE test_01037.points; -DROP DATABASE test_01037; +DROP DICTIONARY dict_array; +DROP DICTIONARY dict_tuple; +DROP TABLE polygons_array; +DROP TABLE polygons_tuple; +DROP TABLE points; " From b38928b09f75bc88c722c10ba401623a880a7b70 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 12 Jul 2024 
19:25:32 +0200 Subject: [PATCH 138/182] Update 00992_system_parts_race_condition_zookeeper_long.sh --- .../00992_system_parts_race_condition_zookeeper_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index d45cc3a6871..4887c409844 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -41,7 +41,7 @@ function thread3() function thread4() { - while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done + while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done } function thread5() From 8d07c522f155b3b6ec9760b9ecef28c90598dd1f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 12 Jul 2024 17:13:33 +0100 Subject: [PATCH 139/182] reduce amount of parallel linker jobs further --- cmake/limit_jobs.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 3a759b90fe3..17d8dd42a2c 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -44,8 +44,13 @@ endif () # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) if (ARCH_AARCH64) + # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") set (PARALLEL_LINK_JOBS 1) + if (LINKER_NAME MATCHES "lld") + math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) + set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") + endif() elseif (PARALLEL_LINK_JOBS GREATER 2) message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") set (PARALLEL_LINK_JOBS 2) From 0bb3d07e8ed0f5e7005186e47446d57f8fea0e9f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jul 2024 20:12:39 +0200 Subject: [PATCH 140/182] fix --- src/Processors/Formats/Impl/NpyRowInputFormat.cpp | 3 +++ tests/queries/0_stateless/02895_npy_format.reference | 12 ++++++------ tests/queries/0_stateless/02895_npy_format.sh | 12 ++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp index 65e0f9dd192..773cbc9268e 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp @@ -445,6 +445,9 @@ bool NpyRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & /* elements_in_current_column *= header.shape[i]; } + if (typeid_cast(current_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected nesting level of column '{}', expected {}", column->getName(), header.shape.size() - 1); + for (size_t i = 0; i != elements_in_current_column; ++i) readValue(current_column); diff --git a/tests/queries/0_stateless/02895_npy_format.reference b/tests/queries/0_stateless/02895_npy_format.reference index f9e77644a35..52972f0acbd 100644 --- a/tests/queries/0_stateless/02895_npy_format.reference +++ 
b/tests/queries/0_stateless/02895_npy_format.reference @@ -85,12 +85,12 @@ c [4,5,6] [[1,2],[3,4]] [[5,6],[7,8]] -0 -0 -0 -0 -0 -0 +1 +1 +1 +1 +1 +1 1 [2.199219,1.099609,3.300781] [4.25,3.34961,6.628906] diff --git a/tests/queries/0_stateless/02895_npy_format.sh b/tests/queries/0_stateless/02895_npy_format.sh index 9d05303a091..194b2bc1fe4 100755 --- a/tests/queries/0_stateless/02895_npy_format.sh +++ b/tests/queries/0_stateless/02895_npy_format.sh @@ -52,14 +52,14 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim.npy', Npy, 'v $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/three_dim.npy', Npy, 'value Array(Array(Int8))')" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Array(Float32)')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "UNKNOWN_TYPE" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "UNKNOWN_TYPE" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "ILLEGAL_COLUMN" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/float_16.npy')" From 5b1e9bebe47e6b6971e6432f788e9ad9ce1c5f2b Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 18:19:30 +0200 Subject: [PATCH 141/182] change thresholds --- tests/ci/ci_config.py | 4 ++-- tests/ci/merge_pr.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9a9aa553e1b..d9f8e7d3afd 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,8 +13,8 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 - MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6b437731561..061376fc856 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -291,13 +291,14 @@ def main(): ) can_continue = False if 
failed_to_get_info: - print(f"Unexpected commit status state - block further testing") + print("Unexpected commit status state - block further testing") can_continue = False if args.wf_status != SUCCESS: can_continue = False print("Workflow has failures - block further testing") if args.wf_status == "success" or has_failed_statuses: + # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted state = trigger_mergeable_check( commit, statuses, From 1495ef32180625b743f9f01cd86b4a257ae96ff0 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 20:32:01 +0200 Subject: [PATCH 142/182] CI: Set error status for job with OOM --- pyproject.toml | 1 + tests/ci/ci.py | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 90f089afa41..39511e1a0d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ disable = ''' bare-except, no-else-return, global-statement, + f-string-without-interpolation, ''' [tool.pylint.SIMILARITIES] diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 32b87698395..9f4a98114c5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1125,6 +1125,7 @@ def main() -> int: ### POST action: start elif args.post: + has_oom_error = False if Shell.check( "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" ): @@ -1132,6 +1133,7 @@ def main() -> int: CIBuddy(dry_run=not pr_info.is_release).post_error( "Out Of Memory", job_name=_get_ext_check_name(args.job_name) ) + has_oom_error = True job_report = JobReport.load() if JobReport.exist() else None if job_report: @@ -1235,8 +1237,25 @@ def main() -> int: ch_helper, ) else: - # no job report - print(f"No job report for {[args.job_name]} - do nothing") + if CI.is_test_job(args.job_name): + if has_oom_error: + description = "ERROR: Out Of Memory" + else: + description = "ERROR: Unknown job status" + gh = GitHub(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + post_commit_status( + commit, + ERROR, + "", + description, + job_report.check_name or _get_ext_check_name(args.job_name), + pr_info, + dump_to_file=True, + ) + else: + # no job report + print(f"No job report for {[args.job_name]} - do nothing") ### POST action: end ### MARK SUCCESS action: start From 2dc7d1f510dfc7a4719835a31acf9a9a47a8c1dc Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 21:09:10 +0200 Subject: [PATCH 143/182] Stateless tests: fix flaky tests 2 --- .../0_stateless/01037_polygon_dicts_correctness_all.sh | 5 +++-- .../0_stateless/01037_polygon_dicts_correctness_fast.sh | 5 +++-- .../0_stateless/01037_polygon_dicts_simple_functions.sh | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index 39f235d9966..9a26f78a8ee 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -5,12 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index 3e461abcefe..47f7a5c1c4f 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -5,12 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index efc66783d62..d1ee3f283bc 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp mkdir -p $TMP_DIR $CLICKHOUSE_CLIENT -n --query=" From cc2cce97177552dcf82682f06418ffa3388760c1 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Sat, 13 Jul 2024 00:34:54 +0200 Subject: [PATCH 144/182] Stateless tests: fix flaky tests 3 --- tests/clickhouse-test | 2 +- tests/queries/0_stateless/02834_apache_arrow_abort.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 79f6b5d71d3..bc30b3c21b7 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -711,7 +711,7 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: 32 + "max_insert_threads": lambda: 12 if random.random() < 0.03 else random.randint(1, 3), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index bd29e95db9a..47e1c5d3951 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ -- Tags: no-fasttest -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. 
- +SET optimize_trivial_insert_select=1; INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } From 8295a8e9b8360eec0128078fb05f6c3d50ce3b97 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Sat, 13 Jul 2024 00:39:53 +0200 Subject: [PATCH 145/182] Stateless tests: fix flaky tests 4 --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bc30b3c21b7..0cf46732354 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -729,7 +729,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_joined_block_size_rows": lambda: random.randint(8000, 100000), - "max_threads": lambda: 64 if random.random() < 0.03 else random.randint(1, 3), + "max_threads": lambda: 32 if random.random() < 0.03 else random.randint(1, 3), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1), From 04525888f5db6f2c0e61e170cab5ad57626fbf17 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 11:55:25 +0200 Subject: [PATCH 146/182] fix for failed workflow status --- tests/ci/merge_pr.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 061376fc856..6fb6821ede4 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -293,11 +293,14 @@ def main(): if failed_to_get_info: print("Unexpected commit status state - block further testing") can_continue = False - if args.wf_status != SUCCESS: + if args.wf_status != SUCCESS and not has_failed_statuses: + # workflow failed but reason is unknown as no failed statuses present can_continue = False - print("Workflow has failures - block further testing") + print( + "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." 
+ ) - if args.wf_status == "success" or has_failed_statuses: + if args.wf_status == SUCCESS or has_failed_statuses: # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted state = trigger_mergeable_check( commit, From 8706145c467852e7d4b84e5a9823050b8de3e085 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 12:17:03 +0200 Subject: [PATCH 147/182] fix for not success status in Sync --- tests/ci/merge_pr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6fb6821ede4..59749abb4fa 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -272,7 +272,11 @@ def main(): job_name_with_max_failures = status.context max_failed_tests_per_job = failed_cnt total_failed_tests += failed_cnt - elif status.state != SUCCESS: + elif status.state != SUCCESS and status.context not in ( + CI.StatusNames.SYNC, + CI.StatusNames.PR_CHECK, + ): + # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) has_failed_statuses = True print( f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" From 11f3e406c6ab040cc42d209ac2471406367f577c Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 12:48:48 +0200 Subject: [PATCH 148/182] CI: Cache AST fuzzers (run always) jobs in CI --- tests/ci/ci.py | 4 ++-- tests/ci/ci_cache.py | 4 ++-- tests/ci/ci_definitions.py | 9 ++++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 32b87698395..57552985f62 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -325,8 +325,8 @@ def _mark_success_action( # do nothing, exit without failure print(f"ERROR: no status file for job [{job}]") - if job_config.run_always or job_config.run_by_label: - print(f"Job [{job}] runs always or by label in CI - do not cache") + if job_config.run_by_label or not job_config.has_digest(): + print(f"Job [{job}] has no digest or run by label in CI - do not cache") else: if pr_info.is_master: pass diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 291ed56aeea..bc6761959b4 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -609,7 +609,7 @@ class CiCache: pushes pending records for all jobs that supposed to be run """ for job, job_config in self.jobs_to_do.items(): - if job_config.run_always: + if not job_config.has_digest(): continue pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) assert job_config.batches @@ -680,7 +680,7 @@ class CiCache: It removes jobs from @jobs_to_do if it is a: 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) 2. test job and it has finished on release branch (even if failed) - 2. build job which is not required by any test job that is left in @jobs_to_do + 3. 
build job which is not required by any test job that is left in @jobs_to_do :return: """ diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 4ae252560e9..a79097d8b55 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -327,6 +327,9 @@ class JobConfig: assert self.required_builds return self.required_builds[0] + def has_digest(self) -> bool: + return self.digest != DigestConfig() + class CommonJobConfigs: """ @@ -440,7 +443,11 @@ class CommonJobConfigs: ) ASTFUZZER_TEST = JobConfig( job_name_keyword="ast", - digest=DigestConfig(), + digest=DigestConfig( + include_paths=[ + "./tests/ci/ast_fuzzer_check.py", + ], + docker=["clickhouse/fuzzer"]), run_command="ast_fuzzer_check.py", run_always=True, runner_type=Runners.FUZZER_UNIT_TESTER, From fd9f91c796227a4b9d7273f812c626c2053b098a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 13 Jul 2024 11:07:52 +0000 Subject: [PATCH 149/182] Automatic style fix --- tests/ci/ci_definitions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index a79097d8b55..d2da73f4e46 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -447,7 +447,8 @@ class CommonJobConfigs: include_paths=[ "./tests/ci/ast_fuzzer_check.py", ], - docker=["clickhouse/fuzzer"]), + docker=["clickhouse/fuzzer"], + ), run_command="ast_fuzzer_check.py", run_always=True, runner_type=Runners.FUZZER_UNIT_TESTER, From 99cd83da1f6bf71545337fdd72ffd2ed7cf68bb2 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 27 Jun 2024 19:43:10 +0200 Subject: [PATCH 150/182] New Create Release workflow --- .github/workflows/create_release.yml | 138 +++++- tests/ci/artifactory.py | 356 ++++++++++++++ tests/ci/create_release.py | 663 +++++++++++++++++++++++++++ tests/ci/docker_server.py | 24 +- tests/ci/version_helper.py | 6 + 5 files changed, 1175 insertions(+), 12 deletions(-) create mode 100644 tests/ci/artifactory.py create mode 100755 tests/ci/create_release.py diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 3988df3b2b1..e2ad16a05a4 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -6,8 +6,8 @@ concurrency: 'on': workflow_dispatch: inputs: - sha: - description: 'The SHA hash of the commit from which to create the release' + ref: + description: 'Git reference (branch or commit sha) from which to create the release' required: true type: string type: @@ -15,15 +15,139 @@ concurrency: required: true type: choice options: - - new + # TODO: + #- new - patch + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean jobs: - Release: - runs-on: [self-hosted, style-checker-aarch64] + CreateRelease: + env: + GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + RELEASE_TYPE: + runs-on: [self-hosted, release-maker] steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 + - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings + run: | + cat >> "$GITHUB_ENV" << 'EOF' + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + - name: Download All Release Artifacts + run: | + python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push Git Tag for the Release + run: | + python3 ./tests/ci/create_release.py 
--push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Checkout master + run: | + git checkout master + - name: Bump Docker versions, Changelog, Security + run: | + [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 + echo "List versions" + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + echo "Update docker version" + ./utils/list-versions/update-docker-version.sh + echo "Generate ChangeLog" + export CI=1 + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ + --volume=".:/ClickHouse" clickhouse/style-test \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token="$GH_TOKEN" --jobs=5 \ + --output="/ClickHouse/docs/changelogs/${RELEASE_TAG}.md" "${RELEASE_TAG}" + git add ./docs/changelogs/${RELEASE_TAG}.md + echo "Generate Security" + python3 ./utils/security-generator/generate_security.py > SECURITY.md + git diff HEAD + - name: Create ChangeLog Pull Request + if: ${{ ! inputs.dry-run }} + uses: peter-evans/create-pull-request@v6 + with: + author: "robot-clickhouse " + token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + committer: "robot-clickhouse " + commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + branch: auto/${{ env.RELEASE_TAG }} + assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher + delete-branch: true + title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} + labels: do not test + body: | + Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + ### Changelog category (leave one): + - Not for changelog (changelog entry is not required) + - name: Reset changes if Dry-run + if: ${{ inputs.dry-run }} + run: | + git reset --hard HEAD + - name: Checkout back to GITHUB_REF + run: | + git checkout "$GITHUB_REF_NAME" + - name: Create GH Release + run: | + python3 ./tests/ci/create_release.py --create-gh-release \ + --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + + - name: Export TGZ Packages + run: | + python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test TGZ Packages + run: | + python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export RPM Packages + run: | + python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test RPM Packages + run: | + python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export Debian Packages + run: | + python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test Debian Packages + run: | + python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Docker clickhouse/clickhouse-server building + run: | + cd "./tests/ci" + export CHECK_NAME="Docker server image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ 
env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Docker clickhouse/clickhouse-keeper building + run: | + cd "./tests/ci" + export CHECK_NAME="Docker keeper image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Post Slack Message + if: failure() + run: | + echo Slack Message \ No newline at end of file diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py new file mode 100644 index 00000000000..2e18316cb78 --- /dev/null +++ b/tests/ci/artifactory.py @@ -0,0 +1,356 @@ +import argparse +import time +from pathlib import Path +from typing import Optional + +from create_release import PackageDownloader, ReleaseInfo, ShellRunner +from ci_utils import WithIter +from shutil import copy2 + + +class MountPointApp(metaclass=WithIter): + RCLONE = "rclone" + S3FS = "s3fs" + + +class R2MountPoint: + _TEST_BUCKET_NAME = "repo-test" + _PROD_BUCKET_NAME = "packages" + _CACHE_MAX_SIZE_GB = 20 + MOUNT_POINT = "/home/ubuntu/mountpoint" + API_ENDPOINT = "https://d4fd593eebab2e3a58a599400c4cd64d.r2.cloudflarestorage.com" + LOG_FILE = "/home/ubuntu/fuse_mount.log" + # mod time is not required by reprepro and createrepo - disable to simplify bucket's mount sync (applicable fro rclone) + NOMODTIME = True + # enable debug messages in mount log + DEBUG = True + # enable cache for mountpoint + CACHE_ENABLED = False + # TODO: which mode is better: minimal/writes/full/off + _RCLONE_CACHE_MODE = "minimal" + UMASK = "0000" + + def __init__(self, app: str, dry_run: bool) -> None: + assert app in MountPointApp + self.app = app + if dry_run: + self.bucket_name = self._TEST_BUCKET_NAME + else: + self.bucket_name = self._PROD_BUCKET_NAME + + self.aux_mount_options = "" + self.async_mount = False + if self.app == MountPointApp.S3FS: + self.cache_dir = "/home/ubuntu/s3fs_cache" + # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs + self.aux_mount_options += "--debug " if self.DEBUG else "" + self.aux_mount_options += ( + f"-o use_cache={self.cache_dir} -o cache_size_mb={self._CACHE_MAX_SIZE_GB * 1024} " + if self.CACHE_ENABLED + else "" + ) + # without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error" + self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" + elif self.app == MountPointApp.RCLONE: + # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return + self.async_mount = True + self.cache_dir = "/home/ubuntu/rclone_cache" + self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" + self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose + self.aux_mount_options += ( + f"--vfs-cache-mode {self._RCLONE_CACHE_MODE} --vfs-cache-max-size {self._CACHE_MAX_SIZE_GB}G" + if self.CACHE_ENABLED + else "--vfs-cache-mode off" + ) + # Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time + self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}" + else: + assert False + + def init(self): + print(f"Mount bucket [{self.bucket_name}] to [{self.MOUNT_POINT}]") + _CLEAN_LOG_FILE_CMD = f"tail -n 
1000 {self.LOG_FILE} > {self.LOG_FILE}_tmp && mv {self.LOG_FILE}_tmp {self.LOG_FILE} ||:" + _MKDIR_CMD = f"mkdir -p {self.MOUNT_POINT}" + _MKDIR_FOR_CACHE = f"mkdir -p {self.cache_dir}" + _UNMOUNT_CMD = ( + f"mount | grep -q {self.MOUNT_POINT} && umount {self.MOUNT_POINT} ||:" + ) + + _TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}" + ShellRunner.run(_CLEAN_LOG_FILE_CMD) + ShellRunner.run(_UNMOUNT_CMD) + ShellRunner.run(_MKDIR_CMD) + ShellRunner.run(_MKDIR_FOR_CACHE) + ShellRunner.run(self.mount_cmd, async_=self.async_mount) + if self.async_mount: + time.sleep(3) + ShellRunner.run(_TEST_MOUNT_CMD) + + @classmethod + def teardown(cls): + print(f"Unmount [{cls.MOUNT_POINT}]") + ShellRunner.run(f"umount {cls.MOUNT_POINT}") + + +class RepoCodenames(metaclass=WithIter): + LTS = "lts" + STABLE = "stable" + + +class DebianArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/deb" + _PROD_REPO_URL = "https://packages.clickhouse.com/deb" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self): + assert self.pd.local_deb_packages_ready(), "BUG: Packages are not downloaded" + print("Start adding packages") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_deb_packages_files() + ] + REPREPRO_CMD_PREFIX = f"reprepro --basedir {R2MountPoint.MOUNT_POINT}/configs/deb --outdir {R2MountPoint.MOUNT_POINT}/deb --verbose" + cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" + print("Running export command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + if self.codename == RepoCodenames.LTS: + packages_with_version = [ + package + "=" + self.version for package in self.pd.get_packages_names() + ] + print( + f"Copy packages from {RepoCodenames.LTS} to {RepoCodenames.STABLE} repository" + ) + cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" + print("Running copy command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull ubuntu:latest") + print(f"Test packages installation, version [{self.version}]") + cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\"" + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +def _copy_if_not_exists(src: Path, dst: Path) -> Path: + if dst.is_dir(): + dst = dst / src.name + if not dst.exists(): + return copy2(src, dst) # type: ignore + if src.stat().st_size == dst.stat().st_size: + return dst + return copy2(src, dst) # type: ignore + + +class RpmArtifactory: + _TEST_REPO_URL = ( + "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/rpm/clickhouse.repo" + ) + _PROD_REPO_URL = "https://packages.clickhouse.com/rpm/clickhouse.repo" + _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" + + def __init__(self, release_info: ReleaseInfo, 
dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_rpm_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + print(f"Start adding packages to [{codename}]") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_rpm_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "rpm" / codename + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + commands = ( + f"createrepo_c --local-sqlite --workers=2 --update --verbose {dest_dir}", + f"gpg --sign-with {self._SIGN_KEY} --detach-sign --batch --yes --armor {dest_dir / 'repodata' / 'repomd.xml'}", + ) + print(f"Exporting RPM packages into [{codename}]") + + for command in commands: + print(f"Running command:") + print(f" {command}") + ShellRunner.run(command) + + update_public_key = f"gpg --armor --export {self._SIGN_KEY}" + pub_key_path = dest_dir / "repodata" / "repomd.xml.key" + print("Updating repomd.xml.key") + pub_key_path.write_text(ShellRunner.run(update_public_key)[1]) + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull fedora:latest") + print(f"Test package installation, version [{self.version}]") + cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"' + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +class TgzArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/tgz" + _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_tgz_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_tgz_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "tgz" / codename + + print(f"Exporting TGZ packages into [{codename}]") + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + tgz_file = "/tmp/tmp.tgz" + tgz_sha_file = "/tmp/tmp.tgz.sha512" + ShellRunner.run( + f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" + ) + ShellRunner.run( + f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512" + ) + expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 
{tgz_sha_file}") + actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") + assert ( + expected_checksum == actual_checksum + ), f"[{actual_checksum} != {expected_checksum}]" + ShellRunner.run("rm /tmp/tmp.tgz*") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Adds release packages to the repository", + ) + parser.add_argument( + "--infile", + type=str, + required=True, + help="input file with release info", + ) + parser.add_argument( + "--export-debian", + action="store_true", + help="Export debian packages to repository", + ) + parser.add_argument( + "--export-rpm", + action="store_true", + help="Export rpm packages to repository", + ) + parser.add_argument( + "--export-tgz", + action="store_true", + help="Export tgz packages to repository", + ) + parser.add_argument( + "--test-debian", + action="store_true", + help="Test debian packages installation", + ) + parser.add_argument( + "--test-rpm", + action="store_true", + help="Test rpm packages installation", + ) + parser.add_argument( + "--test-tgz", + action="store_true", + help="Test tgz packages installation", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Dry run mode", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + release_info = ReleaseInfo.from_file(args.infile) + """ + Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: + ERROR : IO error: NotImplemented: versionId not implemented + Failed to copy: NotImplemented: versionId not implemented + """ + mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) + if args.export_debian: + mp.init() + DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_rpm: + mp.init() + RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_tgz: + mp.init() + TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.test_debian: + DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_tgz: + TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_rpm: + RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py new file mode 100755 index 00000000000..d749c85994b --- /dev/null +++ b/tests/ci/create_release.py @@ -0,0 +1,663 @@ +import argparse +import dataclasses +import json +import os +import subprocess + +from contextlib import contextmanager +from pathlib import Path +from typing import Iterator, List + +from git_helper import Git, GIT_PREFIX, Runner +from ssh import SSHAgent +from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET +from s3_helper import S3Helper +from version_helper import ( + FILE_WITH_VERSION_PATH, + GENERATED_CONTRIBUTORS, + get_abs_path, + get_version_from_repo, + update_cmake_version, + update_contributors, +) +from git_helper import git_runner as runner +from ci_config import CI + +CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) +CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) + + +class ShellRunner: + + @classmethod + def run(cls, command, check_retcode=True, print_output=True, async_=False): + print(f"Running shell command: [{command}]") + if async_: + subprocess.Popen(command.split(" ")) + return 0, "" + result = subprocess.run( + command + " 
2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if print_output: + print(result.stdout) + if check_retcode: + assert result.returncode == 0, f"Return code [{result.returncode}]" + return result.returncode, result.stdout + + +@dataclasses.dataclass +class ReleaseInfo: + version: str + release_tag: str + release_branch: str + commit_sha: str + # lts or stable + codename: str + + @staticmethod + def from_file(file_path: str) -> "ReleaseInfo": + with open(file_path, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + return ReleaseInfo(**res) + + @staticmethod + def prepare(commit_ref: str, release_type: str, outfile: str) -> None: + dir = Path(outfile).parent + dir.mkdir(parents=True, exist_ok=True) + Path(outfile).unlink(missing_ok=True) + version = None + release_branch = None + release_tag = None + codename = None + assert release_type in ("patch",) + # if release_type == "new": + # assert False, "TODO" + # git = Git() + # version = get_version_from_repo(git=git) + # assert runner.check_command( + # f"git merge-base --is-ancestor {commit_ref} origin/master" + # ) + # expected_tag = f"v{version.major}.{version.minor}-new" + # assert ( + # git.latest_tag == expected_tag + # ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_tag}]" + # release_branch = "master" + if release_type == "patch": + with checkout(commit_ref): + # Git() must be inside "with checkout" contextmanager + commit_sha = Runner().run(f"git rev-parse {commit_ref}") + git = Git() + version = get_version_from_repo(git=git) + codename = version.get_stable_release_type() + version.with_description(codename) + release_branch = f"{version.major}.{version.minor}" + release_tag = version.describe + runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + assert runner.check_command( + f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" + ) + if version.patch == 1: + expected_tag_prefix = f"v{version.major}.{version.minor+1}-" + expected_tag_suffix = "-new" + else: + expected_tag_prefix = ( + f"v{version.major}.{version.minor}.{version.patch-1}." 
+ ) + expected_tag_suffix = f"-{version.get_stable_release_type()}" + if git.latest_tag.startswith( + expected_tag_prefix + ) and git.latest_tag.endswith(expected_tag_suffix): + pass + else: + assert ( + False + ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + + assert ( + release_branch + and commit_sha + and release_tag + and version.string + and codename in ("lts", "stable") + ) + res = ReleaseInfo( + release_branch=release_branch, + commit_sha=commit_sha, + release_tag=release_tag, + version=version.string, + codename=codename, + ) + with open(outfile, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(res), indent=2), file=f) + + def push_release_tag(self, dry_run: bool) -> None: + if dry_run: + # remove locally created tag from prev run + ShellRunner.run( + f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" + ) + # Create release tag + print( + f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" + ) + tag_message = f"Release {self.release_tag}" + runner.run( + f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" + ) + cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" + if not dry_run: + # TODO: cannot push - workflow will start + # runner.run(cmd_commit_version_upd) + pass + else: + print("Dry run, would execute:") + print(f"* {cmd_push_tag}") + + def update_version_and_contributors_list(self, dry_run: bool) -> None: + # Bump version, update contributors list, create PR + branch_upd_version_contributors = f"bump_version_{self.version}" + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) + version.with_description(version.get_stable_release_type()) + assert ( + version.string == self.version + ), "BUG: version in release info does not match version in git commit" + with checkout(self.release_branch): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = ( + f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'", + ) + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") + actor = os.getenv("GITHUB_ACTOR", "") or "me" + cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" + if not dry_run: + runner.run(cmd_commit_version_upd) + runner.run(cmd_push_branch) + runner.run(cmd_create_pr) + else: + print("Dry run, would execute:") + print(f"* {cmd_commit_version_upd}") + print(f"* {cmd_push_branch}") + print(f"* {cmd_create_pr}") + print("Dry run, diff:") + print( + runner.run( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + ) + runner.run( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + + def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: + repo = os.getenv("GITHUB_REPOSITORY") + assert repo + cmds = [] + cmds.append( + f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}" + ) + for file in packages_files: + cmds.append(f"gh release upload {self.release_tag} {file}") + if not dry_run: + for cmd in cmds: 
+ ShellRunner.run(cmd) + else: + print("Dry-run, would run commands:") + print("\n * ".join(cmds)) + + +class RepoTypes: + RPM = "rpm" + DEBIAN = "deb" + TGZ = "tgz" + + +class PackageDownloader: + PACKAGES = ( + "clickhouse-client", + "clickhouse-common-static", + "clickhouse-common-static-dbg", + "clickhouse-keeper", + "clickhouse-keeper-dbg", + "clickhouse-server", + ) + + EXTRA_PACKAGES = ( + "clickhouse-library-bridge", + "clickhouse-odbc-bridge", + ) + PACKAGE_TYPES = (CI.BuildNames.PACKAGE_RELEASE, CI.BuildNames.PACKAGE_AARCH64) + MACOS_PACKAGE_TO_BIN_SUFFIX = { + CI.BuildNames.BINARY_DARWIN: "macos", + CI.BuildNames.BINARY_DARWIN_AARCH64: "macos-aarch64", + } + LOCAL_DIR = "/tmp/packages" + + @classmethod + def _get_arch_suffix(cls, package_arch, repo_type): + if package_arch == CI.BuildNames.PACKAGE_RELEASE: + return ( + "amd64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "x86_64" + ) + elif package_arch == CI.BuildNames.PACKAGE_AARCH64: + return ( + "arm64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "aarch64" + ) + else: + assert False, "BUG" + + def __init__(self, release, commit_sha, version: str): + assert version.startswith(release), "Invalid release branch or version" + major, minor = map(int, release.split(".")) + self.package_names = self.PACKAGES + if major > 24 or (major == 24 and minor > 3): + self.package_names += self.EXTRA_PACKAGES + self.release = release + self.commit_sha = commit_sha + self.version = version + self.s3 = S3Helper() + self.deb_package_files = [] + self.rpm_package_files = [] + self.tgz_package_files = [] + # just binaries for macos + self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] + self.file_to_type = {} + + ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}") + + for package_type in self.PACKAGE_TYPES: + for package in self.package_names: + package_file_name = ( + package + + "_" + + self.version + + "_" + + self._get_arch_suffix(package_type, RepoTypes.DEBIAN) + + ".deb" + ) + self.deb_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + package_file_name = ( + package + + "-" + + self.version + + "." 
+ + self._get_arch_suffix(package_type, RepoTypes.RPM) + + ".rpm" + ) + self.rpm_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + package_file_name = ( + package + + "-" + + self.version + + "-" + + self._get_arch_suffix(package_type, RepoTypes.TGZ) + + ".tgz" + ) + self.tgz_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + package_file_name += ".sha512" + self.tgz_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + def get_deb_packages_files(self): + return self.deb_package_files + + def get_rpm_packages_files(self): + return self.rpm_package_files + + def get_tgz_packages_files(self): + return self.tgz_package_files + + def get_macos_packages_files(self): + return self.macos_package_files + + def get_packages_names(self): + return self.package_names + + def get_all_packages_files(self): + assert self.local_tgz_packages_ready() + assert self.local_deb_packages_ready() + assert self.local_rpm_packages_ready() + assert self.local_macos_packages_ready() + res = [] + for package_file in ( + self.deb_package_files + + self.rpm_package_files + + self.tgz_package_files + + self.macos_package_files + ): + res.append(self.LOCAL_DIR + "/" + package_file) + return res + + def run(self): + ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*") + for package_file in ( + self.deb_package_files + self.rpm_package_files + self.tgz_package_files + ): + print(f"Downloading: [{package_file}]") + s3_path = "/".join( + [ + self.release, + self.commit_sha, + self.file_to_type[package_file], + package_file, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, package_file]), + ) + + for macos_package, bin_suffix in self.MACOS_PACKAGE_TO_BIN_SUFFIX.items(): + binary_name = "clickhouse" + destination_binary_name = f"{binary_name}-{bin_suffix}" + assert destination_binary_name in self.macos_package_files + print( + f"Downloading: [{macos_package}] binary to [{destination_binary_name}]" + ) + s3_path = "/".join( + [ + self.release, + self.commit_sha, + macos_package, + binary_name, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, destination_binary_name]), + ) + + def local_deb_packages_ready(self) -> bool: + assert self.deb_package_files + for package_file in self.deb_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_rpm_packages_ready(self) -> bool: + assert self.rpm_package_files + for package_file in self.rpm_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_tgz_packages_ready(self) -> bool: + assert self.tgz_package_files + for package_file in self.tgz_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_macos_packages_ready(self) -> bool: + assert self.macos_package_files + for package_file in self.macos_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Creates release", + ) + parser.add_argument( + "--prepare-release-info", + action="store_true", + help="Initial step to prepare info like release branch, release tag, etc.", + ) + parser.add_argument( + "--push-release-tag", + action="store_true", + help="Creates and pushes git tag", + ) + parser.add_argument( + "--create-bump-version-pr", + action="store_true", + help="Updates version, contributors' list and creates PR", + ) + parser.add_argument( + "--download-packages", + action="store_true", + help="Downloads all required packages from s3", + ) + parser.add_argument( + "--create-gh-release", + action="store_true", + help="Create GH Release object and attach all packages", + ) + parser.add_argument( + "--ref", + type=str, + help="the commit hash or branch", + ) + parser.add_argument( + "--release-type", + choices=("new", "patch"), + # dest="release_type", + help="a release type to bump the major.minor.patch version part, " + "new branch is created only for the value 'new'", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="do not make any actual changes in the repo, just show what will be done", + ) + parser.add_argument( + "--outfile", + default="", + type=str, + help="output file to write json result to, if not set - stdout", + ) + parser.add_argument( + "--infile", + default="", + type=str, + help="input file with release info", + ) + + return parser.parse_args() + + +@contextmanager +def checkout(ref: str) -> Iterator[None]: + orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + if ref not in (orig_ref,): + runner.run(f"{GIT_PREFIX} checkout {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + runner.run(rollback_cmd) + raise + runner.run(rollback_cmd) + + +@contextmanager +def checkout_new(ref: str) -> Iterator[None]: + orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + runner.run(f"{GIT_PREFIX} checkout -b {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + runner.run(rollback_cmd) + raise + runner.run(rollback_cmd) + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + # prepare ssh for git if needed + _ssh_agent = None + _key_pub = None + if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): + _key = os.getenv("ROBOT_CLICKHOUSE_SSH_KEY") + _ssh_agent = SSHAgent() + _key_pub = _ssh_agent.add(_key) + _ssh_agent.print_keys() + + if args.prepare_release_info: + assert ( + args.ref and args.release_type and args.outfile + ), "--ref, --release-type and --outfile must be provided with --prepare-release-info" + ReleaseInfo.prepare( + commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile + ) + if args.push_release_tag: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_release_tag(dry_run=args.dry_run) + if args.create_bump_version_pr: + # TODO: store link to PR in release info + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.update_version_and_contributors_list(dry_run=args.dry_run) + if args.download_packages: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + 
release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + p.run() + if args.create_gh_release: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + release_info.create_gh_release(p.get_all_packages_files(), args.dry_run) + + # tear down ssh + if _ssh_agent and _key_pub: + _ssh_agent.remove(_key_pub) + + +""" +Prepare release machine (for arm machine): + +### INSTALL PACKAGES +sudo apt update +sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip +sudo apt install --yes python3-boto3 +sudo apt install --yes python3-github +sudo apt install --yes python3-unidiff +sudo apt install --yes s3fs + +### INSTALL AWS CLI +cd /tmp +curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" +unzip awscliv2.zip +sudo ./aws/install +rm -rf aws* +cd - + +### INSTALL GH ACTIONS RUNNER: +# Create a folder +RUNNER_VERSION=2.317.0 +cd ~ +mkdir actions-runner && cd actions-runner +# Download the latest runner package +runner_arch() { + case $(uname -m) in + x86_64 ) + echo x64;; + aarch64 ) + echo arm64;; + esac +} +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +# Extract the installer +tar xzf ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +rm ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz + +### Install reprepro: +cd ~ +sudo apt install dpkg-dev libgpgme-dev libdb-dev libbz2-dev liblzma-dev libarchive-dev shunit2 db-util debhelper +git clone https://salsa.debian.org/debian/reprepro.git +cd reprepro +dpkg-buildpackage -b --no-sign && sudo dpkg -i ../reprepro_$(dpkg-parsechangelog --show-field Version)_$(dpkg-architecture -q DEB_HOST_ARCH).deb + +### Install createrepo-c: +sudo apt install createrepo-c +createrepo_c --version +#Version: 0.17.3 (Features: DeltaRPM LegacyWeakdeps ) + +### Import gpg sign key +gpg --import key.pgp +gpg --list-secret-keys + +### Install docker +sudo su; cd ~ + +deb_arch() { + case $(uname -m) in + x86_64 ) + echo amd64;; + aarch64 ) + echo arm64;; + esac +} +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt-get update +sudo apt-get install --yes --no-install-recommends docker-ce docker-buildx-plugin docker-ce-cli containerd.io + +sudo usermod -aG docker ubuntu + +# enable ipv6 in containers (fixed-cidr-v6 is some random network mask) +cat < /etc/docker/daemon.json +{ + "ipv6": true, + "fixed-cidr-v6": "2001:db8:1::/64", + "log-driver": "json-file", + "log-opts": { + "max-file": "5", + "max-size": "1000m" + }, + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] +} +EOT + +# if docker build does not work: + sudo systemctl restart docker + docker buildx rm mybuilder + docker buildx create --name mybuilder --driver docker-container --use + docker buildx inspect mybuilder --bootstrap + +### Install tailscale + +### Configure GH runner +""" diff --git a/tests/ci/docker_server.py 
b/tests/ci/docker_server.py index 151cc5a4c02..2f556e3ed57 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -11,7 +11,6 @@ from os import path as p from pathlib import Path from typing import Dict, List -from build_check import get_release_or_pr from build_download_helper import read_build_urls from docker_images_helper import DockerImageData, docker_login from env_helper import ( @@ -22,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo +from pr_info import PRInfo, EventType from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -63,6 +62,12 @@ def parse_args() -> argparse.Namespace: help="a version to build, automaticaly got from version_helper, accepts either " "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) + parser.add_argument( + "--sha", + type=str, + default="", + help="sha of the commit to use packages from", + ) parser.add_argument( "--release-type", type=str, @@ -122,7 +127,7 @@ def parse_args() -> argparse.Namespace: def retry_popen(cmd: str, log_file: Path) -> int: - max_retries = 5 + max_retries = 2 for retry in range(max_retries): # From time to time docker build may failed. Curl issues, or even push # It will sleep progressively 5, 15, 30 and 50 seconds between retries @@ -370,13 +375,22 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - release_or_pr, _ = get_release_or_pr(pr_info, args.version) + if pr_info.event_type == EventType.PULL_REQUEST: + release_or_pr = pr_info.number + sha = pr_info.sha + elif pr_info.event_type == EventType.PUSH and pr_info.is_master: + release_or_pr = 0 + sha = pr_info.sha + else: + release_or_pr = f"{args.version.major}.{args.version.minor}" + sha = args.sha + assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): if not args.bucket_prefix: repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{pr_info.sha}/{build_name}" + f"{release_or_pr}/{sha}/{build_name}" ) else: repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 50263f6ebb6..d48dd36b16a 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -148,6 +148,11 @@ class ClickHouseVersion: """our X.3 and X.8 are LTS""" return self.minor % 5 == 3 + def get_stable_release_type(self) -> str: + if self.is_lts: + return VersionType.LTS + return VersionType.STABLE + def as_dict(self) -> VERSIONS: return { "revision": self.revision, @@ -168,6 +173,7 @@ class ClickHouseVersion: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") self._description = version_type self._describe = f"v{self.string}-{version_type}" + return self def copy(self) -> "ClickHouseVersion": copy = ClickHouseVersion( From f8e71707f234c10dd3567e9087a16e6cc2e21801 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 14:50:55 +0200 Subject: [PATCH 151/182] update black --- docker/test/style/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt index bb0cd55dd1a..ed73d0d3636 100644 --- a/docker/test/style/requirements.txt +++ b/docker/test/style/requirements.txt @@ -3,7 +3,7 @@ aiosignal==1.3.1 astroid==3.1.0 async-timeout==4.0.3 attrs==23.2.0 -black==23.12.0 +black==24.4.2 boto3==1.34.131 
botocore==1.34.131 certifi==2024.6.2 From 10502174452c7b1adf623b113e145f1bad24c1ea Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 16:19:06 +0200 Subject: [PATCH 152/182] add support for new release branch Automatic style fix --- .github/workflows/create_release.yml | 27 +++- docker/test/libfuzzer/run_libfuzzer.py | 18 +-- tests/ci/create_release.py | 125 +++++++++++++++--- tests/ci/ssh.py | 6 +- tests/ci/test_ci_options.py | 14 +- tests/ci/version_helper.py | 13 ++ tests/integration/helpers/cluster.py | 18 +-- tests/integration/test_disk_types/test.py | 6 +- .../test_distributed_default_database/test.py | 1 + .../test_interserver_dns_retires/test.py | 1 + tests/integration/test_storage_s3/test.py | 4 +- .../test_case.py | 1 + .../test_case.py | 1 + ...411_long_accurate_number_comparison.python | 2 +- 14 files changed, 175 insertions(+), 62 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index e2ad16a05a4..26adc3adff9 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -15,9 +15,8 @@ concurrency: required: true type: choice options: - # TODO: - #- new - patch + - new dry-run: description: 'Dry run' required: false @@ -28,7 +27,6 @@ jobs: CreateRelease: env: GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - RELEASE_TYPE: runs-on: [self-hosted, release-maker] steps: - name: DebugInfo @@ -61,11 +59,16 @@ jobs: echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" - name: Download All Release Artifacts + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} - name: Push Git Tag for the Release run: | python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push New Release Branch + if: ${{ inputs.type == 'new' }} + run: | + python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} - name: Bump CH Version and Update Contributors' List run: | python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} @@ -73,6 +76,7 @@ jobs: run: | git checkout master - name: Bump Docker versions, Changelog, Security + if: ${{ inputs.type == 'patch' }} run: | [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 echo "List versions" @@ -90,8 +94,8 @@ jobs: echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - - name: Create ChangeLog Pull Request - if: ${{ ! inputs.dry-run }} + - name: Generate ChangeLog + if: ${{ inputs.type == 'patch' && ! 
inputs.dry-run }} uses: peter-evans/create-pull-request@v6 with: author: "robot-clickhouse " @@ -115,39 +119,48 @@ jobs: run: | git checkout "$GITHUB_REF_NAME" - name: Create GH Release + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/create_release.py --create-gh-release \ --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export TGZ Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test TGZ Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export RPM Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test RPM Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export Debian Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test Debian Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Docker clickhouse/clickhouse-server building + if: ${{ inputs.type == 'patch' }} run: | cd "./tests/ci" export CHECK_NAME="Docker server image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - name: Docker clickhouse/clickhouse-keeper building + if: ${{ inputs.type == 'patch' }} run: | cd "./tests/ci" export CHECK_NAME="Docker keeper image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} - name: Post Slack Message - if: failure() + if: always() run: | - echo Slack Message \ No newline at end of file + echo Slack Message diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 5ed019490d5..fa67805dfa5 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -27,19 +27,19 @@ def run_fuzzer(fuzzer: str): parser.read(path) if parser.has_section("asan"): - os.environ[ - "ASAN_OPTIONS" - ] = f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + os.environ["ASAN_OPTIONS"] = ( + f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + ) if parser.has_section("msan"): - os.environ[ - "MSAN_OPTIONS" - ] = f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + os.environ["MSAN_OPTIONS"] = ( + f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + ) if parser.has_section("ubsan"): - os.environ[ - "UBSAN_OPTIONS" - ] = f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + os.environ["UBSAN_OPTIONS"] = ( + f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + ) if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index d749c85994b..65ab48865ef 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -5,6 +5,7 @@ import os import subprocess from contextlib import contextmanager +from copy import copy from pathlib import Path from typing import Iterator, List @@ -12,6 +13,7 @@ from git_helper import Git, GIT_PREFIX, Runner from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper +from autoscale_runners_lambda.lambda_shared.pr import Labels from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -19,6 +21,7 @@ from version_helper import ( get_version_from_repo, update_cmake_version, update_contributors, + VersionType, ) from git_helper import git_runner as runner from ci_config import CI @@ -30,7 +33,12 @@ CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) class ShellRunner: @classmethod - def run(cls, command, check_retcode=True, print_output=True, async_=False): + def run( + cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False + ): + if dry_run: + print(f"Dry-run: Would run shell command: [{command}]") + return 0, "" print(f"Running shell command: [{command}]") if async_: subprocess.Popen(command.split(" ")) @@ -73,23 +81,31 @@ class ReleaseInfo: release_branch = None release_tag = None codename = None - assert release_type in ("patch",) - # if release_type == "new": - # assert False, "TODO" - # git = Git() - # version = get_version_from_repo(git=git) - # assert runner.check_command( - # f"git merge-base --is-ancestor {commit_ref} origin/master" - # ) - # expected_tag = f"v{version.major}.{version.minor}-new" - # assert ( - # git.latest_tag == expected_tag - # ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_tag}]" - # release_branch = "master" + assert release_type in ("patch", "new") + if release_type == "new": + # check commit_ref is right and on a right branch + ShellRunner.run( + f"git merge-base 
--is-ancestor origin/{commit_ref} origin/master" + ) + with checkout(commit_ref): + commit_sha = Runner().run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager + git = Git() + version = get_version_from_repo(git=git) + release_branch = "master" + expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" + version.bump().with_description(VersionType.NEW) + assert ( + git.latest_tag == expected_prev_tag + ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" + release_tag = version.describe + codename = ( + VersionType.STABLE + ) # dummy value (artifactory won't be updated for new release) if release_type == "patch": with checkout(commit_ref): - # Git() must be inside "with checkout" contextmanager commit_sha = Runner().run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) codename = version.get_stable_release_type() @@ -97,11 +113,16 @@ class ReleaseInfo: release_branch = f"{version.major}.{version.minor}" release_tag = version.describe runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") - assert runner.check_command( + # check commit is right and on a right branch + ShellRunner.run( f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" ) if version.patch == 1: - expected_tag_prefix = f"v{version.major}.{version.minor+1}-" + expected_version = copy(version) + expected_version.bump() + expected_tag_prefix = ( + f"v{expected_version.major}.{expected_version.minor}-" + ) expected_tag_suffix = "-new" else: expected_tag_prefix = ( @@ -157,16 +178,71 @@ class ReleaseInfo: print("Dry run, would execute:") print(f"* {cmd_push_tag}") + @staticmethod + def _create_gh_label(label: str, color_hex: str, dry_run: bool): + cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" + ShellRunner.run(cmd, dry_run=dry_run) + + def push_new_release_branch(self, dry_run: bool) -> None: + assert ( + self.release_branch == "master" + ), "New release branch can be created only for release type [new]" + git = Git() + version = get_version_from_repo(git=git) + new_release_branch = f"{version.major}.{version.minor}" + stable_release_type = version.get_stable_release_type() + version_after_release = copy(version) + version_after_release.bump() + assert ( + version_after_release.string == self.version + ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" + if dry_run: + # remove locally created branch from prev run + ShellRunner.run( + f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:" + ) + print( + f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" + ) + with checkout(self.release_branch): + with checkout_new(new_release_branch): + pr_labels = f"--label {Labels.RELEASE}" + if stable_release_type == VersionType.LTS: + pr_labels += f" --label {Labels.RELEASE_LTS}" + cmd_push_branch = ( + f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" + ) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + + print("Create and push backport tags for new release branch") + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-must-backport", "10dbed", dry_run=dry_run + ) + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run + ) + ShellRunner.run( + f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch 
{new_release_branch}' + --head {new_release_branch} {pr_labels} + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' + """, + dry_run=dry_run, + ) + def update_version_and_contributors_list(self, dry_run: bool) -> None: # Bump version, update contributors list, create PR branch_upd_version_contributors = f"bump_version_{self.version}" with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) - version.with_description(version.get_stable_release_type()) + if self.release_branch == "master": + version.bump() + version.with_description(VersionType.TESTING) + else: + version.with_description(version.get_stable_release_type()) assert ( version.string == self.version - ), "BUG: version in release info does not match version in git commit" + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" with checkout(self.release_branch): with checkout_new(branch_upd_version_contributors): update_cmake_version(version) @@ -430,6 +506,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Creates and pushes git tag", ) + parser.add_argument( + "--push-new-release-branch", + action="store_true", + help="Creates and pushes new release branch and corresponding service gh tags for backports", + ) parser.add_argument( "--create-bump-version-pr", action="store_true", @@ -533,6 +614,10 @@ if __name__ == "__main__": assert args.infile, "--infile must be provided" release_info = ReleaseInfo.from_file(args.infile) release_info.push_release_tag(dry_run=args.dry_run) + if args.push_new_release_branch: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_new_release_branch(dry_run=args.dry_run) if args.create_bump_version_pr: # TODO: store link to PR in release info assert args.infile, "--infile must be provided" @@ -563,7 +648,7 @@ if __name__ == "__main__": """ -Prepare release machine (for arm machine): +Prepare release machine: ### INSTALL PACKAGES sudo apt update diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 321826fcf44..89d90d724d2 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ[ - "SSH_OPTIONS" - ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + os.environ["SSH_OPTIONS"] = ( + f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + ) def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 3f158e79f30..f4d14a17512 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = 
"TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appears in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index d48dd36b16a..07a7a9601c0 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -72,6 +72,19 @@ class ClickHouseVersion: return self.patch_update() raise KeyError(f"wrong part {part} is used") + def bump(self) -> "ClickHouseVersion": + if self.minor < 12: + self._minor += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + else: + self._major += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 34f5c28fef8..548b58a17e8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1454,9 +1454,9 @@ class ClickHouseCluster: def setup_azurite_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_azurite = True env_variables["AZURITE_PORT"] = str(self.azurite_port) - env_variables[ - "AZURITE_STORAGE_ACCOUNT_URL" - ] = f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + env_variables["AZURITE_STORAGE_ACCOUNT_URL"] = ( + f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + ) env_variables["AZURITE_CONNECTION_STRING"] = ( f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" @@ -1653,9 +1653,9 @@ class ClickHouseCluster: # Code coverage files will be placed in database directory # (affect only WITH_COVERAGE=1 build) - env_variables[ - "LLVM_PROFILE_FILE" - ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + env_variables["LLVM_PROFILE_FILE"] = ( + "/var/lib/clickhouse/server_%h_%p_%m.profraw" + ) clickhouse_start_command = CLICKHOUSE_START_COMMAND if clickhouse_log_file: @@ -1668,9 +1668,9 @@ class ClickHouseCluster: cluster=self, base_path=self.base_dir, name=name, - base_config_dir=base_config_dir - if base_config_dir - else self.base_config_dir, + base_config_dir=( + base_config_dir if base_config_dir else self.base_config_dir + ), custom_main_configs=main_configs or [], custom_user_configs=user_configs or [], custom_dictionaries=dictionaries or [], diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 1cc5048eb69..a5e2456ef4f 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -19,9 +19,9 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/storage_arm.xml"] - if is_arm() - else ["configs/storage_amd.xml"], + main_configs=( + ["configs/storage_arm.xml"] if is_arm() else ["configs/storage_amd.xml"] + ), with_minio=True, with_hdfs=not is_arm(), ) diff --git a/tests/integration/test_distributed_default_database/test.py b/tests/integration/test_distributed_default_database/test.py index ef69533416b..7da9a368997 100644 --- a/tests/integration/test_distributed_default_database/test.py +++ b/tests/integration/test_distributed_default_database/test.py @@ -5,6 +5,7 @@ in this test we write into per-node tables and read from the distributed table. 
The default database in the distributed table definition is left empty on purpose to test default database deduction. """ + import pytest from helpers.client import QueryRuntimeException diff --git a/tests/integration/test_interserver_dns_retires/test.py b/tests/integration/test_interserver_dns_retires/test.py index f0c581e6450..7c81f278737 100644 --- a/tests/integration/test_interserver_dns_retires/test.py +++ b/tests/integration/test_interserver_dns_retires/test.py @@ -2,6 +2,7 @@ This test makes sure interserver cluster queries handle invalid DNS records for replicas. """ + from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 9a0cb352088..40cbf4b44a6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -197,7 +197,9 @@ def test_partition_by_string_column(started_cluster): started_cluster, bucket, "test_foo/bar.csv" ) assert '3,"йцук"\n' == get_s3_file_content(started_cluster, bucket, "test_йцук.csv") - assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv") + assert '78,"你好"\n' == get_s3_file_content( + started_cluster, bucket, "test_你好.csv" + ) def test_partition_by_const_column(started_cluster): diff --git a/tests/integration/test_tcp_handler_http_responses/test_case.py b/tests/integration/test_tcp_handler_http_responses/test_case.py index 2fc53674ca4..98f4b74223e 100644 --- a/tests/integration/test_tcp_handler_http_responses/test_case.py +++ b/tests/integration/test_tcp_handler_http_responses/test_case.py @@ -1,4 +1,5 @@ """Test HTTP responses given by the TCP Handler.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 62581996f3b..b20ab48795b 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -1,4 +1,5 @@ """Test Interserver responses on configured IP.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 183a2637d36..045de9ee7ee 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -50,7 +50,7 @@ TYPES = { "UInt32": {"bits": 32, "sign": False, "float": False}, "Int32": {"bits": 32, "sign": True, "float": False}, "UInt64": {"bits": 64, "sign": False, "float": False}, - "Int64": {"bits": 64, "sign": True, "float": False} + "Int64": {"bits": 64, "sign": True, "float": False}, # "Float32" : { "bits" : 32, "sign" : True, "float" : True }, # "Float64" : { "bits" : 64, "sign" : True, "float" : True } } From 2cd1a39ff4f283122208bc5b2be5244a60e0bfcf Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 21:01:49 +0200 Subject: [PATCH 153/182] style fixes Automatic style fix --- .github/workflows/create_release.yml | 2 +- tests/ci/artifactory.py | 5 +- tests/ci/create_release.py | 116 +++++++++------------------ 3 files changed, 42 insertions(+), 81 deletions(-) diff --git a/.github/workflows/create_release.yml 
b/.github/workflows/create_release.yml index 26adc3adff9..7879c28ab8d 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -49,7 +49,7 @@ jobs: run: | python3 ./tests/ci/create_release.py --prepare-release-info \ --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \ - --outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + --outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} echo "::group::Release Info" python3 -m json.tool "$RELEASE_INFO_FILE" echo "::endgroup::" diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 2e18316cb78..1a062d05a23 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -2,10 +2,9 @@ import argparse import time from pathlib import Path from typing import Optional - +from shutil import copy2 from create_release import PackageDownloader, ReleaseInfo, ShellRunner from ci_utils import WithIter -from shutil import copy2 class MountPointApp(metaclass=WithIter): @@ -201,7 +200,7 @@ class RpmArtifactory: print(f"Exporting RPM packages into [{codename}]") for command in commands: - print(f"Running command:") + print("Running command:") print(f" {command}") ShellRunner.run(command) diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 65ab48865ef..83a51c66bce 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -9,7 +9,7 @@ from copy import copy from pathlib import Path from typing import Iterator, List -from git_helper import Git, GIT_PREFIX, Runner +from git_helper import Git, GIT_PREFIX from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper @@ -23,7 +23,6 @@ from version_helper import ( update_contributors, VersionType, ) -from git_helper import git_runner as runner from ci_config import CI CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) @@ -49,6 +48,7 @@ class ShellRunner: stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + check=True, ) if print_output: print(result.stdout) @@ -74,8 +74,7 @@ class ReleaseInfo: @staticmethod def prepare(commit_ref: str, release_type: str, outfile: str) -> None: - dir = Path(outfile).parent - dir.mkdir(parents=True, exist_ok=True) + Path(outfile).parent.mkdir(parents=True, exist_ok=True) Path(outfile).unlink(missing_ok=True) version = None release_branch = None @@ -88,7 +87,7 @@ class ReleaseInfo: f"git merge-base --is-ancestor origin/{commit_ref} origin/master" ) with checkout(commit_ref): - commit_sha = Runner().run(f"git rev-parse {commit_ref}") + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -104,7 +103,7 @@ class ReleaseInfo: ) # dummy value (artifactory won't be updated for new release) if release_type == "patch": with checkout(commit_ref): - commit_sha = Runner().run(f"git rev-parse {commit_ref}") + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -112,7 +111,7 @@ class ReleaseInfo: version.with_description(codename) release_branch = f"{version.major}.{version.minor}" release_tag = version.describe - runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") # check commit is right and on a right branch ShellRunner.run( f"git merge-base --is-ancestor 
{commit_ref} origin/{release_branch}" @@ -142,7 +141,7 @@ class ReleaseInfo: release_branch and commit_sha and release_tag - and version.string + and version and codename in ("lts", "stable") ) res = ReleaseInfo( @@ -166,20 +165,14 @@ class ReleaseInfo: f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" ) tag_message = f"Release {self.release_tag}" - runner.run( + ShellRunner.run( f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" ) cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" - if not dry_run: - # TODO: cannot push - workflow will start - # runner.run(cmd_commit_version_upd) - pass - else: - print("Dry run, would execute:") - print(f"* {cmd_push_tag}") + ShellRunner.run(cmd_push_tag, dry_run=dry_run) @staticmethod - def _create_gh_label(label: str, color_hex: str, dry_run: bool): + def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" ShellRunner.run(cmd, dry_run=dry_run) @@ -247,29 +240,19 @@ class ReleaseInfo: with checkout_new(branch_upd_version_contributors): update_cmake_version(version) update_contributors(raise_error=True) - cmd_commit_version_upd = ( - f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'", - ) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" - if not dry_run: - runner.run(cmd_commit_version_upd) - runner.run(cmd_push_branch) - runner.run(cmd_create_pr) - else: - print("Dry run, would execute:") - print(f"* {cmd_commit_version_upd}") - print(f"* {cmd_push_branch}") - print(f"* {cmd_create_pr}") - print("Dry run, diff:") - print( - runner.run( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" - ) + ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + ShellRunner.run(cmd_create_pr, dry_run=dry_run) + if dry_run: + ShellRunner.run( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) - runner.run( + ShellRunner.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) @@ -330,7 +313,7 @@ class PackageDownloader: else: assert False, "BUG" - def __init__(self, release, commit_sha, version: str): + def __init__(self, release, commit_sha, version): assert version.startswith(release), "Invalid release branch or version" major, minor = map(int, release.split(".")) self.package_names = self.PACKAGES @@ -351,41 +334,20 @@ class PackageDownloader: for package_type in self.PACKAGE_TYPES: for package in self.package_names: - package_file_name = ( - package - + "_" - + self.version - + "_" - + self._get_arch_suffix(package_type, RepoTypes.DEBIAN) - + ".deb" - ) - self.deb_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + deb_package_file_name = f"{package}_{self.version}_{self._get_arch_suffix(package_type, RepoTypes.DEBIAN)}.deb" + 
self.deb_package_files.append(deb_package_file_name) + self.file_to_type[deb_package_file_name] = package_type - package_file_name = ( - package - + "-" - + self.version - + "." - + self._get_arch_suffix(package_type, RepoTypes.RPM) - + ".rpm" - ) - self.rpm_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + rpm_package_file_name = f"{package}-{self.version}.{self._get_arch_suffix(package_type, RepoTypes.RPM)}.rpm" + self.rpm_package_files.append(rpm_package_file_name) + self.file_to_type[rpm_package_file_name] = package_type - package_file_name = ( - package - + "-" - + self.version - + "-" - + self._get_arch_suffix(package_type, RepoTypes.TGZ) - + ".tgz" - ) - self.tgz_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type - package_file_name += ".sha512" - self.tgz_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + tgz_package_file_name = f"{package}-{self.version}-{self._get_arch_suffix(package_type, RepoTypes.TGZ)}.tgz" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type + tgz_package_file_name += ".sha512" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type def get_deb_packages_files(self): return self.deb_package_files @@ -561,33 +523,33 @@ def parse_args() -> argparse.Namespace: @contextmanager def checkout(ref: str) -> Iterator[None]: - orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref if ref not in (orig_ref,): - runner.run(f"{GIT_PREFIX} checkout {ref}") + ShellRunner.run(f"{GIT_PREFIX} checkout {ref}") try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) raise - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) @contextmanager def checkout_new(ref: str) -> Iterator[None]: - orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref - runner.run(f"{GIT_PREFIX} checkout -b {ref}") + ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}") try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) raise - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) if __name__ == "__main__": From 9f1520ae219e39f445b0948f48a2b34a99abf568 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 21:28:08 +0200 Subject: [PATCH 154/182] do not autofix if not only black failed --- tests/ci/style_check.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9deae06d9f4..6b58ecece8d 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -192,15 +192,6 @@ def main(): future = executor.submit(subprocess.run, cmd_shell, shell=True) _ = future.result() - autofix_description = "" - if args.push: - try: - commit_push_staged(pr_info) - except subprocess.SubprocessError: - # do not fail the whole script if the autofix didn't work out - logging.error("Unable to push the autofix. Continue.") - autofix_description = "Failed to push autofix to the PR. 
" - subprocess.check_call( f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " @@ -210,6 +201,21 @@ def main(): state, description, test_results, additional_files = process_result(temp_path) + autofix_description = "" + fail_cnt = 0 + for result in test_results: + if result.status == FAILURE: + # do not autofix if not only back failed + fail_cnt += 1 + + if args.push and fail_cnt == 1: + try: + commit_push_staged(pr_info) + except subprocess.SubprocessError: + # do not fail the whole script if the autofix didn't work out + logging.error("Unable to push the autofix. Continue.") + autofix_description = "Failed to push autofix to the PR. " + JobReport( description=f"{autofix_description}{description}", test_results=test_results, From 578d22ae94ea1704819e41ca1f0f7c8e58d92e32 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 11 Jul 2024 09:24:25 +0200 Subject: [PATCH 155/182] style Automatic style fix Automatic style fix --- .github/actionlint.yml | 1 + .github/workflows/create_release.yml | 4 ++-- pyproject.toml | 2 ++ tests/ci/create_release.py | 6 +++--- tests/ci/docker_server.py | 4 ++-- tests/ci/style_check.py | 14 +++++++++++--- tests/clickhouse-test | 6 +++--- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 0f88f30d42c..4357bd3eb6b 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -7,3 +7,4 @@ self-hosted-runner: - stress-tester - style-checker - style-checker-aarch64 + - release-maker diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 7879c28ab8d..972aff90195 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -89,8 +89,8 @@ jobs: --volume=".:/ClickHouse" clickhouse/style-test \ /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ --gh-user-or-token="$GH_TOKEN" --jobs=5 \ - --output="/ClickHouse/docs/changelogs/${RELEASE_TAG}.md" "${RELEASE_TAG}" - git add ./docs/changelogs/${RELEASE_TAG}.md + --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD diff --git a/pyproject.toml b/pyproject.toml index 39511e1a0d3..c89d46c0929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ src_paths = ["src", "tests/ci", "tests/sqllogic"] [tool.pylint.'MESSAGES CONTROL'] # pytest.mark.parametrize is not callable (not-callable) disable = ''' + pointless-string-statement, + line-too-long, missing-docstring, too-few-public-methods, invalid-name, diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 83a51c66bce..7f4cf8c787a 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -40,7 +40,7 @@ class ShellRunner: return 0, "" print(f"Running shell command: [{command}]") if async_: - subprocess.Popen(command.split(" ")) + subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with return 0, "" result = subprocess.run( command + " 2>&1", @@ -316,9 +316,9 @@ class PackageDownloader: def __init__(self, release, commit_sha, version): assert version.startswith(release), "Invalid release branch or version" major, minor = map(int, release.split(".")) - self.package_names = self.PACKAGES + self.package_names = list(self.PACKAGES) if major > 
24 or (major == 24 and minor > 3): - self.package_names += self.EXTRA_PACKAGES + self.package_names += list(self.EXTRA_PACKAGES) self.release = release self.commit_sha = commit_sha self.version = version diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 2f556e3ed57..21fc02ce02a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -376,10 +376,10 @@ def main(): repo_urls = {} direct_urls: Dict[str, List[str]] = {} if pr_info.event_type == EventType.PULL_REQUEST: - release_or_pr = pr_info.number + release_or_pr = str(pr_info.number) sha = pr_info.sha elif pr_info.event_type == EventType.PUSH and pr_info.is_master: - release_or_pr = 0 + release_or_pr = str(0) sha = pr_info.sha else: release_or_pr = f"{args.version.major}.{args.version.minor}" diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 6b58ecece8d..36620d44a2d 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -16,7 +16,15 @@ from docker_images_helper import get_docker_image, pull_image from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo -from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results +from report import ( + ERROR, + FAILURE, + SUCCESS, + JobReport, + TestResults, + read_test_results, + FAIL, +) from ssh import SSHKey from stopwatch import Stopwatch @@ -204,8 +212,8 @@ def main(): autofix_description = "" fail_cnt = 0 for result in test_results: - if result.status == FAILURE: - # do not autofix if not only back failed + if result.status in (FAILURE, FAIL): + # do not autofix if not only black failed fail_cnt += 1 if args.push and fail_cnt == 1: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 0cf46732354..90fb9611151 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -711,9 +711,9 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: 12 - if random.random() < 0.03 - else random.randint(1, 3), + "max_insert_threads": lambda: ( + 12 if random.random() < 0.03 else random.randint(1, 3) + ), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), "group_by_two_level_threshold_bytes": threshold_generator( 0.2, 0.2, 1, 50000000 From d99c56ae8cf2ddcc8e404b20eee4885c71a43fb1 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 16:13:56 +0200 Subject: [PATCH 156/182] CI: Autoskip all non-affected jobs in PRs --- .github/workflows/pull_request.yml | 2 +- tests/ci/ci.py | 19 ++++- tests/ci/ci_cache.py | 107 +++++++++++++------------- tests/ci/ci_config.py | 11 ++- tests/ci/ci_definitions.py | 6 +- tests/ci/report.py | 1 + tests/ci/test_ci_config.py | 119 +++++++++++++++-------------- 7 files changed, 146 insertions(+), 119 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 259e6d41110..c9f4f858825 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,7 +172,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() }} + if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 53bbf588e1b..9065e2798c8 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -996,7 +996,7 @@ def main() -> 
int: args.skip_jobs, ) - if IS_CI and pr_info.is_pr: + if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache: ci_cache.filter_out_not_affected_jobs() ci_cache.print_status() @@ -1086,6 +1086,16 @@ def main() -> int: print(status) print("::endgroup::") previous_status = status.state + print("Create dummy job report with job_skipped flag") + JobReport( + status=status.state, + description="", + test_results=[], + start_time="", + duration=0.0, + additional_files=[], + job_skipped=True, + ).dump() # ci cache check if not previous_status and not ci_settings.no_ci_cache: @@ -1136,7 +1146,7 @@ def main() -> int: has_oom_error = True job_report = JobReport.load() if JobReport.exist() else None - if job_report: + if job_report and not job_report.job_skipped: ch_helper = ClickHouseHelper() check_url = "" @@ -1242,6 +1252,9 @@ def main() -> int: description = "ERROR: Out Of Memory" else: description = "ERROR: Unknown job status" + print( + f"No job report for {[args.job_name]} - post status [{description}]" + ) gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) post_commit_status( @@ -1249,7 +1262,7 @@ def main() -> int: ERROR, "", description, - job_report.check_name or _get_ext_check_name(args.job_name), + _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index bc6761959b4..ae3b8f9e9a4 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -520,6 +520,35 @@ class CiCache: self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch ) + def has_evidence(self, job: str, job_config: CI.JobConfig) -> bool: + """ + checks if the job has been seen in master/release CI + function is to be used to check if change did not affect the job + :param job_config: + :param job: + :return: + """ + return ( + self.is_successful( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + or self.is_pending( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + or self.is_failed( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + ) + def is_failed( self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: @@ -677,74 +706,46 @@ class CiCache: def filter_out_not_affected_jobs(self): """ Filter is to be applied in PRs to remove jobs that are not affected by the change - It removes jobs from @jobs_to_do if it is a: - 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) - 2. test job and it has finished on release branch (even if failed) - 3. build job which is not required by any test job that is left in @jobs_to_do - :return: """ - # 1. - remove_from_await_list = [] - for job_name, job_config in self.jobs_to_wait.items(): - if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: - remove_from_await_list.append(job_name) - for job in remove_from_await_list: - print(f"Filter job [{job}] - test job and not affected by the change") - del self.jobs_to_wait[job] - del self.jobs_to_do[job] - - # 2. 
remove_from_to_do = [] + required_builds = [] for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: - batches_to_remove = [] - assert job_config.batches is not None - for batch in job_config.batches: - if self.is_failed( - job_name, batch, job_config.num_batches, release_branch=True - ): - print( - f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" - ) - batches_to_remove.append(batch) - for batch in batches_to_remove: - job_config.batches.remove(batch) - if not job_config.batches: - print( - f"Filter [{job_name}] - not affected by the change (failed on release branch)" - ) + if job_config.reference_job_name: + reference_name = job_config.reference_job_name + reference_config = self.jobs_to_do[reference_name] + else: + reference_name = job_name + reference_config = job_config + if self.has_evidence( + job=reference_name, + job_config=reference_config, + ): remove_from_to_do.append(job_name) - for job in remove_from_to_do: - del self.jobs_to_do[job] + else: + required_builds += ( + job_config.required_builds if job_config.required_builds else [] + ) - # 3. - required_builds = [] # type: List[str] - for job_name, job_config in self.jobs_to_do.items(): - if CI.is_test_job(job_name) and job_config.required_builds: - required_builds += job_config.required_builds - required_builds = list(set(required_builds)) - - remove_builds = [] # type: List[str] has_builds_to_do = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_build_job(job_name): if job_name not in required_builds: - remove_builds.append(job_name) + remove_from_to_do.append(job_name) else: has_builds_to_do = True - for build_job in remove_builds: - print( - f"Filter build job [{build_job}] - not affected and not required by test jobs" - ) - del self.jobs_to_do[build_job] - if build_job in self.jobs_to_wait: - del self.jobs_to_wait[build_job] + if not has_builds_to_do: + remove_from_to_do.append(CI.JobNames.BUILD_CHECK) - if not has_builds_to_do and CI.JobNames.BUILD_CHECK in self.jobs_to_do: - print(f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds to do") - del self.jobs_to_do[CI.JobNames.BUILD_CHECK] + for job in remove_from_to_do: + print(f"Filter job [{job}] - not affected by the change") + if job in self.jobs_to_do: + del self.jobs_to_do[job] + if job in self.jobs_to_wait: + del self.jobs_to_wait[job] + self.jobs_to_skip.append(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d9f8e7d3afd..9b9ddee5326 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -413,7 +413,9 @@ class CI: release_only=True, ), JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + reference_job_name=JobNames.INTEGRATION_TEST_TSAN, ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], @@ -455,7 +457,10 @@ class CI: required_builds=[BuildNames.PACKAGE_UBSAN], ), JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600 + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + timeout=3600, + reference_job_name=JobNames.STATELESS_TEST_RELEASE, ), JobNames.JEPSEN_KEEPER: JobConfig( 
required_builds=[BuildNames.BINARY_RELEASE], @@ -640,7 +645,7 @@ class CI: @classmethod def is_test_job(cls, job: str) -> bool: - return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK + return not cls.is_build_job(job) @classmethod def is_docs_job(cls, job: str) -> bool: diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index d2da73f4e46..acd9b7fa904 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -284,8 +284,12 @@ class JobConfig: # GH Runner type (tag from @Runners) runner_type: str - # used for config validation in ci unittests + # used in ci unittests for config validation job_name_keyword: str = "" + # name of another job that (if provided) should be used to check if job was affected by the change or not (in CiCache.has_evidence(job=@reference_job_name) call) + # for example: "Stateless flaky check" can use reference_job_name="Stateless tests (release)". "Stateless flaky check" does not run on master + # and there cannot be an evidence for it, so instead "Stateless tests (release)" job name can be used to check the evidence + reference_job_name: str = "" # builds required for the job (applicable for test jobs) required_builds: Optional[List[str]] = None # build config for the build job (applicable for builds) diff --git a/tests/ci/report.py b/tests/ci/report.py index bdaa2e15130..039c21e9c9b 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -296,6 +296,7 @@ class JobReport: build_dir_for_upload: Union[Path, str] = "" # if False no GH commit status will be created by CI need_commit_status: bool = True + job_skipped: bool = False def __post_init__(self): assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 558faca915e..e12a67bfc92 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 import unittest +import random + from ci_config import CI import ci as CIPY from ci_settings import CiSettings @@ -57,6 +59,18 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", ) + def test_job_config_has_proper_values(self): + for job in CI.JobNames: + if CI.JOB_CONFIGS[job].reference_job_name: + reference_job_config = CI.JOB_CONFIGS[ + CI.JOB_CONFIGS[job].reference_job_name + ] + # reference job must run in all workflows and has digest + self.assertTrue(reference_job_config.pr_only == False) + self.assertTrue(reference_job_config.release_only == False) + self.assertTrue(reference_job_config.run_always == False) + self.assertTrue(reference_job_config.digest != CI.DigestConfig()) + def test_required_checks(self): for job in CI.REQUIRED_CHECKS: if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC): @@ -497,79 +511,68 @@ class TestCIConfig(unittest.TestCase): settings = CiSettings() settings.no_ci_cache = True pr_info = PRInfo(github_event=_TEST_EVENT_JSON) - pr_info.event_type = EventType.PUSH - pr_info.number = 0 - assert pr_info.is_release and not pr_info.is_merge_queue + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 123 + assert pr_info.is_pr ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True ) self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") - all_jobs_in_wf = list(ci_cache.jobs_to_do) assert not ci_cache.jobs_to_wait assert not ci_cache.jobs_to_skip + MOCK_AFFECTED_JOBS = [ + 
CI.JobNames.STATELESS_TEST_S3_DEBUG, + CI.JobNames.STRESS_TEST_TSAN, + ] + MOCK_REQUIRED_BUILDS = [] + # pretend there are pending jobs that we need to wait for job, job_config in ci_cache.jobs_to_do.items(): - ci_cache.jobs_to_wait[job] = job_config + if job in MOCK_AFFECTED_JOBS: + MOCK_REQUIRED_BUILDS += job_config.required_builds + elif job not in MOCK_AFFECTED_JOBS: + ci_cache.jobs_to_wait[job] = job_config - # remove couple tests from to_wait and - # expect they are preserved in @jobs_to_to along with required package_asan - del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_ASAN] - del ci_cache.jobs_to_wait[CI.JobNames.INTEGRATION_TEST_TSAN] - del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_MSAN] - - # pretend we have some batches failed for one of the job from the to_do list - failed_job = CI.JobNames.INTEGRATION_TEST_TSAN - failed_job_config = ci_cache.jobs_to_do[failed_job] - FAILED_BATCHES = [0, 3] - for batch in FAILED_BATCHES: - assert batch < failed_job_config.num_batches - record = CiCache.Record( - record_type=CiCache.RecordType.FAILED, - job_name=failed_job, - job_digest=ci_cache.job_digests[failed_job], - batch=batch, - num_batches=failed_job_config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.FAILED.value: - records_[record.to_str_key()] = record - - # pretend we have all batches failed for one of the job from the to_do list - failed_job = CI.JobNames.STATELESS_TEST_MSAN - failed_job_config = ci_cache.jobs_to_do[failed_job] - assert failed_job_config.num_batches > 1 - for batch in range(failed_job_config.num_batches): - record = CiCache.Record( - record_type=CiCache.RecordType.FAILED, - job_name=failed_job, - job_digest=ci_cache.job_digests[failed_job], - batch=batch, - num_batches=failed_job_config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.FAILED.value: - records_[record.to_str_key()] = record + for job, job_config in ci_cache.jobs_to_do.items(): + if job_config.reference_job_name: + # jobs with reference_job_name in config are not supposed to have records in the cache - continue + continue + if job in MOCK_AFFECTED_JOBS: + continue + for batch in range(job_config.num_batches): + # add any record into cache + record = CiCache.Record( + record_type=random.choice( + [ + CiCache.RecordType.FAILED, + CiCache.RecordType.PENDING, + CiCache.RecordType.SUCCESSFUL, + ] + ), + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record ci_cache.filter_out_not_affected_jobs() - expected_to_do = [ - CI.JobNames.STATELESS_TEST_ASAN, - CI.BuildNames.PACKAGE_ASAN, - CI.JobNames.INTEGRATION_TEST_TSAN, - CI.BuildNames.PACKAGE_TSAN, - CI.JobNames.BUILD_CHECK, - ] + expected_to_do = ( + [ + CI.JobNames.BUILD_CHECK, + ] + + MOCK_AFFECTED_JOBS + + MOCK_REQUIRED_BUILDS + ) self.assertCountEqual( list(ci_cache.jobs_to_wait), [ - CI.BuildNames.PACKAGE_ASAN, - CI.BuildNames.PACKAGE_TSAN, CI.JobNames.BUILD_CHECK, - ], + ] + + MOCK_REQUIRED_BUILDS, ) self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) - self.assertTrue(ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches) - for batch in ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches: - 
self.assertTrue(batch not in FAILED_BATCHES) From 162d875aee7ee8f9b2c055b8506c1e6467ad2d77 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 18:28:13 +0200 Subject: [PATCH 157/182] persistent job report path --- tests/ci/report.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 039c21e9c9b..5ca911f638f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH +from env_helper import REPORT_PATH, GITHUB_WORKSPACE logger = logging.getLogger(__name__) @@ -244,7 +244,8 @@ HTML_TEST_PART = """ """ BASE_HEADERS = ["Test name", "Test status"] -JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json" +# should not be in TEMP directory or any directory that may be cleaned during the job execution +JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" @dataclass From f9b1f2498a2c5044f6b8cc0859abaf66a87034a0 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 18:49:23 +0200 Subject: [PATCH 158/182] fix --- tests/ci/ci.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 9065e2798c8..5e13def95f1 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1246,6 +1246,8 @@ def main() -> int: indata["build"], ch_helper, ) + elif job_report.job_skipped: + print(f"Skipped after rerun check {[args.job_name]} - do nothing") else: if CI.is_test_job(args.job_name): if has_oom_error: From 96d3c311900fcb089cfccef965847634bd23de4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jul 2024 18:51:00 +0200 Subject: [PATCH 159/182] Update play.html --- programs/server/play.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/play.html b/programs/server/play.html index b5bcc687c27..9590a65524c 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -639,7 +639,7 @@ renderChart(json); } else { renderUnparsedResult(response); - stats.innerText = `Elapsed: ${(elapsed_msec/1000).toFixed(3)} sec.`; + stats.innerText = `Elapsed (client-side): ${(elapsed_msec / 1000).toFixed(3)} sec.`; } document.getElementById('check-mark').style.display = 'inline'; } else { From a1229c1ebbb1a783cf75914cc4dd7e1eb002bb6f Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 13 Jul 2024 20:37:13 +0200 Subject: [PATCH 160/182] Update install.md Production builds will not run on older ARM CPUs --- docs/en/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 98e73dec451..6209ef3c8ee 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -15,7 +15,7 @@ You have four options for getting up and running with ClickHouse: - **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse - **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse -- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, 
modern ARM (ARMv8.2-A up), or PowerPC64LE CPU architecture - **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub ## ClickHouse Cloud From 09b753ecc59e4e8bec78f63faac97e89df4c6c80 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 20:15:47 +0200 Subject: [PATCH 161/182] CI: Check job's exit status and report if killed --- tests/ci/ci.py | 52 +++++++++++++++++++++++--------------------- tests/ci/ci_utils.py | 12 ++++++++++ tests/ci/report.py | 26 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 25 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 5e13def95f1..f99a5dad92f 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string, Shell +from ci_utils import GHActions, normalize_string, Utils from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -264,7 +264,7 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): print("Clear dmesg") - Shell.run("sudo dmesg --clear ||:") + Utils.clear_dmesg() CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1035,6 +1035,7 @@ def main() -> int: elif args.pre: assert indata, "Run config must be provided via --infile" _pre_action(s3, indata, pr_info) + JobReport.create_pre_report().dump() ### RUN action: start elif args.run: @@ -1131,22 +1132,22 @@ def main() -> int: exit_code = 1 else: exit_code = _run_test(check_name, args.run_command) + job_report = JobReport.load() if JobReport.exist() else None + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + job_report.exit_code = exit_code + job_report.dump() ### RUN action: end ### POST action: start elif args.post: - has_oom_error = False - if Shell.check( - "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" - ): - print("WARNING: OOM while job execution") - CIBuddy(dry_run=not pr_info.is_release).post_error( - "Out Of Memory", job_name=_get_ext_check_name(args.job_name) - ) - has_oom_error = True - job_report = JobReport.load() if JobReport.exist() else None - if job_report and not job_report.job_skipped: + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + if not job_report.job_skipped and not job_report.pre_report: + # it's a real job report ch_helper = ClickHouseHelper() check_url = "" @@ -1248,29 +1249,30 @@ def main() -> int: ) elif job_report.job_skipped: print(f"Skipped after rerun check {[args.job_name]} - do nothing") - else: + elif job_report.job_skipped: + print(f"Job was skipped {[args.job_name]} - do nothing") + elif job_report.pre_report: + print(f"ERROR: Job was killed - generate evidence") + job_report.update_duration() + # Job was killed! 
+ if Utils.is_killed_with_oom(): + print("WARNING: OOM while job execution") + error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s" + else: + error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" + CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) if CI.is_test_job(args.job_name): - if has_oom_error: - description = "ERROR: Out Of Memory" - else: - description = "ERROR: Unknown job status" - print( - f"No job report for {[args.job_name]} - post status [{description}]" - ) gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) post_commit_status( commit, ERROR, "", - description, + "Error: " + error, _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) - else: - # no job report - print(f"No job report for {[args.job_name]} - do nothing") ### POST action: end ### MARK SUCCESS action: start diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index abc4a88989d..44bd37fe260 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -96,3 +96,15 @@ class Utils: if match: return int(match.group(1)) return None + + @staticmethod + def is_killed_with_oom(): + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + return True + return False + + @staticmethod + def clear_dmesg(): + Shell.run("sudo dmesg --clear ||:") diff --git a/tests/ci/report.py b/tests/ci/report.py index 5ca911f638f..4be7b438f4f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -297,7 +297,33 @@ class JobReport: build_dir_for_upload: Union[Path, str] = "" # if False no GH commit status will be created by CI need_commit_status: bool = True + # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False + # indicates that report generated by CI script in order to check later if job was killed before real report is generated + pre_report: bool = False + exit_code: int = -1 + + @staticmethod + def create_pre_report() -> "JobReport": + return JobReport( + status=ERROR, + description="", + test_results=[], + start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + duration=0.0, + additional_files=[], + pre_report=True, + ) + + def update_duration(self): + if not self.start_time: + self.duration = 0.0 + else: + start_time = datetime.datetime.strptime( + self.start_time, "%Y-%m-%d %H:%M:%S" + ) + current_time = datetime.datetime.utcnow() + self.duration = (current_time - start_time).total_seconds() def __post_init__(self): assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) From 707994b876145b93bcd57e204f1b2d449496e998 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jul 2024 21:48:53 +0200 Subject: [PATCH 162/182] Add a test for #37557 --- .../03204_distributed_with_scalar_subquery.reference | 0 .../03204_distributed_with_scalar_subquery.sql | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference create mode 100644 tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql 
b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql new file mode 100644 index 00000000000..0a07ce48268 --- /dev/null +++ b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_c3oollc8r; +CREATE TABLE t_c3oollc8r (c_k37 Int32, c_y String, c_bou Int32, c_g1 Int32, c_lfntfzg Int32, c_kntw50q Int32) ENGINE = MergeTree ORDER BY (); + +SELECT ( + SELECT c_k37 + FROM t_c3oollc8r + ) > c_lfntfzg +FROM remote('127.0.0.{1,2}', currentDatabase(), t_c3oollc8r); + +DROP TABLE t_c3oollc8r; From 1e03ca16b20e2b185d0b96de0f47f88e7293cf6a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 13 Jul 2024 21:52:01 +0200 Subject: [PATCH 163/182] Update developer-instruction.md --- docs/en/development/developer-instruction.md | 54 ++++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 0a1fe58b16f..4b11fb71230 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -226,15 +226,59 @@ Other IDEs you can use are [Sublime Text](https://www.sublimetext.com/), [Visual ## Writing Code {#writing-code} -The description of ClickHouse architecture can be found here: https://clickhouse.com/docs/en/development/architecture/ +Below you can find some quick links which may be useful when writing code for ClickHouse: -The Code Style Guide: https://clickhouse.com/docs/en/development/style/ +- [ClickHouse architecture description](https://clickhouse.com/docs/en/development/architecture/). +- [The code style guide](https://clickhouse.com/docs/en/development/style/). +- [Adding third-party libraries](https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries) +- [Writing tests](https://clickhouse.com/docs/en/development/tests/) +- [List of open issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest) -Adding third-party libraries: https://clickhouse.com/docs/en/development/contrib/#adding-third-party-libraries +## Writing Documentation {#writing-documentation} -Writing tests: https://clickhouse.com/docs/en/development/tests/ +As part of every pull request which adds a new feature, it is necessary to write documentation for it. If you'd like to preview your documentation changes the instructions for how to build the documentation page locally are available in the README.md file [here](https://github.com/ClickHouse/clickhouse-docs). When adding a new function to ClickHouse you can use the template below as a guide: -List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3Aissue+label%3Ahacktoberfest +```markdown +# newFunctionName + +A short description of the function goes here. It should describe briefly what it does and a typical usage case. + +**Syntax** + +\```sql +newFunctionName(arg1, arg2[, arg3]) +\``` + +**Arguments** + +- `arg1` — Description of the argument. [DataType](../data-types/float.md) +- `arg2` — Description of the argument. [DataType](../data-types/float.md) +- `arg3` — Description of optional argument (optional). [DataType](../data-types/float.md) + +**Implementation Details** + +A description of implementation details if relevant. + +**Returned value** + +- Returns {insert what the function returns here}. 
[DataType](../data-types/float.md) + +**Example** + +Query: + +\```sql +SELECT 'write your example query here'; +\``` + +Response: + +\```response +┌───────────────────────────────────┐ +│ the result of the query │ +└───────────────────────────────────┘ +\``` +``` ## Test Data {#test-data} From 7e0daae303eda27229743eaa9d5f9371c8bcb6a4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 20:05:39 +0000 Subject: [PATCH 164/182] Revert "Fix style" This reverts commit 0eb081f899bd0fa9c185add84c9c59e212dfd6ca. --- tests/integration/test_lost_part/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index e88b84775b4..5070b3c5b68 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,9 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query( - "select distinct _partition_id from mt3 where p = 'x'" - ).strip() + partition_id = node1.query("select distinct _partition_id from mt3 where p = 'x'").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts From 9b87a1469b910d643e603f49228ef883116b3905 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 20:06:12 +0000 Subject: [PATCH 165/182] Revert "Fix integration tests" This reverts commit 85f5fe832a23fa25e9784728b9f26d712057148a. --- tests/integration/test_backward_compatibility/test_functions.py | 2 -- tests/integration/test_lost_part/test.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index 551a0550c0c..fc03a77030e 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -168,8 +168,6 @@ def test_string_functions(start_cluster): "filesystemAvailable", # Exclude it for now. Looks like the result depends on the build type. "farmHash64", - # Removed in 24.7 - "partitionId", ] functions = filter(lambda x: x not in excludes, functions) diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 5070b3c5b68..5d0ec383a39 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select distinct _partition_id from mt3 where p = 'x'").strip() + partition_id = node1.query("select _partition_id").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts From cef13cfa723508e40d203eff4b185a8ae716c911 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 20:06:19 +0000 Subject: [PATCH 166/182] Revert "Update 02415_all_new_functions_must_be_documented.reference" This reverts commit ee4162f5ec3c34e2e35eec5fca012d75aee84401. 
--- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index eb5ae5b4f44..a152066a460 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -534,6 +534,7 @@ parseDateTimeInJodaSyntaxOrZero parseDateTimeOrNull parseDateTimeOrZero parseTimeDelta +partitionId path pathFull pi From eb9a629868899c144f30a3b5fe19b9832aafc43e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 20:06:37 +0000 Subject: [PATCH 167/182] Revert "remove partitionID function, update tests and remove from documentation" This reverts commit c3b72386a2f26ad40431eca0c1985b14663b43ca. --- .../functions/other-functions.md | 60 ++++++++++++++++ src/Functions/partitionId.cpp | 71 +++++++++++++++++++ tests/integration/test_lost_part/test.py | 2 +- .../01748_partition_id_pruning.sql | 6 +- 4 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 src/Functions/partitionId.cpp diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 8f9fd9abb0d..68d2c12f25c 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2984,6 +2984,66 @@ Result: └─────────┘ ``` +## partitionId + +Returns computed [partition](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) IDs of its arguments. + +:::note +This function is slow and should not be called for large amount of rows. +::: + +**Syntax** + +```sql +partitionId(x[, y, ...]); +``` + +**Arguments** + +- `x` — Column for which to return the partition ID. +- `y, ...` — Remaining N columns for which to return the partition ID (optional). + +**Return Type** + +- Partition ID that the row belongs to. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS mt; +CREATE TABLE mt +( + `i` int, + `j` int +) +ENGINE = MergeTree +PARTITION BY i +ORDER BY j +SETTINGS index_granularity = 1; + +INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); + +SELECT * FROM mt WHERE _partition_id = partitionId(1); +SELECT * FROM mt WHERE _partition_id = partitionId(2); +``` +Result: + +```response + ┌─i─┬─j─┐ +1. │ 1 │ 1 │ +2. │ 1 │ 2 │ +3. │ 1 │ 3 │ + └───┴───┘ + ┌─i─┬─j─┐ +1. │ 2 │ 4 │ +2. │ 2 │ 5 │ +3. │ 2 │ 6 │ + └───┴───┘ +``` + + ## shardNum Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp new file mode 100644 index 00000000000..e2e9038cd8b --- /dev/null +++ b/src/Functions/partitionId.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +/** partitionId(x, y, ...) is a function that computes partition ids of arguments. + * The function is slow and should not be called for large amount of rows. 
+ */ +class FunctionPartitionId : public IFunction +{ +public: + static constexpr auto name = "partitionId"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } + + bool useDefaultImplementationForNulls() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + Block sample_block(arguments); + size_t size = arguments.size(); + + auto result_column = ColumnString::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + Row row(size); + for (size_t i = 0; i < size; ++i) + arguments[i].column->get(j, row[i]); + MergeTreePartition partition(std::move(row)); + result_column->insert(partition.getID(sample_block)); + } + return result_column; + } +}; + +REGISTER_FUNCTION(PartitionId) +{ + factory.registerFunction(); +} + +} diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 5d0ec383a39..b8e67551d79 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select _partition_id").strip() + partition_id = node1.query("select partitionId('x')").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index 0e942573652..b637528bc6c 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set max_rows_to_read = 3; -select * from x where _partition_id = '1'; +select * from x where _partition_id = partitionId(1); set max_rows_to_read = 5; -- one row for subquery + subquery -select * from x where _partition_id in (select number + 1 from numbers(1)); +select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection set max_rows_to_read = 3; -select count() from x where _partition_id in (select number + 1 from numbers(1)); +select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); drop table x; From 2f913f193ca8a08b10800d5b4b9d0b9473db91d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 20:25:58 +0000 Subject: [PATCH 168/182] Fixup docs --- 
.../functions/other-functions.md | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 68d2c12f25c..cb31d21e1b3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2986,7 +2986,7 @@ Result: ## partitionId -Returns computed [partition](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) IDs of its arguments. +Computes the [partition ID](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). :::note This function is slow and should not be called for large amount of rows. @@ -3003,44 +3003,44 @@ partitionId(x[, y, ...]); - `x` — Column for which to return the partition ID. - `y, ...` — Remaining N columns for which to return the partition ID (optional). -**Return Type** +**Returned Value** -- Partition ID that the row belongs to. [String](../data-types/string.md). +- Partition ID that the row would belong to. [String](../data-types/string.md). **Example** Query: ```sql -DROP TABLE IF EXISTS mt; -CREATE TABLE mt +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( - `i` int, - `j` int + i int, + j int ) ENGINE = MergeTree PARTITION BY i -ORDER BY j -SETTINGS index_granularity = 1; +ORDER BY tuple(); -INSERT INTO mt VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); +INSERT INTO tab VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); -SELECT * FROM mt WHERE _partition_id = partitionId(1); -SELECT * FROM mt WHERE _partition_id = partitionId(2); +SELECT i, j, partitionId(i), _partition_id FROM tab ORDER BY i, j; ``` + Result: ```response - ┌─i─┬─j─┐ -1. │ 1 │ 1 │ -2. │ 1 │ 2 │ -3. │ 1 │ 3 │ - └───┴───┘ - ┌─i─┬─j─┐ -1. │ 2 │ 4 │ -2. │ 2 │ 5 │ -3. │ 2 │ 6 │ - └───┴───┘ +┌─i─┬─j─┬─partitionId(i)─┬─_partition_id─┐ +│ 1 │ 1 │ 1 │ 1 │ +│ 1 │ 2 │ 1 │ 1 │ +│ 1 │ 3 │ 1 │ 1 │ +└───┴───┴────────────────┴───────────────┘ +┌─i─┬─j─┬─partitionId(i)─┬─_partition_id─┐ +│ 2 │ 4 │ 2 │ 2 │ +│ 2 │ 5 │ 2 │ 2 │ +│ 2 │ 6 │ 2 │ 2 │ +└───┴───┴────────────────┴───────────────┘ ``` From dfb831f86119588f63a48f12807c44d568995ab4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 22:07:46 +0000 Subject: [PATCH 169/182] Try to fix an msan issue --- src/Functions/changeDate.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 5965f3d1d00..19e4c165ee3 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -1,6 +1,5 @@ #include "Common/DateLUTImpl.h" #include "Common/Exception.h" -#include #include #include #include @@ -101,19 +100,16 @@ public: template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { - bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); - size_t result_rows_count = (is_const ? 
1 : input_rows_count); - typename ResultDataType::ColumnType::MutablePtr result_col; if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); - result_col = ResultDataType::ColumnType::create(result_rows_count, scale); + result_col = ResultDataType::ColumnType::create(input_rows_count, scale); } else - result_col = ResultDataType::ColumnType::create(result_rows_count); + result_col = ResultDataType::ColumnType::create(input_rows_count); auto date_time_col = arguments[0].column->convertToFullIfNeeded(); const auto & date_time_col_data = typeid_cast(*date_time_col).getData(); @@ -133,7 +129,7 @@ public: for (size_t j = 0; j < scale; ++j) deg *= 10; - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg); Int64 fraction = date_time_col_data[i] % deg; @@ -144,7 +140,7 @@ public: else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0); @@ -153,7 +149,7 @@ public: else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); @@ -162,7 +158,7 @@ public: else if constexpr (std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]); result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); @@ -171,7 +167,7 @@ public: else { const auto & date_lut = DateLUT::instance(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time; if (isDate(input_type)) @@ -186,9 +182,6 @@ public: } } - if (is_const) - return ColumnConst::create(std::move(result_col), input_rows_count); - return result_col; } From a90e10c438e58e896482f958ca905e7dc21a9993 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 13 Jul 2024 22:38:28 +0000 Subject: [PATCH 170/182] add alias partitionI, update tests, and docs --- docs/en/sql-reference/functions/other-functions.md | 10 +++++----- src/Functions/partitionId.cpp | 1 + tests/integration/test_lost_part/test.py | 2 +- .../queries/0_stateless/01748_partition_id_pruning.sql | 6 +++--- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index cb31d21e1b3..260457b3be1 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2984,7 +2984,7 @@ Result: └─────────┘ ``` -## partitionId +## partitionID Computes the [partition 
ID](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). @@ -2995,7 +2995,7 @@ This function is slow and should not be called for large amount of rows. **Syntax** ```sql -partitionId(x[, y, ...]); +partitionID(x[, y, ...]); ``` **Arguments** @@ -3025,18 +3025,18 @@ ORDER BY tuple(); INSERT INTO tab VALUES (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); -SELECT i, j, partitionId(i), _partition_id FROM tab ORDER BY i, j; +SELECT i, j, partitionID(i), _partition_id FROM tab ORDER BY i, j; ``` Result: ```response -┌─i─┬─j─┬─partitionId(i)─┬─_partition_id─┐ +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ │ 1 │ 1 │ 1 │ 1 │ │ 1 │ 2 │ 1 │ 1 │ │ 1 │ 3 │ 1 │ 1 │ └───┴───┴────────────────┴───────────────┘ -┌─i─┬─j─┬─partitionId(i)─┬─_partition_id─┐ +┌─i─┬─j─┬─partitionID(i)─┬─_partition_id─┐ │ 2 │ 4 │ 2 │ 2 │ │ 2 │ 5 │ 2 │ 2 │ │ 2 │ 6 │ 2 │ 2 │ diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index e2e9038cd8b..00ff3981c88 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -66,6 +66,7 @@ public: REGISTER_FUNCTION(PartitionId) { factory.registerFunction(); + factory.registerAlias("partitionID", "partitionId", FunctionFactory::CaseInsensitive); } } diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index b8e67551d79..c2469d6c524 100644 --- a/tests/integration/test_lost_part/test.py +++ b/tests/integration/test_lost_part/test.py @@ -266,7 +266,7 @@ def test_lost_last_part(start_cluster): "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - partition_id = node1.query("select partitionId('x')").strip() + partition_id = node1.query("select partitionID('x')").strip() remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") # other way to detect broken parts diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index b637528bc6c..f492a4a8871 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -8,17 +8,17 @@ insert into x values (1, 1), (1, 2), (1, 3), (2, 4), (2, 5), (2, 6); set max_rows_to_read = 3; -select * from x where _partition_id = partitionId(1); +select * from x where _partition_id = partitionID(1); set max_rows_to_read = 5; -- one row for subquery + subquery -select * from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select * from x where _partition_id in (select partitionID(number + 1) from numbers(1)); -- trivial count optimization test set max_rows_to_read = 2; -- one row for subquery + subquery itself -- TODO: Relax the limits because we might build prepared set twice with _minmax_count_projection set max_rows_to_read = 3; -select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1)); +select count() from x where _partition_id in (select partitionID(number + 1) from numbers(1)); drop table x; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 935b08c1b88..e5f06b468a0 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2193,6 +2193,7 @@ parseTimeDelta parseable parsers partitionId +partitionID pathFull pclmulqdq pcre From 22611781b0944670b4b33aa65e6c11733b4e1f3e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 12 Jul 2024 14:40:40 +0200 Subject: [PATCH 171/182] Fix jemalloc 
assertion due to non-monotonic CLOCK_MONOTONIC_COARSE Recently one tricky assertion of jemalloc had been discovered [1]: Failed assertion: "nstime_compare(&decay->epoch, new_time) <= 0" [1]: https://github.com/ClickHouse/ClickHouse/issues/66193 And as it turns out it is really possible for CLOCK_MONOTONIC_COARSE to go backwards, in a nutshell it can be done with ADJ_FREQUENCY, you can find example here [2]. And I can't trigger this issue for non-coarse clocks. [2]: https://gist.github.com/azat/7ea7f50ed75591b1af2d675a240ea94c?permalink_comment_id=5119222#gistcomment-5119222 But, jemalloc do not call clock_gettime() that frequently (I've verified it), so it can use non-coarse version - CLOCK_MONOTONIC I've also measured the latency of CLOCK_MONOTONIC and CLOCK_MONOTONIC_COARSE, and it is 20ns vs 4ns per call [3], so make this change affect performance you need really frequently calls of clock_gettime. [3]: https://gist.github.com/azat/622fa1f9a5d8e7d546ee9d294501961d?permalink_comment_id=5119245#gistcomment-5119245 Interesting, that this bug started to appears only after jemalloc heap profiler had been enabled by default [4], no clue why (I would believe more in a more frequent calls to clock_adjtime(ADJ_FREQUENCY), but I can't verify this) [4]: https://github.com/ClickHouse/ClickHouse/pull/65702 To be continued... Fixes: https://github.com/ClickHouse/ClickHouse/issues/66193 Signed-off-by: Azat Khuzhin --- contrib/jemalloc-cmake/README | 11 +++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- 8 files changed, 18 insertions(+), 7 deletions(-) diff --git a/contrib/jemalloc-cmake/README b/contrib/jemalloc-cmake/README index 91b58448c1f..89d7c818445 100644 --- a/contrib/jemalloc-cmake/README +++ b/contrib/jemalloc-cmake/README @@ -4,3 +4,14 @@ It allows to integrate JEMalloc into CMake project. - Added JEMALLOC_CONFIG_MALLOC_CONF substitution - Add musl support (USE_MUSL) - Also note, that darwin build requires JEMALLOC_PREFIX, while others do not +- JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE should be disabled + + CLOCK_MONOTONIC_COARSE can go backwards after clock_adjtime(ADJ_FREQUENCY) + Let's disable it for now, and this menas that CLOCK_MONOTONIC will be used, + and this, should not be a problem, since: + - jemalloc do not call clock_gettime() that frequently + - the difference is CLOCK_MONOTONIC 20ns and CLOCK_MONOTONIC_COARSE 4ns + + This can be done with the following command: + + gg JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE | cut -d: -f1 | xargs sed -i 's@#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE@/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */@' diff --git a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index e027ee97d1a..b58d5c1eb8e 100644 --- a/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_freebsd_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. 
*/ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in index 0a72e71496b..d5197c6127f 100644 --- a/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_ppc64le/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in index 597f2d59933..8bfc4b60f3d 100644 --- a/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_riscv64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in index 531f2bca0c2..b3f3a489a83 100644 --- a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in @@ -96,7 +96,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index d21098a4dcc..cf9acdb0b80 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,7 +98,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. 
*/ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in index e08a2bed2ec..729bd82c2fa 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64_musl/jemalloc/internal/jemalloc_internal_defs.h.in @@ -99,7 +99,7 @@ /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */ /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. From 669ce7cd40881a075fe473027e6668d13683c840 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 12 Jul 2024 15:01:40 +0200 Subject: [PATCH 172/182] Enable jemalloc heap profiler by default for debug builds as well Since the issue with jemalloc assertion is clear, let's revert that workaround Signed-off-by: Azat Khuzhin --- contrib/jemalloc-cmake/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 1fbfd29a3bd..191886981c3 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,11 +34,7 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") - else() - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") - endif() + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true") else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() From 80c8511004008e5caaec08ad7f869baa578cad6c Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 11:39:31 +0200 Subject: [PATCH 173/182] CI: Add retry for GH set_comment_status call --- tests/ci/ci.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f99a5dad92f..979d108378d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -6,6 +6,7 @@ import os import re import subprocess import sys +import time from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional @@ -550,7 +551,17 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: except Exception as e: raise e print("Going to update overall CI report") - set_status_comment(commit, pr_info) + for retry in range(2): + try: + set_status_comment(commit, pr_info) + break + except Exception as e: + print( + f"WARNING: Failed to update CI Running status, attempt [{retry + 1}], exception [{e}]" + ) + time.sleep(1) + else: + print("ERROR: All retry attempts failed.") print("... 
CI report update - done") From 379706d2d5118dcd53c87f661b7a06add00bee18 Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 11:47:18 +0200 Subject: [PATCH 174/182] fix for job filtering --- tests/ci/ci.py | 4 ++-- tests/ci/ci_cache.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 979d108378d..8dcf3fc4c69 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1007,10 +1007,10 @@ def main() -> int: args.skip_jobs, ) + ci_cache.print_status() if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache: ci_cache.filter_out_not_affected_jobs() - - ci_cache.print_status() + ci_cache.print_status() if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index ae3b8f9e9a4..7552b10b873 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -714,7 +714,7 @@ class CiCache: if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: if job_config.reference_job_name: reference_name = job_config.reference_job_name - reference_config = self.jobs_to_do[reference_name] + reference_config = CI.JOB_CONFIGS[reference_name] else: reference_name = job_name reference_config = job_config @@ -745,7 +745,8 @@ class CiCache: del self.jobs_to_do[job] if job in self.jobs_to_wait: del self.jobs_to_wait[job] - self.jobs_to_skip.append(job) + if job in self.jobs_to_skip: + self.jobs_to_skip.remove(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ From ad86e3060a907bd3b81c56724cea6df30823716e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 14 Jul 2024 11:48:13 +0000 Subject: [PATCH 175/182] OpenSSL: Follow-up to #66064 More commits were added to the upstream fix after #66064 has been merged in ClickHouse. This PR adds these commits to keep things in-sync. 
--- contrib/openssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/openssl b/contrib/openssl index ee2bb8513b2..66deddc1e53 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit ee2bb8513b28bf86b35404dd17a0e29305ca9e08 +Subproject commit 66deddc1e53cda8706604a019777259372d1bd62 From f374cdfe69a8d2bc1f7360ead578a5323331650d Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 13:53:20 +0200 Subject: [PATCH 176/182] CI: Fix for job filtering in PRs --- tests/ci/ci_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 7552b10b873..5c6d3b05021 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -533,19 +533,19 @@ class CiCache: job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) or self.is_pending( job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) or self.is_failed( job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) ) From 9dabf205e623680807bffe022d8b8b6b856779d8 Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 14:01:10 +0200 Subject: [PATCH 177/182] CI: Fix for await to always await for builds --- tests/ci/ci_cache.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 5c6d3b05021..5229c754d70 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -765,14 +765,19 @@ class CiCache: MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - # FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time + def _has_build_job(): + for job in self.jobs_to_wait: + if CI.is_build_job(job): + return True + return False + if not is_release: - MAX_ROUNDS_TO_WAIT = 3 + # in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough + MAX_ROUNDS_TO_WAIT = 2 while ( - len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT - and round_cnt < MAX_ROUNDS_TO_WAIT - ): + len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT or _has_build_job() + ) and round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:", From fd44c533cfefbb15e9c9aa6663973c4533ebf643 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jul 2024 20:57:02 +0200 Subject: [PATCH 178/182] Add one more revision to ignore --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 06e893fabb3..5088bf90ede 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -13,3 +13,6 @@ # dbms/ → src/ # (though it is unlikely that you will see it in blame) 06446b4f08a142d6f1bc30664c47ded88ab51782 + +# Applied Black formatter for Python code +e6f5a3f98b21ba99cf274a9833797889e020a2b3 From 89f8c577e472d0c3709220427c2da1e880eea3ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 05:34:14 +0200 Subject: [PATCH 179/182] Do not fix anything --- tests/queries/0_stateless/00719_parallel_ddl_db.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index ceba24df7e4..ac4e4eae4f2 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -5,7 
+5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DB_SUFFIX=$RANDOM +DB_SUFFIX=${RANDOM}${RANDOM}${RANDOM}${RANDOM} ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" function query() From cd719bf8fc76370821c6b1d46a226e6784dce055 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 15 Jul 2024 08:57:11 +0200 Subject: [PATCH 180/182] Make the alias case-sensitive --- src/Functions/partitionId.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index 00ff3981c88..dab56f39adc 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -66,7 +66,7 @@ public: REGISTER_FUNCTION(PartitionId) { factory.registerFunction(); - factory.registerAlias("partitionID", "partitionId", FunctionFactory::CaseInsensitive); + factory.registerAlias("partitionID", "partitionId"); } } From 7ef89f169ba53a73c4645d46d48a2744dfd5a4da Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 08:59:39 +0200 Subject: [PATCH 181/182] CI: Fix for skipping Builds_2 in PRs' CI --- tests/ci/ci_cache.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 5229c754d70..44cdb19c3a1 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -710,6 +710,7 @@ class CiCache: """ remove_from_to_do = [] required_builds = [] + has_test_jobs_to_skip = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: if job_config.reference_job_name: @@ -723,30 +724,30 @@ class CiCache: job_config=reference_config, ): remove_from_to_do.append(job_name) + has_test_jobs_to_skip = True else: required_builds += ( job_config.required_builds if job_config.required_builds else [] ) - has_builds_to_do = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_build_job(job_name): if job_name not in required_builds: remove_from_to_do.append(job_name) - else: - has_builds_to_do = True - if not has_builds_to_do: + if not required_builds: remove_from_to_do.append(CI.JobNames.BUILD_CHECK) - for job in remove_from_to_do: - print(f"Filter job [{job}] - not affected by the change") - if job in self.jobs_to_do: - del self.jobs_to_do[job] - if job in self.jobs_to_wait: - del self.jobs_to_wait[job] - if job in self.jobs_to_skip: - self.jobs_to_skip.remove(job) + if has_test_jobs_to_skip: + # if there are no test jobs to skip, then we must not skip anything - it's a new CI run with new build digest + for job in remove_from_to_do: + print(f"Filter job [{job}] - not affected by the change") + if job in self.jobs_to_do: + del self.jobs_to_do[job] + if job in self.jobs_to_wait: + del self.jobs_to_wait[job] + if job in self.jobs_to_skip: + self.jobs_to_skip.remove(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ From f311612e91aa195a5074cb104d2bb9845babbc6b Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 15 Jul 2024 09:59:23 +0200 Subject: [PATCH 182/182] check filter_out_not_affected_jobs in unit test --- tests/ci/ci_cache.py | 30 +++++++++++++++--------------- tests/ci/test_ci_config.py | 1 + 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 44cdb19c3a1..417d5dbc262 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -729,25 +729,25 @@ class CiCache: required_builds 
+= ( job_config.required_builds if job_config.required_builds else [] ) - - for job_name, job_config in self.jobs_to_do.items(): - if CI.is_build_job(job_name): - if job_name not in required_builds: - remove_from_to_do.append(job_name) + if has_test_jobs_to_skip: + # If there are tests to skip, it means build digest has not been changed. + # No need to test builds. Let's keep all builds required for test jobs and skip the others + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_build_job(job_name): + if job_name not in required_builds: + remove_from_to_do.append(job_name) if not required_builds: remove_from_to_do.append(CI.JobNames.BUILD_CHECK) - if has_test_jobs_to_skip: - # if there are no test jobs to skip, then we must not skip anything - it's a new CI run with new build digest - for job in remove_from_to_do: - print(f"Filter job [{job}] - not affected by the change") - if job in self.jobs_to_do: - del self.jobs_to_do[job] - if job in self.jobs_to_wait: - del self.jobs_to_wait[job] - if job in self.jobs_to_skip: - self.jobs_to_skip.remove(job) + for job in remove_from_to_do: + print(f"Filter job [{job}] - not affected by the change") + if job in self.jobs_to_do: + del self.jobs_to_do[job] + if job in self.jobs_to_wait: + del self.jobs_to_wait[job] + if job in self.jobs_to_skip: + self.jobs_to_skip.remove(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index e12a67bfc92..c901994affa 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -283,6 +283,7 @@ class TestCIConfig(unittest.TestCase): ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True ) + ci_cache.filter_out_not_affected_jobs() actual_jobs_to_do = list(ci_cache.jobs_to_do) expected_jobs_to_do = [] for set_ in settings.ci_sets: