From 0dbd569b50828003dc6a55af56717dab92054d8f Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 12 Mar 2024 20:00:04 +0100 Subject: [PATCH 001/299] Minor change --- src/Storages/MergeTree/MergeTreePartInfo.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 5fbb5d70bf3..918acc78e8f 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -101,9 +101,8 @@ struct MergeTreePartInfo bool isFakeDropRangePart() const { - /// Another max level was previously used for REPLACE/MOVE PARTITION - auto another_max_level = std::numeric_limits::max(); - return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; + /// LEGACY_MAX_LEVEL was previously used for REPLACE/MOVE PARTITION + return level == MergeTreePartInfo::MAX_LEVEL || level == MergeTreePartInfo::LEGACY_MAX_LEVEL; } String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; From d84e272ed8687f8d13f0286ac37b93db3f874715 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 30 Apr 2024 13:15:23 +0000 Subject: [PATCH 002/299] Add changeDate functions --- src/Functions/changeDate.cpp | 342 ++++++++++++++++++ .../0_stateless/02982_changeDate.reference | 36 ++ .../queries/0_stateless/02982_changeDate.sql | 42 +++ 3 files changed, 420 insertions(+) create mode 100644 src/Functions/changeDate.cpp create mode 100644 tests/queries/0_stateless/02982_changeDate.reference create mode 100644 tests/queries/0_stateless/02982_changeDate.sql diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp new file mode 100644 index 00000000000..14f6887af47 --- /dev/null +++ b/src/Functions/changeDate.cpp @@ -0,0 +1,342 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Common/DateLUTImpl.h" +#include +#include +#include "Columns/IColumn.h" +#include "DataTypes/IDataType.h" +#include "base/DayNum.h" + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; +} + +namespace +{ + +enum ChangeDateFunctionsNames +{ + CHANGE_YEAR = 0, + CHANGE_MONTH = 1, + CHANGE_DAY = 2, + CHANGE_HOUR = 3, + CHANGE_MINUTE = 4, + CHANGE_SECOND = 5 +}; + +template +class FunctionChangeDate : public IFunction +{ +public: + static constexpr auto name = Traits::Name; + + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; + + String getName() const override { return name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date"}, + {mandatory_argument_names[1], &isNumber, nullptr, "Number"} + }; + validateFunctionArgumentTypes(*this, arguments, args); + + if (Traits::EnumName >= 3) + { + if (isDate(arguments[0].type)) + return std::make_shared(); + if (isDate32(arguments[0].type)) + return std::make_shared(DataTypeDateTime64::default_scale); + } + + return arguments[0].type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, 
const DataTypePtr & result_type, size_t input_rows_count) const override + { + const auto & input_type = arguments[0].type; + if (isDate(input_type)) + { + if (Traits::EnumName >= 3) + return execute(arguments, input_type, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDate32(input_type)) + { + if (Traits::EnumName >= 3) + return executeDate32ToDateTime64(arguments, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); + } + if (isDateTime(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + return executeDateTime64(arguments, result_type, input_rows_count); + } + + + template + ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const + { + const auto & date_lut = DateLUT::instance(); + + auto result_column = ResultDataType::ColumnType::create(input_rows_count); + auto & result_data = result_column->getData(); + + auto input_column = castColumn(arguments[0], std::make_shared()); + input_column = input_column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time; + if (isDateOrDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + else + time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + if (isDateOrDate32(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } + + return result_column; + } + + ColumnPtr executeDate32ToDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + const auto & date_lut = DateLUT::instance(); + + auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); + auto & result_data = result_column->getData(); + + auto input_column = arguments[0].column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1000, 0); + } + + return result_column; + } + + ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); + auto & result_data = result_column->getData(); + + auto input_column = arguments[0].column->convertToFullColumnIfConst(); + const auto & input_column_data = typeid_cast(*input_column).getData(); + + auto new_value_column = castColumn(arguments[1], 
std::make_shared()); + new_value_column = new_value_column->convertToFullColumnIfConst(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; + for (size_t i = 0; i < scale; ++i) { + deg *= 10; + } + + for (size_t i = 0; i < input_rows_count; ++i) + { + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); + Int64 fraction = input_column_data[i] % deg; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + } + + return result_column; + } + + Int64 getChangedDate(Int64 time, Float32 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + { + auto year = time / 10'000'000'000; + auto month = (time % 10'000'000'000) / 100'000'000; + auto day = (time % 100'000'000) / 1000'000; + auto hours = (time % 1000'000) / 10'000; + auto minutes = (time % 10'000) / 100; + auto seconds = time % 100; + + Int64 min_date, max_date; + Int16 min_year, max_year; + if (isDate(result_type)) + { + min_date = date_lut.makeDayNum(1970, 1, 1); + max_date = date_lut.makeDayNum(2149, 6, 6); + min_year = 1970; + max_year = 2149; + } + else if (isDate32(result_type)) + { + min_date = date_lut.makeDayNum(1900, 1, 1); + max_date = date_lut.makeDayNum(2299, 12, 31); + min_year = 1900; + max_year = 2299; + } + else if (isDateTime(result_type)) + { + min_date = 0; + max_date = 0x0ffffffffll; + min_year = 1970; + max_year = 2106; + } + else + { + min_date = date_lut.makeDateTime(1900, 1, 1, 0,0 , 0) * deg; + max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); + min_year = 1900; + max_year = 2299; + } + + Int8 fl = 0; + + switch (Traits::EnumName) + { + case CHANGE_YEAR: + if (new_value < min_year) + fl = 1; + else if (new_value > max_year) + fl = 2; + year = static_cast(new_value); + break; + case CHANGE_MONTH: + if (new_value < 1 || new_value > 12) + fl = 1; + month = static_cast(new_value); + break; + case CHANGE_DAY: + if (new_value < 1 || new_value > 31) + fl = 1; + day = static_cast(new_value); + break; + case CHANGE_HOUR: + if (new_value < 0 || new_value > 23) + fl = 1; + hours = static_cast(new_value); + break; + case CHANGE_MINUTE: + if (new_value < 0 || new_value > 59) + fl = 1; + minutes = static_cast(new_value); + break; + case CHANGE_SECOND: + if (new_value < 0 || new_value > 59) + fl = 1; + seconds = static_cast(new_value); + break; + } + + if (fl == 1) + return min_date; + + if (fl == 2) + return max_date; + + Int64 result; + if (isDateOrDate32(result_type)) + result = date_lut.makeDayNum(year, month, day); + else + { + if (isDateTime(result_type)) + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); + else + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + } + + + if (result > max_date) + return max_date; + + return result; + } +}; + + +struct ChangeYearTraits +{ + static constexpr auto Name = "changeYear"; + static constexpr auto EnumName = CHANGE_YEAR; +}; + +struct ChangeMonthTraits +{ + static constexpr auto Name = "changeMonth"; + static constexpr auto EnumName = CHANGE_MONTH; +}; + +struct ChangeDayTraits +{ + static constexpr auto Name = "changeDay"; + static constexpr auto EnumName = CHANGE_DAY; +}; + +struct ChangeHourTraits +{ + static constexpr auto Name = "changeHour"; + static constexpr auto EnumName = 
CHANGE_HOUR; +}; + +struct ChangeMinuteTraits +{ + static constexpr auto Name = "changeMinute"; + static constexpr auto EnumName = CHANGE_MINUTE; +}; + +struct ChangeSecondTraits +{ + static constexpr auto Name = "changeSecond"; + static constexpr auto EnumName = CHANGE_SECOND; +}; + + +} + +REGISTER_FUNCTION(ChangeDate) +{ + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); +} + +} diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference new file mode 100644 index 00000000000..d7d4edf4b43 --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -0,0 +1,36 @@ +2000-01-01 +2001-01-01 +2002-01-01 11:22:33 +2003-01-01 11:22:33.4444 +1970-02-01 +1970-03-01 +1970-04-01 11:22:33 +1970-05-01 11:22:33.4444 +1970-01-02 +1970-01-03 +1970-01-04 11:22:33 +1970-01-05 11:22:33.4444 +1970-01-01 12:00:00 +1970-01-01 13:00:00.000 +1970-01-01 14:22:33 +1970-01-01 15:22:33.4444 +1970-01-01 00:23:00 +1970-01-01 00:24:00.000 +1970-01-01 11:25:33 +1970-01-01 11:26:33.4444 +1970-01-01 00:00:34 +1970-01-01 00:00:35.000 +1970-01-01 11:22:36 +1970-01-01 11:22:37.4444 +1970-01-01 +2149-06-06 +2149-06-06 +1970-01-01 +1970-01-01 +1970-01-01 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql new file mode 100644 index 00000000000..fbfe3771b33 --- /dev/null +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -0,0 +1,42 @@ +SELECT changeYear(makeDate(1970, 01, 01), 2000); +SELECT changeYear(makeDate32(1970, 01, 01), 2001); +SELECT changeYear(makeDateTime(1970, 01, 01, 11, 22, 33), 2002); +SELECT changeYear(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 2003); + +SELECT changeMonth(makeDate(1970, 01, 01), 02); +SELECT changeMonth(makeDate32(1970, 01, 01), 03); +SELECT changeMonth(makeDateTime(1970, 01, 01, 11, 22, 33), 04); +SELECT changeMonth(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); + +SELECT changeDay(makeDate(1970, 01, 01), 02); +SELECT changeDay(makeDate32(1970, 01, 01), 03); +SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); +SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); + +SELECT changeHour(makeDate(1970, 01, 01), 12); +SELECT changeHour(makeDate32(1970, 01, 01), 13); +SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); +SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); + +SELECT changeMinute(makeDate(1970, 01, 01), 23); +SELECT changeMinute(makeDate32(1970, 01, 01), 24); +SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); +SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); + +SELECT changeSecond(makeDate(1970, 01, 01), 34); +SELECT changeSecond(makeDate32(1970, 01, 01), 35); +SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); +SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); + +SELECT changeYear(makeDate(2000, 01, 01), 1969.0); +SELECT changeYear(makeDate(2000, 06, 07), 2149.0); +SELECT changeMonth(makeDate(2149, 01, 01), 07); +SELECT changeMonth(makeDate(2000, 06, 07), 13); +SELECT changeDay(makeDate(2000, 01, 01), 0); +SELECT changeDay(makeDate(2000, 06, 07), 32); +SELECT changeHour(makeDate(2000, 01, 01), -1); +SELECT changeHour(makeDate(2000, 
06, 07), 24); +SELECT changeMinute(makeDate(2000, 01, 01), -1); +SELECT changeMinute(makeDate(2000, 06, 07), 60); +SELECT changeSecond(makeDate(2000, 01, 01), -1); +SELECT changeSecond(makeDate(2000, 06, 07), 60); \ No newline at end of file From 8f11262666132a9a940c2a10e67c5be8f0151c3a Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Tue, 30 Apr 2024 13:36:36 +0000 Subject: [PATCH 003/299] ome style changes --- src/Functions/changeDate.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 14f6887af47..4edc31384c3 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -25,10 +25,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ARGUMENT_OUT_OF_BOUND; -} namespace { @@ -171,7 +167,8 @@ public: const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 deg = 1; - for (size_t i = 0; i < scale; ++i) { + for (size_t i = 0; i < scale; ++i) + { deg *= 10; } @@ -266,7 +263,7 @@ public: if (fl == 1) return min_date; - + if (fl == 2) return max_date; @@ -280,7 +277,7 @@ public: else result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; } - + if (result > max_date) return max_date; From 32f267999da8e9edd68c843431d7a1ae26c7250e Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sat, 4 May 2024 21:28:32 +0000 Subject: [PATCH 004/299] changeDate implementation fixes --- src/DataTypes/DataTypeDate.h | 1 + src/DataTypes/DataTypeDate32.h | 1 + src/DataTypes/DataTypeDateTime.h | 1 + src/Functions/changeDate.cpp | 296 +++++++++++++++---------------- 4 files changed, 143 insertions(+), 156 deletions(-) diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 0e08b9ba2ca..72b7ef2509f 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -10,6 +10,7 @@ class DataTypeDate final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date"; + static constexpr auto type_id = TypeIndex::Date; TypeIndex getTypeId() const override { return TypeIndex::Date; } TypeIndex getColumnType() const override { return TypeIndex::UInt16; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 02e818f10df..052bb39fc31 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -10,6 +10,7 @@ class DataTypeDate32 final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date32"; + static constexpr auto type_id = TypeIndex::Date32; TypeIndex getTypeId() const override { return TypeIndex::Date32; } TypeIndex getColumnType() const override { return TypeIndex::Int32; } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 5519240dee1..3b1212d910d 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,6 +36,7 @@ public: explicit DataTypeDateTime(const TimezoneMixin & time_zone); static constexpr auto family_name = "DateTime"; + static constexpr auto type_id = TypeIndex::DateTime; const char * getFamilyName() const override { return family_name; } String doGetName() const override; diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 4edc31384c3..8a5d0a87ca2 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -10,26 +10,23 @@ #include #include #include -#include #include - #include "Common/DateLUTImpl.h" #include #include #include 
"Columns/IColumn.h" #include "DataTypes/IDataType.h" -#include "base/DayNum.h" - + #include #include - + namespace DB { - + namespace { - -enum ChangeDateFunctionsNames + +enum class ChangeDateFunctionsNames { CHANGE_YEAR = 0, CHANGE_MONTH = 1, @@ -38,160 +35,161 @@ enum ChangeDateFunctionsNames CHANGE_MINUTE = 4, CHANGE_SECOND = 5 }; - + +constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) +{ + return type == ChangeDateFunctionsNames::CHANGE_HOUR || + type == ChangeDateFunctionsNames::CHANGE_MINUTE || + type == ChangeDateFunctionsNames::CHANGE_SECOND; +} + +template +constexpr bool isDate32() +{ + return DataType::type_id == TypeIndex::Date32; +} + +template +constexpr bool isDateTime64() +{ + return DataType::type_id == TypeIndex::DateTime64; +} + template class FunctionChangeDate : public IFunction { public: static constexpr auto name = Traits::Name; - + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - + String getName() const override { return name; } - + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } - + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date"}, + {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date(32) or DateTime(64)"}, {mandatory_argument_names[1], &isNumber, nullptr, "Number"} }; validateFunctionArgumentTypes(*this, arguments, args); - - if (Traits::EnumName >= 3) + + if (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) return std::make_shared(); if (isDate32(arguments[0].type)) return std::make_shared(DataTypeDateTime64::default_scale); } - + return arguments[0].type; } - + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (Traits::EnumName >= 3) + if (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (Traits::EnumName >= 3) - return executeDate32ToDateTime64(arguments, result_type, input_rows_count); + if (isTimeChange(Traits::EnumName)) + return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDateTime(input_type)) return execute(arguments, input_type, result_type, input_rows_count); - return executeDateTime64(arguments, result_type, input_rows_count); + return execute(arguments, input_type, result_type, input_rows_count); } - - + + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { - const auto & date_lut = DateLUT::instance(); + bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); + size_t result_rows_count = (is_const ? 
1 : input_rows_count); - auto result_column = ResultDataType::ColumnType::create(input_rows_count); + typename ResultDataType::ColumnType::MutablePtr result_column; + if constexpr (isDateTime64()) + { + auto scale = DataTypeDateTime64::default_scale; + if constexpr (isDateTime64()) + scale = typeid_cast(*result_type).getScale(); + result_column = ResultDataType::ColumnType::create(result_rows_count, scale); + } + else + result_column = ResultDataType::ColumnType::create(result_rows_count); + auto & result_data = result_column->getData(); - - auto input_column = castColumn(arguments[0], std::make_shared()); - input_column = input_column->convertToFullColumnIfConst(); + + auto input_column = arguments[0].column->convertToFullIfNeeded(); const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - for (size_t i = 0; i < input_rows_count; ++i) + + auto new_value_column = castColumn(arguments[1], std::make_shared()); + new_value_column = new_value_column->convertToFullIfNeeded(); + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + + for (size_t i = 0; i < result_rows_count; ++i) { - Int64 time; - if (isDateOrDate32(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; + if constexpr (isDateTime64()) + { + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; + for (size_t j = 0; j < scale; ++j) + deg *= 10; + + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); + Int64 fraction = input_column_data[i] % deg; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + } + else if constexpr (isDate32() && isDateTime64()) + { + const auto & date_lut = DateLUT::instance(); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); + } else - time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - - if (isDateOrDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); - else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + { + const auto & date_lut = DateLUT::instance(); + Int64 time; + if (isDateOrDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + else + time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + if (isDateOrDate32(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1, 0)); + } } + if (is_const) + return ColumnConst::create(std::move(result_column), input_rows_count); + return result_column; } - - ColumnPtr executeDate32ToDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - const auto & date_lut = DateLUT::instance(); - - auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); - auto & result_data = 
result_column->getData(); - - auto input_column = arguments[0].column->convertToFullColumnIfConst(); - const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - for (size_t i = 0; i < input_rows_count; ++i) - { - Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1000'000; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1000, 0); - } - - return result_column; - } - - ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - auto result_column = ColumnDateTime64::create(input_rows_count, DataTypeDateTime64::default_scale); - auto & result_data = result_column->getData(); - - auto input_column = arguments[0].column->convertToFullColumnIfConst(); - const auto & input_column_data = typeid_cast(*input_column).getData(); - - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullColumnIfConst(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - - const auto scale = typeid_cast(*result_type).getScale(); - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 deg = 1; - for (size_t i = 0; i < scale; ++i) - { - deg *= 10; - } - - for (size_t i = 0; i < input_rows_count; ++i) - { - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); - Int64 fraction = input_column_data[i] % deg; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); - } - - return result_column; - } - - Int64 getChangedDate(Int64 time, Float32 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; - auto day = (time % 100'000'000) / 1000'000; - auto hours = (time % 1000'000) / 10'000; + auto day = (time % 100'000'000) / 1'000'000; + auto hours = (time % 1'000'000) / 10'000; auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - + Int64 min_date, max_date; Int16 min_year, max_year; if (isDate(result_type)) @@ -217,115 +215,101 @@ public: } else { - min_date = date_lut.makeDateTime(1900, 1, 1, 0,0 , 0) * deg; + min_date = date_lut.makeDateTime(1900, 1, 1, 0, 0, 0) * deg; max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); min_year = 1900; max_year = 2299; } - - Int8 fl = 0; - + switch (Traits::EnumName) { - case CHANGE_YEAR: + case ChangeDateFunctionsNames::CHANGE_YEAR: if (new_value < min_year) - fl = 1; + return min_date; else if (new_value > max_year) - fl = 2; + return max_date; year = static_cast(new_value); break; - case CHANGE_MONTH: + case ChangeDateFunctionsNames::CHANGE_MONTH: if (new_value < 1 || new_value > 12) - fl = 1; + return min_date; month = static_cast(new_value); break; - case CHANGE_DAY: + case ChangeDateFunctionsNames::CHANGE_DAY: if (new_value < 1 || new_value > 31) - fl = 1; + return min_date; day = static_cast(new_value); break; - case CHANGE_HOUR: + case ChangeDateFunctionsNames::CHANGE_HOUR: 
if (new_value < 0 || new_value > 23) - fl = 1; + return min_date; hours = static_cast(new_value); break; - case CHANGE_MINUTE: + case ChangeDateFunctionsNames::CHANGE_MINUTE: if (new_value < 0 || new_value > 59) - fl = 1; + return min_date; minutes = static_cast(new_value); break; - case CHANGE_SECOND: + case ChangeDateFunctionsNames::CHANGE_SECOND: if (new_value < 0 || new_value > 59) - fl = 1; + return min_date; seconds = static_cast(new_value); break; } - - if (fl == 1) - return min_date; - - if (fl == 2) - return max_date; - + Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else - { - if (isDateTime(result_type)) - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); - else - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; - } - - + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + if (result > max_date) return max_date; - + return result; } }; - - + + struct ChangeYearTraits { static constexpr auto Name = "changeYear"; - static constexpr auto EnumName = CHANGE_YEAR; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; }; - + struct ChangeMonthTraits { static constexpr auto Name = "changeMonth"; - static constexpr auto EnumName = CHANGE_MONTH; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; }; - + struct ChangeDayTraits { static constexpr auto Name = "changeDay"; - static constexpr auto EnumName = CHANGE_DAY; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; }; - + struct ChangeHourTraits { static constexpr auto Name = "changeHour"; - static constexpr auto EnumName = CHANGE_HOUR; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; }; - + struct ChangeMinuteTraits { static constexpr auto Name = "changeMinute"; - static constexpr auto EnumName = CHANGE_MINUTE; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; }; - + struct ChangeSecondTraits { static constexpr auto Name = "changeSecond"; - static constexpr auto EnumName = CHANGE_SECOND; + static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - - + + } - + REGISTER_FUNCTION(ChangeDate) { factory.registerFunction>(); @@ -335,5 +319,5 @@ REGISTER_FUNCTION(ChangeDate) factory.registerFunction>(); factory.registerFunction>(); } - + } From fb6c931262c106bf3e076949fc47a28cf06181ba Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sat, 4 May 2024 22:49:10 +0000 Subject: [PATCH 005/299] Style fixes --- src/Functions/changeDate.cpp | 108 ++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 8a5d0a87ca2..ed6b3255cfc 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -12,20 +12,27 @@ #include #include #include "Common/DateLUTImpl.h" +#include "Common/Exception.h" #include #include #include "Columns/IColumn.h" #include "DataTypes/IDataType.h" - + #include #include - + namespace DB { - + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + namespace { - + enum class ChangeDateFunctionsNames { CHANGE_YEAR = 0, @@ -35,50 +42,50 @@ enum class ChangeDateFunctionsNames CHANGE_MINUTE = 4, CHANGE_SECOND = 5 }; - + constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) { return type == ChangeDateFunctionsNames::CHANGE_HOUR || type == 
ChangeDateFunctionsNames::CHANGE_MINUTE || type == ChangeDateFunctionsNames::CHANGE_SECOND; } - + template constexpr bool isDate32() { return DataType::type_id == TypeIndex::Date32; } - + template constexpr bool isDateTime64() { return DataType::type_id == TypeIndex::DateTime64; } - + template class FunctionChangeDate : public IFunction { public: static constexpr auto name = Traits::Name; - + static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - + String getName() const override { return name; } - + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - + size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } - + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - FunctionArgumentDescriptors args{ - {mandatory_argument_names[0], &isDateOrDate32OrDateTimeOrDateTime64, nullptr, "Date(32) or DateTime(64)"}, - {mandatory_argument_names[1], &isNumber, nullptr, "Number"} - }; - validateFunctionArgumentTypes(*this, arguments, args); - + if (arguments.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. Passed {}.", getName(), arguments.size()); + + if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); + if (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) @@ -86,10 +93,10 @@ public: if (isDate32(arguments[0].type)) return std::make_shared(DataTypeDateTime64::default_scale); } - + return arguments[0].type; } - + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { const auto & input_type = arguments[0].type; @@ -109,8 +116,7 @@ public: return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } - - + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { @@ -127,16 +133,16 @@ public: } else result_column = ResultDataType::ColumnType::create(result_rows_count); - + auto & result_data = result_column->getData(); - + auto input_column = arguments[0].column->convertToFullIfNeeded(); const auto & input_column_data = typeid_cast(*input_column).getData(); - + auto new_value_column = castColumn(arguments[1], std::make_shared()); new_value_column = new_value_column->convertToFullIfNeeded(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); - + const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + for (size_t i = 0; i < result_rows_count; ++i) { if constexpr (isDateTime64()) @@ -146,17 +152,17 @@ public: Int64 deg = 1; for (size_t j = 0; j < scale; ++j) deg *= 10; - + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = 
static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; - + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); } else @@ -167,7 +173,7 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - + if (isDateOrDate32(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else @@ -177,10 +183,10 @@ public: if (is_const) return ColumnConst::create(std::move(result_column), input_rows_count); - + return result_column; } - + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; @@ -189,7 +195,7 @@ public: auto hours = (time % 1'000'000) / 10'000; auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - + Int64 min_date, max_date; Int16 min_year, max_year; if (isDate(result_type)) @@ -220,7 +226,7 @@ public: min_year = 1900; max_year = 2299; } - + switch (Traits::EnumName) { case ChangeDateFunctionsNames::CHANGE_YEAR: @@ -256,60 +262,60 @@ public: seconds = static_cast(new_value); break; } - + Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; - + if (result > max_date) return max_date; - + return result; } }; - - + + struct ChangeYearTraits { static constexpr auto Name = "changeYear"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; }; - + struct ChangeMonthTraits { static constexpr auto Name = "changeMonth"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; }; - + struct ChangeDayTraits { static constexpr auto Name = "changeDay"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; }; - + struct ChangeHourTraits { static constexpr auto Name = "changeHour"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; }; - + struct ChangeMinuteTraits { static constexpr auto Name = "changeMinute"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; }; - + struct ChangeSecondTraits { static constexpr auto Name = "changeSecond"; static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - - + + } - + REGISTER_FUNCTION(ChangeDate) { factory.registerFunction>(); @@ -319,5 +325,5 @@ REGISTER_FUNCTION(ChangeDate) factory.registerFunction>(); factory.registerFunction>(); } - + } From f4990f26d914b078d92674061ec5427b3daa4ab7 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 16:38:53 +0000 Subject: [PATCH 006/299] fixes --- src/Functions/changeDate.cpp | 39 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index ed6b3255cfc..dfa93c9d218 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -156,28 +156,33 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, deg, fraction); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = 
DateLUT::instance(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1'000, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } else { const auto & date_lut = DateLUT::instance(); Int64 time; - if (isDateOrDate32(input_type)) + if (isDate(input_type)) time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + else if (isDate32(input_type)) + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - if (isDateOrDate32(result_type)) + + if (isDate(result_type)) + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + else if (isDate32(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut, 1, 0)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } @@ -187,7 +192,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 deg = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -221,8 +226,17 @@ public: } else { - min_date = date_lut.makeDateTime(1900, 1, 1, 0, 0, 0) * deg; - max_date = date_lut.makeDateTime(2299, 12, 31, 23, 59, 59) * deg + (deg - 1); + min_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(1900, 1, 1, 0, 0, 0), + static_cast(0), + static_cast(scale)); + Int64 deg = 1; + for (Int64 j = 0; j < scale; ++j) + deg *= 10; + max_date = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(2299, 12, 31, 23, 59, 59), + static_cast(deg - 1), + static_cast(scale)); min_year = 1900; max_year = 2299; } @@ -266,8 +280,13 @@ public: Int64 result; if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); + else if (isDateTime(result_type)) + result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); else - result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds) * deg + fraction; + result = DecimalUtils::decimalFromComponents( + date_lut.makeDateTime(year, month, day, hours, minutes, seconds), + static_cast(fraction), + static_cast(scale)); if (result > max_date) return max_date; From 2f8341d8c1686b9b73bfad36d12651d63b8ffd08 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 16:52:36 +0000 Subject: [PATCH 007/299] style --- src/Functions/changeDate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index dfa93c9d218..2265ee1b26b 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -176,7 +176,6 @@ public: else time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else if 
(isDate32(result_type)) From c82687c9a61bd6f53949ac5310d26c9b410fabbf Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Wed, 8 May 2024 22:17:59 +0000 Subject: [PATCH 008/299] fixes + functions docs --- src/Functions/changeDate.cpp | 82 ++++++++++++++++--- .../queries/0_stateless/02982_changeDate.sql | 24 +++--- 2 files changed, 82 insertions(+), 24 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 2265ee1b26b..73ac9eff867 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -56,6 +56,12 @@ constexpr bool isDate32() return DataType::type_id == TypeIndex::Date32; } +template +constexpr bool isDateTime() +{ + return DataType::type_id == TypeIndex::DateTime; +} + template constexpr bool isDateTime64() { @@ -86,7 +92,7 @@ public: if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) { if (isDate(arguments[0].type)) return std::make_shared(); @@ -102,13 +108,13 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (isTimeChange(Traits::EnumName)) + if constexpr (isTimeChange(Traits::EnumName)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } @@ -145,6 +151,7 @@ public: for (size_t i = 0; i < result_rows_count; ++i) { + if constexpr (isDateTime64()) { const auto scale = typeid_cast(*result_type).getScale(); @@ -165,16 +172,21 @@ public: result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } + else if constexpr (isDateTime()) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } else { const auto & date_lut = DateLUT::instance(); Int64 time; if (isDate(input_type)) time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; - else if (isDate32(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; else - time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); @@ -336,12 +348,58 @@ struct ChangeSecondTraits REGISTER_FUNCTION(ChangeDate) { - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Changes the year of the given Date(32) or DateTime(64). +Returns the same type as the input data. 
+)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeYear function, but changes month of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeYear function, but changes day_of_month of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Changes the hour of the given Date(32) or DateTime(64). +If the input data is Date, return DateTime; +if the input data is Date32, return DateTime64; +In other cases returns the same type as the input data. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeHour function, but changes minute of the date. +)", + .categories{"Dates and Times"} + } + ); + factory.registerFunction>( + FunctionDocumentation{ + .description = R"( +Same as changeHour function, but changes seconds of the date. +)", + .categories{"Dates and Times"} + } + ); } } diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index fbfe3771b33..9f7f5eacfc5 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -13,18 +13,18 @@ SELECT changeDay(makeDate32(1970, 01, 01), 03); SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); -SELECT changeHour(makeDate(1970, 01, 01), 12); -SELECT changeHour(makeDate32(1970, 01, 01), 13); +SELECT toTimeZone(changeHour(makeDate(1970, 01, 01), 12), 'UTC'); +SELECT toTimeZone(changeHour(makeDate32(1970, 01, 01), 13), 'UTC'); SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); -SELECT changeMinute(makeDate(1970, 01, 01), 23); -SELECT changeMinute(makeDate32(1970, 01, 01), 24); +SELECT toTimeZone(changeMinute(makeDate(1970, 01, 01), 23), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate32(1970, 01, 01), 24), 'UTC'); SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); -SELECT changeSecond(makeDate(1970, 01, 01), 34); -SELECT changeSecond(makeDate32(1970, 01, 01), 35); +SELECT toTimeZone(changeSecond(makeDate(1970, 01, 01), 34), 'UTC'); +SELECT toTimeZone(changeSecond(makeDate32(1970, 01, 01), 35), 'UTC'); SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); @@ -34,9 +34,9 @@ SELECT changeMonth(makeDate(2149, 01, 01), 07); SELECT changeMonth(makeDate(2000, 06, 07), 13); SELECT changeDay(makeDate(2000, 01, 01), 0); SELECT changeDay(makeDate(2000, 06, 07), 32); -SELECT changeHour(makeDate(2000, 01, 01), -1); -SELECT changeHour(makeDate(2000, 06, 07), 24); -SELECT changeMinute(makeDate(2000, 01, 01), -1); -SELECT changeMinute(makeDate(2000, 06, 07), 60); -SELECT changeSecond(makeDate(2000, 01, 01), -1); -SELECT changeSecond(makeDate(2000, 06, 07), 60); \ No newline at end of file +SELECT toTimeZone(changeHour(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeHour(makeDate(2000, 06, 07), 24), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeMinute(makeDate(2000, 06, 07), 60), 'UTC'); +SELECT 
toTimeZone(changeSecond(makeDate(2000, 01, 01), -1), 'UTC'); +SELECT toTimeZone(changeSecond(makeDate(2000, 06, 07), 60), 'UTC'); \ No newline at end of file From f52c24f753b35240d6d4925aaf582c9ebac829b2 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 08:18:02 +0000 Subject: [PATCH 009/299] tests fixes --- src/Functions/changeDate.cpp | 1 - .../queries/0_stateless/02982_changeDate.sql | 72 +++++++++---------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 73ac9eff867..a1827e1d94a 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -151,7 +151,6 @@ public: for (size_t i = 0; i < result_rows_count; ++i) { - if constexpr (isDateTime64()) { const auto scale = typeid_cast(*result_type).getScale(); diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 9f7f5eacfc5..f438212f9fa 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(makeDate(1970, 01, 01), 2000); -SELECT changeYear(makeDate32(1970, 01, 01), 2001); -SELECT changeYear(makeDateTime(1970, 01, 01, 11, 22, 33), 2002); -SELECT changeYear(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 2003); +SELECT changeYear(toDate('1970-01-01', 'UTC'), 2000); +SELECT changeYear(toDate32('1900-01-01', 'UTC'), 2001); +SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'UTC'), 2002); +SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'UTC'), 2003); -SELECT changeMonth(makeDate(1970, 01, 01), 02); -SELECT changeMonth(makeDate32(1970, 01, 01), 03); -SELECT changeMonth(makeDateTime(1970, 01, 01, 11, 22, 33), 04); -SELECT changeMonth(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); +SELECT changeMonth(toDate('1970-01-01', 'UTC'), 02); +SELECT changeMonth(toDate32('1970-01-01', 'UTC'), 03); +SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); +SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); -SELECT changeDay(makeDate(1970, 01, 01), 02); -SELECT changeDay(makeDate32(1970, 01, 01), 03); -SELECT changeDay(makeDateTime(1970, 01, 01, 11, 22, 33), 04); -SELECT changeDay(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 05); +SELECT changeDay(toDate('1970-01-01', 'UTC'), 02); +SELECT changeDay(toDate32('1970-01-01', 'UTC'), 03); +SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); +SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); -SELECT toTimeZone(changeHour(makeDate(1970, 01, 01), 12), 'UTC'); -SELECT toTimeZone(changeHour(makeDate32(1970, 01, 01), 13), 'UTC'); -SELECT changeHour(makeDateTime(1970, 01, 01, 11, 22, 33), 14); -SELECT changeHour(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 15); +SELECT toTimeZone(changeHour(toDate('1970-01-01', 'UTC'), 12), 'UTC'); +SELECT toTimeZone(changeHour(toDate32('1970-01-01', 'UTC'), 13), 'UTC'); +SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'UTC'), 14); +SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 15); -SELECT toTimeZone(changeMinute(makeDate(1970, 01, 01), 23), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate32(1970, 01, 01), 24), 'UTC'); -SELECT changeMinute(makeDateTime(1970, 01, 01, 11, 22, 33), 25); -SELECT changeMinute(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 26); +SELECT toTimeZone(changeMinute(toDate('1970-01-01', 'UTC'), 23), 'UTC'); +SELECT toTimeZone(changeMinute(toDate32('1970-01-01', 
'UTC'), 24), 'UTC'); +SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'UTC'), 25); +SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 26); -SELECT toTimeZone(changeSecond(makeDate(1970, 01, 01), 34), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate32(1970, 01, 01), 35), 'UTC'); -SELECT changeSecond(makeDateTime(1970, 01, 01, 11, 22, 33), 36); -SELECT changeSecond(makeDateTime64(1970, 01, 01, 11, 22, 33, 4444, 4), 37); +SELECT toTimeZone(changeSecond(toDate('1970-01-01', 'UTC'), 34), 'UTC'); +SELECT toTimeZone(changeSecond(toDate32('1970-01-01', 'UTC'), 35), 'UTC'); +SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'UTC'), 36); +SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 37); -SELECT changeYear(makeDate(2000, 01, 01), 1969.0); -SELECT changeYear(makeDate(2000, 06, 07), 2149.0); -SELECT changeMonth(makeDate(2149, 01, 01), 07); -SELECT changeMonth(makeDate(2000, 06, 07), 13); -SELECT changeDay(makeDate(2000, 01, 01), 0); -SELECT changeDay(makeDate(2000, 06, 07), 32); -SELECT toTimeZone(changeHour(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeHour(makeDate(2000, 06, 07), 24), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeMinute(makeDate(2000, 06, 07), 60), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate(2000, 01, 01), -1), 'UTC'); -SELECT toTimeZone(changeSecond(makeDate(2000, 06, 07), 60), 'UTC'); \ No newline at end of file +SELECT changeYear(toDate('2000-01-01', 'UTC'), 1969.0); +SELECT changeYear(toDate('2000-06-07', 'UTC'), 2149.0); +SELECT changeMonth(toDate('2149-01-01', 'UTC'), 07); +SELECT changeMonth(toDate('2000-01-01', 'UTC'), 13); +SELECT changeDay(toDate('2000-01-01', 'UTC'), 0); +SELECT changeDay(toDate('2000-01-01', 'UTC'), 32); +SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), 24), 'UTC'); +SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), 60), 'UTC'); +SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), -1), 'UTC'); +SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), 60), 'UTC'); \ No newline at end of file From ff9c0934cecb5ee8b938adc92498cec035d2e26f Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 17:52:50 +0000 Subject: [PATCH 010/299] changes in tests and functions because of time zone --- src/Functions/changeDate.cpp | 30 +++++--- .../0_stateless/02982_changeDate.reference | 12 ++-- .../queries/0_stateless/02982_changeDate.sql | 72 +++++++++---------- 3 files changed, 64 insertions(+), 50 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index a1827e1d94a..c7815263323 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -50,6 +50,12 @@ constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) type == ChangeDateFunctionsNames::CHANGE_SECOND; } +template +constexpr bool isDate() +{ + return DataType::type_id == TypeIndex::Date; +} + template constexpr bool isDate32() { @@ -68,6 +74,7 @@ constexpr bool isDateTime64() return DataType::type_id == TypeIndex::DateTime64; } + template class FunctionChangeDate : public IFunction { @@ -162,21 +169,21 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); 
+ result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, 3, 0); } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); } else { @@ -188,11 +195,11 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); else if (isDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); } } @@ -202,7 +209,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & input_type, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -291,12 +298,20 @@ public: if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) + { result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); + if (isDate(input_type)) + result += date_lut.timezoneOffset(result); + } else + { result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), static_cast(fraction), static_cast(scale)); + if (isDate32(input_type)) + result += date_lut.timezoneOffset(result); + } if (result > max_date) return max_date; @@ -342,7 +357,6 @@ struct ChangeSecondTraits static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; }; - } REGISTER_FUNCTION(ChangeDate) diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index d7d4edf4b43..67747922ae5 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -10,16 +10,16 @@ 1970-01-03 1970-01-04 11:22:33 1970-01-05 11:22:33.4444 -1970-01-01 12:00:00 -1970-01-01 13:00:00.000 +12 +13 1970-01-01 14:22:33 1970-01-01 15:22:33.4444 -1970-01-01 00:23:00 -1970-01-01 00:24:00.000 +23 +24 1970-01-01 11:25:33 1970-01-01 11:26:33.4444 -1970-01-01 
00:00:34 -1970-01-01 00:00:35.000 +34 +35 1970-01-01 11:22:36 1970-01-01 11:22:37.4444 1970-01-01 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index f438212f9fa..786e27808e8 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(toDate('1970-01-01', 'UTC'), 2000); -SELECT changeYear(toDate32('1900-01-01', 'UTC'), 2001); -SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'UTC'), 2002); -SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'UTC'), 2003); +SELECT changeYear(toDate('1970-01-01'), 2000); +SELECT changeYear(toDate32('1900-01-01'), 2001); +SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 2002); +SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 2003); -SELECT changeMonth(toDate('1970-01-01', 'UTC'), 02); -SELECT changeMonth(toDate32('1970-01-01', 'UTC'), 03); -SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); -SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); +SELECT changeMonth(toDate('1970-01-01'), 02); +SELECT changeMonth(toDate32('1970-01-01'), 03); +SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); +SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); -SELECT changeDay(toDate('1970-01-01', 'UTC'), 02); -SELECT changeDay(toDate32('1970-01-01', 'UTC'), 03); -SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'UTC'), 04); -SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 05); +SELECT changeDay(toDate('1970-01-01'), 02); +SELECT changeDay(toDate32('1970-01-01'), 03); +SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); +SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); -SELECT toTimeZone(changeHour(toDate('1970-01-01', 'UTC'), 12), 'UTC'); -SELECT toTimeZone(changeHour(toDate32('1970-01-01', 'UTC'), 13), 'UTC'); -SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'UTC'), 14); -SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 15); +SELECT toHour(changeHour(toDate('1970-01-01'), 12)); +SELECT toHour(changeHour(toDate32('1970-01-01'), 13)); +SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 14); +SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 15); -SELECT toTimeZone(changeMinute(toDate('1970-01-01', 'UTC'), 23), 'UTC'); -SELECT toTimeZone(changeMinute(toDate32('1970-01-01', 'UTC'), 24), 'UTC'); -SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'UTC'), 25); -SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 26); +SELECT toMinute(changeMinute(toDate('1970-01-01'), 23)); +SELECT toMinute(changeMinute(toDate32('1970-01-01'), 24)); +SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 25); +SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 26); -SELECT toTimeZone(changeSecond(toDate('1970-01-01', 'UTC'), 34), 'UTC'); -SELECT toTimeZone(changeSecond(toDate32('1970-01-01', 'UTC'), 35), 'UTC'); -SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'UTC'), 36); -SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'UTC'), 37); +SELECT toSecond(changeSecond(toDate('1970-01-01'), 34)); +SELECT toSecond(changeSecond(toDate32('1970-01-01'), 35)); +SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 
'Antarctica/Palmer'), 36); +SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 37); -SELECT changeYear(toDate('2000-01-01', 'UTC'), 1969.0); -SELECT changeYear(toDate('2000-06-07', 'UTC'), 2149.0); -SELECT changeMonth(toDate('2149-01-01', 'UTC'), 07); -SELECT changeMonth(toDate('2000-01-01', 'UTC'), 13); -SELECT changeDay(toDate('2000-01-01', 'UTC'), 0); -SELECT changeDay(toDate('2000-01-01', 'UTC'), 32); -SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeHour(toDate('2000-01-01', 'UTC'), 24), 'UTC'); -SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeMinute(toDate('2000-01-01', 'UTC'), 60), 'UTC'); -SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), -1), 'UTC'); -SELECT toTimeZone(changeSecond(toDate('2000-01-01', 'UTC'), 60), 'UTC'); \ No newline at end of file +SELECT changeYear(toDate('2000-01-01'), 1969.0); +SELECT changeYear(toDate('2000-06-07'), 2149.0); +SELECT changeMonth(toDate('2149-01-01'), 07); +SELECT changeMonth(toDate('2000-01-01'), 13); +SELECT changeDay(toDate('2000-01-01'), 0); +SELECT changeDay(toDate('2000-01-01'), 32); +SELECT changeHour(toDate('2000-01-01'), -1); +SELECT changeHour(toDate('2000-01-01'), 24); +SELECT changeMinute(toDate('2000-01-01'), -1); +SELECT changeMinute(toDate('2000-01-01'), 60); +SELECT changeSecond(toDate('2000-01-01'), -1); +SELECT changeSecond(toDate('2000-01-01'), 60); \ No newline at end of file From 3e7041c1f4106359ab6792ddbab86db3a4792555 Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 18:33:46 +0000 Subject: [PATCH 011/299] . --- src/Functions/changeDate.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index c7815263323..e77aaec30fe 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -169,21 +169,21 @@ public: Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); Int64 fraction = input_column_data[i] % deg; - result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, scale, fraction); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } else if constexpr (isDate32() && isDateTime64()) { const auto & date_lut = DateLUT::instance(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - result_data[i] = getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut, 3, 0); + result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } else { @@ -195,11 +195,11 @@ public: time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else if (isDate32(result_type)) - result_data[i] = 
static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], input_type, result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } @@ -209,7 +209,7 @@ public: return result_column; } - Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & input_type, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const + Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const { auto year = time / 10'000'000'000; auto month = (time % 10'000'000'000) / 100'000'000; @@ -298,20 +298,12 @@ public: if (isDateOrDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) - { result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); - if (isDate(input_type)) - result += date_lut.timezoneOffset(result); - } else - { result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), static_cast(fraction), static_cast(scale)); - if (isDate32(input_type)) - result += date_lut.timezoneOffset(result); - } if (result > max_date) return max_date; From afb47e418303b4ee125f8c9700985372f827da8b Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Thu, 9 May 2024 19:32:43 +0000 Subject: [PATCH 012/299] . --- src/Functions/changeDate.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index e77aaec30fe..fd279a028b7 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -173,11 +173,18 @@ public: } else if constexpr (isDate32() && isDateTime64()) { - const auto & date_lut = DateLUT::instance(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } + else if constexpr (isDate() && isDateTime()) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; + + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + } else if constexpr (isDateTime()) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); @@ -196,10 +203,8 @@ public: if (isDate(result_type)) result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); - else if (isDate32(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } } From 50377d2450701ea31ea3fcde8c6ee51b96c971cf Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Sun, 19 May 2024 08:41:30 +0000 Subject: [PATCH 013/299] fix date_min of DateTime --- src/Functions/changeDate.cpp | 8 ++- .../0_stateless/02982_changeDate.reference | 58 ++++++++--------- 
.../queries/0_stateless/02982_changeDate.sql | 62 +++++++++---------- 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index fd279a028b7..237d9082566 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -162,6 +162,7 @@ public: { const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 deg = 1; for (size_t j = 0; j < scale; ++j) deg *= 10; @@ -223,7 +224,7 @@ public: auto minutes = (time % 10'000) / 100; auto seconds = time % 100; - Int64 min_date, max_date; + Int64 min_date = 0, max_date = 0; Int16 min_year, max_year; if (isDate(result_type)) { @@ -242,7 +243,7 @@ public: else if (isDateTime(result_type)) { min_date = 0; - max_date = 0x0ffffffffll; + max_date = 0x0FFFFFFFFLL; min_year = 1970; max_year = 2106; } @@ -310,6 +311,9 @@ public: static_cast(fraction), static_cast(scale)); + if (result < min_date) + return min_date; + if (result > max_date) return max_date; diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 67747922ae5..c64abc89ed2 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -1,36 +1,36 @@ -2000-01-01 2001-01-01 -2002-01-01 11:22:33 -2003-01-01 11:22:33.4444 -1970-02-01 -1970-03-01 -1970-04-01 11:22:33 -1970-05-01 11:22:33.4444 -1970-01-02 -1970-01-03 -1970-01-04 11:22:33 -1970-01-05 11:22:33.4444 -12 -13 -1970-01-01 14:22:33 -1970-01-01 15:22:33.4444 -23 -24 -1970-01-01 11:25:33 -1970-01-01 11:26:33.4444 -34 -35 -1970-01-01 11:22:36 -1970-01-01 11:22:37.4444 +2002-01-01 +2003-01-01 11:22:33 +2004-01-01 11:22:33.4444 +2000-02-01 +2000-03-01 +2000-04-01 11:22:33 +2000-05-01 11:22:33.4444 +2000-01-02 +2000-01-03 +2000-01-04 11:22:33 +2000-01-05 11:22:33.4444 +2000-01-01 12:00:00 +2000-01-01 13:00:00.000 +2000-01-01 14:22:33 +2000-01-01 15:22:33.4444 +2000-01-01 00:23:00 +2000-01-01 00:24:00.000 +2000-01-01 11:25:33 +2000-01-01 11:26:33.4444 +2000-01-01 00:00:34 +2000-01-01 00:00:35.000 +2000-01-01 11:22:36 +2000-01-01 11:22:37.4444 1970-01-01 2149-06-06 2149-06-06 1970-01-01 1970-01-01 1970-01-01 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 -1970-01-01 00:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 +1970-01-01 07:00:00 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 786e27808e8..0d1bd75e130 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,42 +1,42 @@ -SELECT changeYear(toDate('1970-01-01'), 2000); -SELECT changeYear(toDate32('1900-01-01'), 2001); -SELECT changeYear(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 2002); -SELECT changeYear(toDateTime64('1900-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 2003); +SELECT changeYear(toDate('2000-01-01'), 2001); +SELECT changeYear(toDate32('2000-01-01'), 2002); +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2003); +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2004); -SELECT changeMonth(toDate('1970-01-01'), 02); -SELECT changeMonth(toDate32('1970-01-01'), 03); -SELECT changeMonth(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); -SELECT changeMonth(toDateTime64('1970-01-01 11:22:33.4444', 4, 
'Antarctica/Palmer'), 05); +SELECT changeMonth(toDate('2000-01-01'), 02); +SELECT changeMonth(toDate32('2000-01-01'), 03); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 04); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); -SELECT changeDay(toDate('1970-01-01'), 02); -SELECT changeDay(toDate32('1970-01-01'), 03); -SELECT changeDay(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 04); -SELECT changeDay(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 05); +SELECT changeDay(toDate('2000-01-01'), 02); +SELECT changeDay(toDate32('2000-01-01'), 03); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 04); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); -SELECT toHour(changeHour(toDate('1970-01-01'), 12)); -SELECT toHour(changeHour(toDate32('1970-01-01'), 13)); -SELECT changeHour(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 14); -SELECT changeHour(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 15); +SELECT changeHour(toDate('2000-01-01'), 12); +SELECT changeHour(toDate32('2000-01-01'), 13); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 14); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 15); -SELECT toMinute(changeMinute(toDate('1970-01-01'), 23)); -SELECT toMinute(changeMinute(toDate32('1970-01-01'), 24)); -SELECT changeMinute(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 25); -SELECT changeMinute(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 26); +SELECT changeMinute(toDate('2000-01-01'), 23); +SELECT changeMinute(toDate32('2000-01-01'), 24); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 25); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 26); -SELECT toSecond(changeSecond(toDate('1970-01-01'), 34)); -SELECT toSecond(changeSecond(toDate32('1970-01-01'), 35)); -SELECT changeSecond(toDateTime('1970-01-01 11:22:33', 'Antarctica/Palmer'), 36); -SELECT changeSecond(toDateTime64('1970-01-01 11:22:33.4444', 4, 'Antarctica/Palmer'), 37); +SELECT changeSecond(toDate('2000-01-01'), 34); +SELECT changeSecond(toDate32('2000-01-01'), 35); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 36); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 37); -SELECT changeYear(toDate('2000-01-01'), 1969.0); +SELECT changeYear(toDate('2000-01-01'), 1969.0); SELECT changeYear(toDate('2000-06-07'), 2149.0); SELECT changeMonth(toDate('2149-01-01'), 07); SELECT changeMonth(toDate('2000-01-01'), 13); SELECT changeDay(toDate('2000-01-01'), 0); SELECT changeDay(toDate('2000-01-01'), 32); -SELECT changeHour(toDate('2000-01-01'), -1); -SELECT changeHour(toDate('2000-01-01'), 24); -SELECT changeMinute(toDate('2000-01-01'), -1); -SELECT changeMinute(toDate('2000-01-01'), 60); -SELECT changeSecond(toDate('2000-01-01'), -1); -SELECT changeSecond(toDate('2000-01-01'), 60); \ No newline at end of file +SELECT changeHour(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file From 
eecbd44ce5e6673072c728097f50b41e2935ae90 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:23:30 +0000 Subject: [PATCH 014/299] Some fixups --- src/Functions/changeDate.cpp | 172 ++++++++---------- .../queries/0_stateless/02982_changeDate.sql | 2 +- 2 files changed, 75 insertions(+), 99 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 237d9082566..6725afe3356 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -1,24 +1,23 @@ -#include -#include -#include +#include "Common/DateLUTImpl.h" +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include -#include "Common/DateLUTImpl.h" -#include "Common/Exception.h" -#include -#include -#include "Columns/IColumn.h" -#include "DataTypes/IDataType.h" -#include #include namespace DB @@ -26,88 +25,62 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace { -enum class ChangeDateFunctionsNames +enum class Component { - CHANGE_YEAR = 0, - CHANGE_MONTH = 1, - CHANGE_DAY = 2, - CHANGE_HOUR = 3, - CHANGE_MINUTE = 4, - CHANGE_SECOND = 5 + Year, + Month, + Day, + Hour, + Minute, + Second }; -constexpr bool isTimeChange(const ChangeDateFunctionsNames & type) +bool isTimeComponentChange(Component type) { - return type == ChangeDateFunctionsNames::CHANGE_HOUR || - type == ChangeDateFunctionsNames::CHANGE_MINUTE || - type == ChangeDateFunctionsNames::CHANGE_SECOND; + return type == Component::Hour || + type == Component::Minute || + type == Component::Second; } -template -constexpr bool isDate() -{ - return DataType::type_id == TypeIndex::Date; -} - -template -constexpr bool isDate32() -{ - return DataType::type_id == TypeIndex::Date32; -} - -template -constexpr bool isDateTime() -{ - return DataType::type_id == TypeIndex::DateTime; -} - -template -constexpr bool isDateTime64() -{ - return DataType::type_id == TypeIndex::DateTime64; -} - - template class FunctionChangeDate : public IFunction { public: - static constexpr auto name = Traits::Name; - - static constexpr std::array mandatory_argument_names = {"date", "new_value"}; - - String getName() const override { return name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - size_t getNumberOfArguments() const override { return mandatory_argument_names.size(); } + static constexpr auto name = Traits::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return Traits::name; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + size_t getNumberOfArguments() const override { return 2; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. 
Passed {}.", getName(), arguments.size()); - if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type) || !isNumber(*arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date(32) or DateTime(64), second - numeric", getName()); + if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date, Date32, DateTime or DateTime64", getName()); + if (!isNumber(*arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be numeric", getName()); - if constexpr (isTimeChange(Traits::EnumName)) + const auto & input_type = arguments[0].type; + + if (isTimeComponentChange(Traits::component)) { - if (isDate(arguments[0].type)) + if (isDate(input_type)) return std::make_shared(); - if (isDate32(arguments[0].type)) + if (isDate32(input_type)) return std::make_shared(DataTypeDateTime64::default_scale); } - return arguments[0].type; + return input_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -115,50 +88,53 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if constexpr (isTimeChange(Traits::EnumName)) + if (isTimeComponentChange(Traits::component)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if constexpr (isTimeChange(Traits::EnumName)) + if (isTimeComponentChange(Traits::component)) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDateTime(input_type)) return execute(arguments, input_type, result_type, input_rows_count); - return execute(arguments, input_type, result_type, input_rows_count); + if (isDateTime64(input_type)) + return execute(arguments, input_type, result_type, input_rows_count); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid input type"); } - template + template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); size_t result_rows_count = (is_const ? 
1 : input_rows_count); typename ResultDataType::ColumnType::MutablePtr result_column; - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); result_column = ResultDataType::ColumnType::create(result_rows_count, scale); } else result_column = ResultDataType::ColumnType::create(result_rows_count); - auto & result_data = result_column->getData(); - auto input_column = arguments[0].column->convertToFullIfNeeded(); - const auto & input_column_data = typeid_cast(*input_column).getData(); + const auto & input_column_data = typeid_cast(*input_column).getData(); auto new_value_column = castColumn(arguments[1], std::make_shared()); new_value_column = new_value_column->convertToFullIfNeeded(); const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + auto & result_data = result_column->getData(); + for (size_t i = 0; i < result_rows_count; ++i) { - if constexpr (isDateTime64()) + if constexpr (std::is_same_v) { const auto scale = typeid_cast(*result_type).getScale(); const auto & date_lut = typeid_cast(*result_type).getTimeZone(); @@ -172,21 +148,21 @@ public: result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); } - else if constexpr (isDate32() && isDateTime64()) + else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); } - else if constexpr (isDate() && isDateTime()) + else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); } - else if constexpr (isDateTime()) + else if constexpr (std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); @@ -264,36 +240,36 @@ public: max_year = 2299; } - switch (Traits::EnumName) + switch (Traits::component) { - case ChangeDateFunctionsNames::CHANGE_YEAR: + case Component::Year: if (new_value < min_year) return min_date; else if (new_value > max_year) return max_date; year = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_MONTH: + case Component::Month: if (new_value < 1 || new_value > 12) return min_date; month = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_DAY: + case Component::Day: if (new_value < 1 || new_value > 31) return min_date; day = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_HOUR: + case Component::Hour: if (new_value < 0 || new_value > 23) return min_date; hours = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_MINUTE: + case Component::Minute: if (new_value < 0 || new_value > 59) return min_date; minutes = static_cast(new_value); break; - case ChangeDateFunctionsNames::CHANGE_SECOND: + case Component::Second: if (new_value < 0 || new_value > 59) return min_date; seconds = static_cast(new_value); @@ -324,38 +300,38 @@ public: struct ChangeYearTraits { - static constexpr auto Name = "changeYear"; - static constexpr 
auto EnumName = ChangeDateFunctionsNames::CHANGE_YEAR; + static constexpr auto name = "changeYear"; + static constexpr auto component = Component::Year; }; struct ChangeMonthTraits { - static constexpr auto Name = "changeMonth"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MONTH; + static constexpr auto name = "changeMonth"; + static constexpr auto component = Component::Month; }; struct ChangeDayTraits { - static constexpr auto Name = "changeDay"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_DAY; + static constexpr auto name = "changeDay"; + static constexpr auto component = Component::Day; }; struct ChangeHourTraits { - static constexpr auto Name = "changeHour"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_HOUR; + static constexpr auto name = "changeHour"; + static constexpr auto component = Component::Hour; }; struct ChangeMinuteTraits { - static constexpr auto Name = "changeMinute"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_MINUTE; + static constexpr auto name = "changeMinute"; + static constexpr auto component = Component::Minute; }; struct ChangeSecondTraits { - static constexpr auto Name = "changeSecond"; - static constexpr auto EnumName = ChangeDateFunctionsNames::CHANGE_SECOND; + static constexpr auto name = "changeSecond"; + static constexpr auto component = Component::Second; }; } diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 0d1bd75e130..26d53b2f7f4 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -39,4 +39,4 @@ SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/No SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; From b1d3eb4c4d67e2e95d09dabf027ea9065751634d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:27:17 +0000 Subject: [PATCH 015/299] Some fixups, pt. II --- src/Functions/changeDate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 6725afe3356..d17787f4f55 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -26,6 +26,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } From 4fdcdc284d6c41c3a1f97f6d72c7b9c73816a9df Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:28:38 +0000 Subject: [PATCH 016/299] Some fixups, pt. 
III --- src/DataTypes/DataTypeDate.h | 1 - src/DataTypes/DataTypeDate32.h | 1 - src/DataTypes/DataTypeDateTime.h | 1 - 3 files changed, 3 deletions(-) diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index 72b7ef2509f..0e08b9ba2ca 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -10,7 +10,6 @@ class DataTypeDate final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date"; - static constexpr auto type_id = TypeIndex::Date; TypeIndex getTypeId() const override { return TypeIndex::Date; } TypeIndex getColumnType() const override { return TypeIndex::UInt16; } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 414c8301558..65633e7a228 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -9,7 +9,6 @@ class DataTypeDate32 final : public DataTypeNumberBase { public: static constexpr auto family_name = "Date32"; - static constexpr auto type_id = TypeIndex::Date32; TypeIndex getTypeId() const override { return TypeIndex::Date32; } TypeIndex getColumnType() const override { return TypeIndex::Int32; } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 3b1212d910d..5519240dee1 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -36,7 +36,6 @@ public: explicit DataTypeDateTime(const TimezoneMixin & time_zone); static constexpr auto family_name = "DateTime"; - static constexpr auto type_id = TypeIndex::DateTime; const char * getFamilyName() const override { return family_name; } String doGetName() const override; From ba1e0e0317e9b408bfeafaac3fdda87eb16296fb Mon Sep 17 00:00:00 2001 From: Maksim Galkin Date: Mon, 20 May 2024 11:43:31 +0000 Subject: [PATCH 017/299] added docks + updated tests --- .../functions/date-time-functions.md | 85 +++++++++++++++++++ .../0_stateless/02982_changeDate.reference | 17 ++++ .../queries/0_stateless/02982_changeDate.sql | 26 +++++- 3 files changed, 127 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5622097537e..1ce6fce6667 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -3039,3 +3039,88 @@ Result: - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +## changeYear + +Changes a year of the passed date argument. + +**Syntax** + +``` sql +changeYear(date, new_value) +``` + +**Arguments** + +- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +- `new_value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Return value** + +- A date or date with time. Same data type as input date argument. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeYear(toDate('1999-01-01'), 2000), hangeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Like [changeYear](#changeYear) but changes a month of the passed date argument. + +## changeMonth + +Like [changeYear](#changeYear) but changes a day of year of the passed date argument. + +## changeHour + +Changes an hour of the passed date argument. + +**Syntax** + +``` sql +changeHour(date, new_value) +``` + +**Arguments** + +- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +- `new_value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Return value** + +- A date with time. If date argument is Date - returns DateTime, if Date32 - returns DateTime64, otherwise returns same data type as input date argument. + +Type: [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Exapmle** + +``` sql +SELECT changeHour(toDate('1999-01-01'), 12), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12); +``` + +Result: +``` +┌─changeHour(toDate('1999-01-01'), 12)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12)─┐ +│ 1999-01-01 12:00:00 │ 1999-01-01 12:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Like [changeHour](#changeHour) but changes a minute of the passed date argument. + +## changeSecond + +Like [changeHour](#changeHour) but changes a seconds of the passed date argument. 
\ No newline at end of file diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index c64abc89ed2..3e8ad45d9a8 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -34,3 +34,20 @@ 1970-01-01 07:00:00 1970-01-01 07:00:00 1970-01-01 07:00:00 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2000-02-01 +2299-12-31 +1900-01-01 +2106-02-07 13:28:15 +1970-01-01 07:00:00 +2299-12-31 23:59:59.999 +1900-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 0d1bd75e130..1a9d062bafe 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -39,4 +39,28 @@ SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/No SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file +SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT changeYear(toDate('2000-01-01')); -- { serverError 42 } +SELECT changeYear(toDate('2000-01-01'), 2001, 2002); -- { serverError 42 } +SELECT changeYear(toDate('2000-01-01'), '2001'); -- { serverError 43 } + +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int8)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int16)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt8)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt16)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float32)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float64)); +SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Decimal(10, 5))); + +SELECT changeYear(toDate32('2000-01-01'), 2300); +SELECT changeYear(toDate32('2000-01-01'), 1899); +SELECT changeSecond(toDateTime('2106-02-07 13:28:15'), 16) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeHour(toDateTime('1970-01-01 23:59:59'), 6) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 2300) SETTINGS session_timezone = 'Asia/Novosibirsk'; +SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 1899) SETTINGS session_timezone = 'Asia/Novosibirsk'; + From d115adf462859fd82a5bb5a83a6dedaeae799a08 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 20 May 2024 15:02:51 +0000 Subject: [PATCH 018/299] Some fixups, pt. 
IV --- .../functions/date-time-functions.md | 180 ++++++++++++--- src/Functions/changeDate.cpp | 126 ++++++----- .../0_stateless/02982_changeDate.reference | 181 +++++++++++++-- .../queries/0_stateless/02982_changeDate.sql | 207 +++++++++++++++--- 4 files changed, 546 insertions(+), 148 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 43bb5b6f6cb..8bf301d76c2 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -4104,33 +4104,33 @@ timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); ## changeYear -Changes a year of the passed date argument. +Changes the year component of a date or date time. **Syntax** ``` sql -changeYear(date, new_value) +changeYear(date_or_datetime, value) ``` **Arguments** -- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - -- `new_value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). **Return value** -- A date or date with time. Same data type as input date argument. +- The same type as `date_or_datetime`. Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** ``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), hangeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); + SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); ``` Result: + ``` ┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ │ 2000-01-01 │ 2000-01-01 00:00:00.000 │ @@ -4139,51 +4139,175 @@ Result: ## changeMonth -Like [changeYear](#changeYear) but changes a month of the passed date argument. - -## changeMonth - -Like [changeYear](#changeYear) but changes a day of year of the passed date argument. - -## changeHour - -Changes an hour of the passed date argument. +Changes the month component of a date or date time. **Syntax** ``` sql -changeHour(date, new_value) +changeMonth(date_or_datetime, value) ``` **Arguments** -- `date` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - -- `new_value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). **Return value** -- A date with time. 
If date argument is Date - returns DateTime, if Date32 - returns DateTime64, otherwise returns same data type as input date argument. +- The same type as `date_or_datetime`. -Type: [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -**Exapmle** +**Example** ``` sql -SELECT changeHour(toDate('1999-01-01'), 12), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12); + SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); ``` Result: + ``` -┌─changeHour(toDate('1999-01-01'), 12)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 12)─┐ -│ 1999-01-01 12:00:00 │ 1999-01-01 12:00:00.000 │ +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ └──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ ``` ## changeMinute -Like [changeHour](#changeHour) but changes a minute of the passed date argument. +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` ## changeSecond -Like [changeHour](#changeHour) but changes a seconds of the passed date argument. \ No newline at end of file +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
+ +**Example** + +``` sql + SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index d17787f4f55..bd73154a6f1 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -45,9 +45,9 @@ enum class Component bool isTimeComponentChange(Component type) { - return type == Component::Hour || - type == Component::Minute || - type == Component::Second; + return type == Component::Hour || type == Component::Minute || type == Component::Second; +} + } template @@ -63,13 +63,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires 2 parameters: date, new_value. Passed {}.", getName(), arguments.size()); - - if (!isDateOrDate32OrDateTimeOrDateTime64(*arguments[0].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Date, Date32, DateTime or DateTime64", getName()); - if (!isNumber(*arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be numeric", getName()); + FunctionArgumentDescriptors args{ + {"date_or_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"}, + {"value", static_cast(&isNativeInteger), nullptr, "Integer"} + }; + validateFunctionArgumentTypes(*this, arguments, args); const auto & input_type = arguments[0].type; @@ -335,62 +333,62 @@ struct ChangeSecondTraits static constexpr auto component = Component::Second; }; -} - REGISTER_FUNCTION(ChangeDate) { - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Changes the year of the given Date(32) or DateTime(64). -Returns the same type as the input data. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeYear function, but changes month of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeYear function, but changes day_of_month of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Changes the hour of the given Date(32) or DateTime(64). -If the input data is Date, return DateTime; -if the input data is Date32, return DateTime64; -In other cases returns the same type as the input data. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeHour function, but changes minute of the date. -)", - .categories{"Dates and Times"} - } - ); - factory.registerFunction>( - FunctionDocumentation{ - .description = R"( -Same as changeHour function, but changes seconds of the date. 
-)", - .categories{"Dates and Times"} - } - ); + { + FunctionDocumentation::Description description = "Changes the year component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeYear(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the month component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMonth(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the day component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeDay(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the hour component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeHour(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the minute component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeMinute(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. 
Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } + { + FunctionDocumentation::Description description = "Changes the second component of a date or date time."; + FunctionDocumentation::Syntax syntax = "changeSecond(date_or_datetime, value);"; + FunctionDocumentation::Arguments arguments = {{"date_or_datetime", "The value to change. Type: Date, Date32, DateTime, or DateTime64"}, {"value", "The new value. Type: [U]Int*"}}; + FunctionDocumentation::ReturnedValue returned_value = "The same type as date_or_datetime. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64."; + FunctionDocumentation::Categories categories = {"Dates and Times"}; + FunctionDocumentation function_documentation = {.description = description, .syntax = syntax, .arguments = arguments, .returned_value = returned_value, .categories = categories}; + factory.registerFunction>(function_documentation); + } } } diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 3e8ad45d9a8..8ce647481bb 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -1,37 +1,170 @@ +Negative tests +changeYear +-- Date 2001-01-01 -2002-01-01 -2003-01-01 11:22:33 -2004-01-01 11:22:33.4444 +1970-01-01 +1970-01-01 +2149-06-06 +-- Date32 +2001-01-01 +1900-01-01 +1900-01-01 +2299-12-31 +-- DateTime +2001-01-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +2106-02-07 07:28:15 +-- DateTime64 +2001-01-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +2299-12-31 23:59:59.9999 +changeMonth +-- Date +2000-01-01 2000-02-01 -2000-03-01 -2000-04-01 11:22:33 -2000-05-01 11:22:33.4444 +2000-12-01 +1970-01-01 +1970-01-01 +1970-01-01 +-- Date32 +2000-01-01 +2000-02-01 +2000-12-01 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-02-01 11:22:33 +2000-12-01 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-02-01 11:22:33.4444 +2000-12-01 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +changeDay +-- Date +2000-01-01 2000-01-02 -2000-01-03 -2000-01-04 11:22:33 -2000-01-05 11:22:33.4444 -2000-01-01 12:00:00 -2000-01-01 13:00:00.000 -2000-01-01 14:22:33 -2000-01-01 15:22:33.4444 -2000-01-01 00:23:00 -2000-01-01 00:24:00.000 -2000-01-01 11:25:33 -2000-01-01 11:26:33.4444 -2000-01-01 00:00:34 -2000-01-01 00:00:35.000 -2000-01-01 11:22:36 -2000-01-01 11:22:37.4444 -1970-01-01 -2149-06-06 -2149-06-06 +2000-01-31 1970-01-01 1970-01-01 1970-01-01 +-- Date32 +2000-01-01 +2000-01-02 +2000-01-31 +1900-01-01 +1900-01-01 +1900-01-01 +-- DateTime +2000-01-01 11:22:33 +2000-01-02 11:22:33 +2000-01-31 11:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:33.4444 +2000-01-02 11:22:33.4444 +2000-01-31 11:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +1900-01-01 
00:00:00.0000 +-- Special case: change to 29 Feb in a leap year +2000-02-29 +2000-02-29 +2000-02-29 11:22:33 +2000-02-29 11:22:33.4444 +changeHour +-- Date +2000-01-01 00:00:00 +2000-01-01 02:00:00 +2000-01-01 23:00:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 02:00:00.000 +2000-01-01 23:00:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 00:22:33 +2000-01-01 02:22:33 +2000-01-01 23:22:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 00:22:33.4444 +2000-01-01 02:22:33.4444 +2000-01-01 23:22:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 +changeMinute +-- Date +2000-01-01 00:00:00 +2000-01-01 00:02:00 +2000-01-01 00:59:00 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:02:00.000 +2000-01-01 00:59:00.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:00:33 +2000-01-01 11:02:33 +2000-01-01 11:59:33 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:00:33.4444 +2000-01-01 11:02:33.4444 +2000-01-01 11:59:33.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 +changeSecond +-- Date +2000-01-01 00:00:00 +2000-01-01 00:00:02 +2000-01-01 00:00:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- Date32 +2000-01-01 00:00:00.000 +2000-01-01 00:00:02.000 +2000-01-01 00:00:59.000 +1900-01-01 00:00:00.000 +1900-01-01 00:00:00.000 +-- DateTime +2000-01-01 11:22:00 +2000-01-01 11:22:02 +2000-01-01 11:22:59 +1970-01-01 01:00:00 +1970-01-01 01:00:00 +-- DateTime64 +2000-01-01 11:22:00.4444 +2000-01-01 11:22:02.4444 +2000-01-01 11:22:59.4444 +1900-01-01 00:00:00.0000 +1900-01-01 00:00:00.0000 +-- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 2000-02-01 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index dec95c2d79d..62232079d61 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -1,43 +1,186 @@ +SELECT 'Negative tests'; +-- as changeYear, changeMonth, changeDay, changeMinute, changeSecond share the same implementation, just testing one of them +SELECT changeYear(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(toDate('2000-01-01'), 2000, 1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT changeYear(1999, 2000); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT changeYear(toDate('2000-01-01'), 1.5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Disable timezone randomization +SET session_timezone='CET'; + +SELECT 'changeYear'; +SELECT '-- Date'; SELECT changeYear(toDate('2000-01-01'), 2001); -SELECT changeYear(toDate32('2000-01-01'), 2002); -SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2003); -SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2004); +SELECT changeYear(toDate('2000-01-01'), 1800); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeYear(toDate32('2000-01-01'), 2001); +SELECT changeYear(toDate32('2000-01-01'), 1800); -- 
out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), -5000); -- out-of-bounds +SELECT changeYear(toDate32('2000-01-01'), 2500); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2001); +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 1800); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), -5000); -- out-of-bounds +SELECT changeYear(toDateTime('2000-01-01 11:22:33'), 2500); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2001); +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 1800); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), -5000); -- out-of-bounds +SELECT changeYear(toDateTime64('2000-01-01 11:22:33.4444', 4), 2500); -- out-of-bounds -SELECT changeMonth(toDate('2000-01-01'), 02); -SELECT changeMonth(toDate32('2000-01-01'), 03); -SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 04); -SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); +SELECT 'changeMonth'; +SELECT '-- Date'; +SELECT changeMonth(toDate('2000-01-01'), 1); +SELECT changeMonth(toDate('2000-01-01'), 2); +SELECT changeMonth(toDate('2000-01-01'), 12); +SELECT changeMonth(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate('2000-01-01'), 13); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMonth(toDate32('2000-01-01'), 1); +SELECT changeMonth(toDate32('2000-01-01'), 2); +SELECT changeMonth(toDate32('2000-01-01'), 12); +SELECT changeMonth(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMonth(toDate32('2000-01-01'), 13); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 12); +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMonth(toDateTime('2000-01-01 11:22:33'), 13); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 12); +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMonth(toDateTime64('2000-01-01 11:22:33.4444', 4), 13); -- out-of-bounds -SELECT changeDay(toDate('2000-01-01'), 02); -SELECT changeDay(toDate32('2000-01-01'), 03); -SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 04); -SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 05); +SELECT 'changeDay'; +SELECT '-- Date'; +SELECT changeDay(toDate('2000-01-01'), 1); +SELECT changeDay(toDate('2000-01-01'), 2); +SELECT changeDay(toDate('2000-01-01'), 31); +SELECT changeDay(toDate('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeDay(toDate('2000-01-01'), 32); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeDay(toDate32('2000-01-01'), 1); +SELECT changeDay(toDate32('2000-01-01'), 2); +SELECT changeDay(toDate32('2000-01-01'), 31); +SELECT changeDay(toDate32('2000-01-01'), 0); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), 
-1); -- out-of-bounds +SELECT changeDay(toDate32('2000-01-01'), 32); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 1); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 31); +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 0); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeDay(toDateTime('2000-01-01 11:22:33'), 32); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 1); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 31); +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeDay(toDateTime64('2000-01-01 11:22:33.4444', 4), 32); -- out-of-bounds +SELECT '-- Special case: change to 29 Feb in a leap year'; +SELECT changeDay(toDate('2000-02-28'), 29); +SELECT changeDay(toDate32('2000-02-01'), 29); +SELECT changeDay(toDateTime('2000-02-01 11:22:33'), 29); +SELECT changeDay(toDateTime64('2000-02-01 11:22:33.4444', 4), 29); -SELECT changeHour(toDate('2000-01-01'), 12); -SELECT changeHour(toDate32('2000-01-01'), 13); -SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 14); -SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 15); - -SELECT changeMinute(toDate('2000-01-01'), 23); -SELECT changeMinute(toDate32('2000-01-01'), 24); -SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 25); -SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 26); - -SELECT changeSecond(toDate('2000-01-01'), 34); -SELECT changeSecond(toDate32('2000-01-01'), 35); -SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 36); -SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 37); - -SELECT changeYear(toDate('2000-01-01'), 1969.0); -SELECT changeYear(toDate('2000-06-07'), 2149.0); -SELECT changeMonth(toDate('2149-01-01'), 07); -SELECT changeMonth(toDate('2000-01-01'), 13); -SELECT changeDay(toDate('2000-01-01'), 0); -SELECT changeDay(toDate('2000-01-01'), 32); +SELECT 'changeHour'; +SELECT '-- Date'; +SELECT changeHour(toDate('2000-01-01'), 0); +SELECT changeHour(toDate('2000-01-01'), 2); +SELECT changeHour(toDate('2000-01-01'), 23); +SELECT changeHour(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate('2000-01-01'), 24); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeHour(toDate32('2000-01-01'), 0); +SELECT changeHour(toDate32('2000-01-01'), 2); +SELECT changeHour(toDate32('2000-01-01'), 23); +SELECT changeHour(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeHour(toDate32('2000-01-01'), 24); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 23); +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeHour(toDateTime('2000-01-01 11:22:33'), 24); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), 23); +SELECT changeHour(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeHour(toDateTime64('2000-01-01 
11:22:33.4444', 4), 24); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeHour(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeHour(toDate('2000-01-01'), 24) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeMinute'; +SELECT '-- Date'; +SELECT changeMinute(toDate('2000-01-01'), 0); +SELECT changeMinute(toDate('2000-01-01'), 2); +SELECT changeMinute(toDate('2000-01-01'), 59); +SELECT changeMinute(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeMinute(toDate32('2000-01-01'), 0); +SELECT changeMinute(toDate32('2000-01-01'), 2); +SELECT changeMinute(toDate32('2000-01-01'), 59); +SELECT changeMinute(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeMinute(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeMinute(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeMinute(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeMinute(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeMinute(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; + +SELECT 'changeSecond'; +SELECT '-- Date'; +SELECT changeSecond(toDate('2000-01-01'), 0); +SELECT changeSecond(toDate('2000-01-01'), 2); +SELECT changeSecond(toDate('2000-01-01'), 59); +SELECT changeSecond(toDate('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate('2000-01-01'), 60); -- out-of-bounds +SELECT '-- Date32'; +SELECT changeSecond(toDate32('2000-01-01'), 0); +SELECT changeSecond(toDate32('2000-01-01'), 2); +SELECT changeSecond(toDate32('2000-01-01'), 59); +SELECT changeSecond(toDate32('2000-01-01'), -1); -- out-of-bounds +SELECT changeSecond(toDate32('2000-01-01'), 60); -- out-of-bounds +SELECT '-- DateTime'; +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 0); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 2); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 59); +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), -1); -- out-of-bounds +SELECT changeSecond(toDateTime('2000-01-01 11:22:33'), 60); -- out-of-bounds +SELECT '-- DateTime64'; +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 0); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 2); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 59); +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), -1); -- out-of-bounds +SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of-bounds +SELECT '-- With different timezone'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; From 6ff3cf82819e61020f3483687934a70ade67be67 Mon Sep 
17 00:00:00 2001 From: Robert Schulze Date: Mon, 20 May 2024 15:36:25 +0000 Subject: [PATCH 019/299] Some fixups, pt. V --- .../functions/date-time-functions.md | 419 +++++++++--------- 1 file changed, 210 insertions(+), 209 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 8bf301d76c2..7a39cc74372 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -3520,6 +3520,216 @@ Result: └───────────────────────────────────────────────────────────────────────┘ ``` +## changeYear + +Changes the year component of a date or date time. + +**Syntax** + +``` sql +changeYear(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: + +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Changes the month component of a date or date time. + +**Syntax** + +``` sql +changeMonth(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); +``` + +Result: + +``` +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. 
+ +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeSecond + +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). 
+ +**Return value** + +- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. @@ -4102,212 +4312,3 @@ timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02')); - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) -## changeYear - -Changes the year component of a date or date time. - -**Syntax** - -``` sql -changeYear(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); -``` - -Result: - -``` -┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ -│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeMonth - -Changes the month component of a date or date time. - -**Syntax** - -``` sql -changeMonth(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
- -**Example** - -``` sql - SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); -``` - -Result: - -``` -┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ -│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeDay - -Changes the day component of a date or date time. - -**Syntax** - -``` sql -changeDay(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); -``` - -Result: - -``` -┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ -│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ -└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ -``` - -## changeHour - -Changes the hour component of a date or date time. - -**Syntax** - -``` sql -changeHour(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); -``` - -Result: - -``` -┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ -│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeMinute - -Changes the minute component of a date or date time. - -**Syntax** - -``` sql -changeMinute(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
- -**Example** - -``` sql - SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeSecond - -Changes the second component of a date or date time. - -**Syntax** - -``` sql -changeSecond(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` From b47524a9a5d5c379227da1ac290938f42d2b0586 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 05:19:49 +0000 Subject: [PATCH 020/299] Fix spelling --- .../aspell-ignore/en/aspell-dict.txt | 116 +++++++++--------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2a9aa259fdd..ade822f508a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -29,13 +29,6 @@ Alexey AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -184,7 +177,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -394,8 +386,6 @@ InterserverThreads IsPentagon IsResClassIII IsValid -isNotDistinctFrom -isNullable JBOD JOINed JOINs @@ -464,8 +454,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -475,9 +463,8 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP -LOONGARCH LONGLONG -LoongArch +LOONGARCH Levenshtein Liao LibFuzzer @@ -495,6 +482,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -569,17 +557,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -681,8 
+658,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -883,7 +860,6 @@ Simhash SimpleAggregateFunction SimpleState SipHash -sigmoid Smirnov's Smirnov'test Soundex @@ -929,7 +905,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -955,7 +930,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -965,6 +939,7 @@ TemporaryFilesUnknown Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1025,7 +1000,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1092,6 +1066,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1099,10 +1074,9 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears -addInterval -addTupleOfIntervals addr addressToLine addressToLineWithInlines @@ -1144,15 +1118,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1160,10 +1138,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1183,6 +1163,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1323,6 +1304,12 @@ cfg cgroup cgroups chadmin +changeDay +changeHour +changeMinute +changeMonth +changeSecond +changeYear changelog changelogs charset @@ -1364,6 +1351,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1725,8 +1713,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1799,8 +1787,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1852,6 +1842,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1863,9 +1855,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1912,8 +1904,8 @@ logTrace logagent loghouse london -loongarch lookups +loongarch lowcardinality lowerUTF lowercased @@ -1984,8 +1976,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -2002,10 +1994,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF 
+multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2340,8 +2343,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2401,8 +2404,9 @@ sharded sharding shortcircuit shortkeys -showCertificate shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2416,8 +2420,8 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod -singlepart singleValueOrNull +singlepart singlevalueornull sinh sipHash @@ -2462,13 +2466,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2529,6 +2533,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2536,10 +2541,9 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears -subtractInterval -subtractTupleOfIntervals subtree subtrees subtype @@ -2548,13 +2552,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow +sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2577,6 +2581,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2711,18 +2716,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2765,6 +2770,7 @@ unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2775,8 +2781,8 @@ uptime uptrace uring url -urlencoded urlCluster +urlencoded urls usearch userspace From c026a5b7e938b797d3f8cb016457dab38c002aab Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 05:20:46 +0000 Subject: [PATCH 021/299] Fix style check --- src/Functions/changeDate.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index bd73154a6f1..9868ac5b914 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -23,13 +23,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { From 336e791ea87c8298ff584ae2de8a140f0947f02a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 20:50:54 +0000 Subject: [PATCH 022/299] Fix style check, pt. 
II --- src/Functions/changeDate.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 9868ac5b914..b400680d272 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -23,6 +23,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace { From ab2baf5e9664133097025c149a93a09e84cca152 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 22 May 2024 12:23:14 +0000 Subject: [PATCH 023/299] Fix expected results --- .../0_stateless/02982_changeDate.reference | 17 -------------- .../queries/0_stateless/02982_changeDate.sql | 23 ------------------- 2 files changed, 40 deletions(-) diff --git a/tests/queries/0_stateless/02982_changeDate.reference b/tests/queries/0_stateless/02982_changeDate.reference index 8ce647481bb..4a7f093ca2b 100644 --- a/tests/queries/0_stateless/02982_changeDate.reference +++ b/tests/queries/0_stateless/02982_changeDate.reference @@ -167,20 +167,3 @@ changeSecond -- With different timezone 1970-01-01 07:00:00 1970-01-01 07:00:00 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2000-02-01 -2299-12-31 -1900-01-01 -2106-02-07 13:28:15 -1970-01-01 07:00:00 -2299-12-31 23:59:59.999 -1900-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/02982_changeDate.sql b/tests/queries/0_stateless/02982_changeDate.sql index 62232079d61..2bc9aa95569 100644 --- a/tests/queries/0_stateless/02982_changeDate.sql +++ b/tests/queries/0_stateless/02982_changeDate.sql @@ -183,26 +183,3 @@ SELECT changeSecond(toDateTime64('2000-01-01 11:22:33.4444', 4), 60); -- out-of- SELECT '-- With different timezone'; SELECT changeSecond(toDate('2000-01-01'), -1) SETTINGS session_timezone = 'Asia/Novosibirsk'; SELECT changeSecond(toDate('2000-01-01'), 60) SETTINGS session_timezone = 'Asia/Novosibirsk'; - -SELECT changeYear(toDate('2000-01-01')); -- { serverError 42 } -SELECT changeYear(toDate('2000-01-01'), 2001, 2002); -- { serverError 42 } -SELECT changeYear(toDate('2000-01-01'), '2001'); -- { serverError 43 } - -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int8)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int16)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Int64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt8)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt16)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS UInt64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float32)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Float64)); -SELECT changeMonth(toDate('2000-01-01'), CAST(2 AS Decimal(10, 5))); - -SELECT changeYear(toDate32('2000-01-01'), 2300); -SELECT changeYear(toDate32('2000-01-01'), 1899); -SELECT changeSecond(toDateTime('2106-02-07 13:28:15'), 16) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeHour(toDateTime('1970-01-01 23:59:59'), 6) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 2300) SETTINGS session_timezone = 'Asia/Novosibirsk'; -SELECT changeYear(toDateTime64('2000-01-01 00:00:00.000', 3), 1899) SETTINGS session_timezone = 'Asia/Novosibirsk'; \ No newline at end of file From cdd99a73a0e46801bb01f47df49c47e67fd9bb6f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 10:12:32 +0000 Subject: [PATCH 024/299] 
Fix clang-tidy --- src/Functions/changeDate.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index b400680d272..e24391afe12 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -274,15 +274,25 @@ public: } Int64 result; - if (isDateOrDate32(result_type)) + if (isDate(result_type) || isDate32(result_type)) result = date_lut.makeDayNum(year, month, day); else if (isDateTime(result_type)) result = date_lut.makeDateTime(year, month, day, hours, minutes, seconds); else +#ifndef __clang_analyzer__ + /// ^^ This looks funny. It is the least terrible suppression of a false positive reported by clang-analyzer (a sub-class + /// of clang-tidy checks) deep down in 'decimalFromComponents'. Usual suppressions of the form NOLINT* don't work here (they + /// would only affect code in _this_ file), and suppressing the issue in 'decimalFromComponents' may suppress true positives. result = DecimalUtils::decimalFromComponents( date_lut.makeDateTime(year, month, day, hours, minutes, seconds), - static_cast(fraction), + fraction, static_cast(scale)); +#else + { + UNUSED(fraction); + result = 0; + } +#endif if (result < min_date) return min_date; From 26fce1f59d668f2a080bf4d8d1e97bc99cf0b305 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 29 May 2024 22:43:50 +0000 Subject: [PATCH 025/299] use buffering before merging sorted --- src/Core/Settings.h | 1 + .../QueryPlan/BufferChunksTransform.cpp | 158 ++++++++++++++++++ .../QueryPlan/BufferChunksTransform.h | 48 ++++++ .../Optimizations/optimizeReadInOrder.cpp | 36 ++-- src/Processors/QueryPlan/SortingStep.cpp | 20 ++- src/Processors/QueryPlan/SortingStep.h | 4 +- 6 files changed, 251 insertions(+), 16 deletions(-) create mode 100644 src/Processors/QueryPlan/BufferChunksTransform.cpp create mode 100644 src/Processors/QueryPlan/BufferChunksTransform.h diff --git a/src/Core/Settings.h b/src/Core/Settings.h index dc61a049de8..c3091656884 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -542,6 +542,7 @@ class IColumn; M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(UInt64, read_in_order_max_bytes_to_buffer, 128 * 1024 * 1024, "Max bytes to buffer before merging while reading in order of primary key. The higher value increases parallelism of query execution", 0) \ M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. 
Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp new file mode 100644 index 00000000000..872f3090259 --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -0,0 +1,158 @@ +#include +#include "Common/Logger.h" + +namespace DB +{ + +BufferChunksTransform::BufferChunksTransform(const Block & header_, size_t num_ports_, size_t max_bytes_to_buffer_, size_t limit_) + : IProcessor(InputPorts(num_ports_, header_), OutputPorts(num_ports_, header_)) + , max_bytes_to_buffer(max_bytes_to_buffer_) + , limit(limit_) + , chunks(num_ports_) + , num_processed_rows(num_ports_) +{ + for (auto & input : inputs) + input_ports.push_back({.port = &input, .is_finished = false}); + + for (auto & output : outputs) + output_ports.push_back({.port = &output, .is_finished = false}); +} + +IProcessor::Status BufferChunksTransform::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) +{ + if (!is_reading_started) + { + for (auto & input : inputs) + input.setNeeded(); + + is_reading_started = true; + } + + for (const auto & idx : updated_outputs) + { + auto & input = input_ports[idx]; + auto & output = output_ports[idx]; + + if (output.port->isFinished()) + { + if (!output.is_finished) + { + output.is_finished = true; + ++num_finished_outputs; + } + } + else if (output.port->canPush()) + { + available_outputs.push(idx); + } + else if (num_buffered_bytes >= max_bytes_to_buffer) + { + input.port->setNotNeeded(); + } + } + + for (const auto & idx : updated_inputs) + { + auto & input = input_ports[idx]; + + if (input.port->isFinished()) + { + if (!input.is_finished) + { + input.is_finished = true; + ++num_finished_inputs; + } + } + else if (input.port->hasData() && num_buffered_bytes < max_bytes_to_buffer) + { + auto chunk = pullChunk(idx); + num_buffered_bytes += chunk.bytes(); + chunks[idx].push(std::move(chunk)); + } + } + + std::queue next_available_outputs; + bool pushed_directly = false; + + while (!available_outputs.empty()) + { + UInt64 idx = available_outputs.front(); + available_outputs.pop(); + + auto & input = input_ports[idx]; + auto & output = output_ports[idx]; + chassert(output.port->canPush()); + + if (!chunks[idx].empty()) + { + auto & chunk = chunks[idx].front(); + num_buffered_bytes -= chunk.bytes(); + output.port->push(std::move(chunk)); + chunks[idx].pop(); + } + else if (input.port->hasData()) + { + /// Process chunk without buffering if possible. + auto chunk = pullChunk(idx); + output.port->push(std::move(chunk)); + pushed_directly = true; + } + else if (input.is_finished) + { + output.port->finish(); + output.is_finished = true; + ++num_finished_outputs; + } + else + { + input.port->setNeeded(); + next_available_outputs.push(idx); + } + } + + available_outputs = std::move(next_available_outputs); + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; + } + + if (num_finished_inputs == inputs.size()) + { + if (num_buffered_bytes == 0) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + return Status::PortFull; + } + + bool need_data = pushed_directly || num_buffered_bytes < max_bytes_to_buffer; + return need_data ? 
Status::NeedData : Status::PortFull; +} + +Chunk BufferChunksTransform::pullChunk(size_t input_idx) +{ + auto & input = input_ports[input_idx]; + input.port->setNeeded(); + + auto chunk = input.port->pull(); + num_processed_rows[input_idx] += chunk.getNumRows(); + + if (limit && num_processed_rows[input_idx] >= limit) + { + input.port->close(); + input.is_finished = true; + ++num_finished_inputs; + } + + return chunk; +} + +} diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h new file mode 100644 index 00000000000..d741fb992c9 --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -0,0 +1,48 @@ +#pragma once +#include +#include + +namespace DB +{ + +class BufferChunksTransform : public IProcessor +{ +public: + BufferChunksTransform(const Block & header_, size_t num_ports_, size_t max_bytes_to_buffer_, size_t limit_); + + String getName() const override { return "BufferChunks"; } + Status prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) override; + +private: + Chunk pullChunk(size_t input_idx); + + size_t max_bytes_to_buffer; + size_t limit; + + struct InputPortWithStatus + { + InputPort * port; + bool is_finished; + }; + + struct OutputPortWithStatus + { + OutputPort * port; + bool is_finished; + }; + + std::vector> chunks; + std::vector num_processed_rows; + + std::vector input_ports; + std::vector output_ports; + std::queue available_outputs; + + bool is_reading_started = false; + + size_t num_finished_inputs = 0; + size_t num_finished_outputs = 0; + size_t num_buffered_bytes = 0; +}; + +} diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index bc1b3695d88..e87b6f7cf35 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -915,18 +915,30 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto & union_node = node.children.front(); + const SortDescription * best_sort_descr = nullptr; + StepStack best_steps_to_update; + bool use_buffering = false; + std::vector infos; - const SortDescription * max_sort_descr = nullptr; infos.reserve(node.children.size()); + for (auto * child : union_node->children) { infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update)); - if (infos.back() && (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size())) - max_sort_descr = &infos.back()->sort_description_for_merging; + if (infos.back()) + { + if (!best_sort_descr || best_sort_descr->size() < infos.back()->sort_description_for_merging.size()) + { + best_sort_descr = &infos.back()->sort_description_for_merging; + best_steps_to_update = steps_to_update; + } + + use_buffering |= infos.back()->limit == 0; + } } - if (!max_sort_descr || max_sort_descr->empty()) + if (!best_sort_descr || best_sort_descr->empty()) return; for (size_t i = 0; i < infos.size(); ++i) @@ -941,7 +953,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) auto limit = sorting->getLimit(); /// If we have limit, it's better to sort up to full description and apply limit. /// We cannot sort up to partial read-in-order description with limit cause result set can be wrong. - const auto & descr = limit ? sorting->getSortDescription() : *max_sort_descr; + const auto & descr = limit ? 
sorting->getSortDescription() : *best_sort_descr; additional_sorting = std::make_unique( child->step->getOutputStream(), descr, @@ -949,12 +961,12 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) sorting->getSettings(), false); } - else if (info->sort_description_for_merging.size() < max_sort_descr->size()) + else if (info->sort_description_for_merging.size() < best_sort_descr->size()) { additional_sorting = std::make_unique( child->step->getOutputStream(), info->sort_description_for_merging, - *max_sort_descr, + *best_sort_descr, sorting->getSettings().max_block_size, 0); /// TODO: support limit with ties } @@ -968,12 +980,14 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } - sorting->convertToFinishSorting(*max_sort_descr); + sorting->convertToFinishSorting(*best_sort_descr, use_buffering); + updateStepsDataStreams(best_steps_to_update); } else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update)) { - sorting->convertToFinishSorting(order_info->sort_description_for_merging); - /// update data stream's sorting properties + /// Use buffering only if have filter or don't have limit. + bool use_buffering = order_info->limit == 0; + sorting->convertToFinishSorting(order_info->sort_description_for_merging, use_buffering); updateStepsDataStreams(steps_to_update); } } @@ -1087,7 +1101,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit); if (!can_read) return 0; - sorting->convertToFinishSorting(order_info->sort_description_for_merging); + sorting->convertToFinishSorting(order_info->sort_description_for_merging, false); } return 0; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8f40e523b42..b5b3443deca 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -8,6 +7,7 @@ #include #include #include +#include #include #include @@ -38,6 +38,7 @@ SortingStep::Settings::Settings(const Context & context) tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; max_block_bytes = settings.prefer_external_sort_block_bytes; + read_in_order_max_bytes_to_buffer = settings.read_in_order_max_bytes_to_buffer; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -153,10 +154,11 @@ void SortingStep::updateLimit(size_t limit_) } } -void SortingStep::convertToFinishSorting(SortDescription prefix_description_) +void SortingStep::convertToFinishSorting(SortDescription prefix_description_, bool use_buffering_) { type = Type::FinishSorting; prefix_description = std::move(prefix_description_); + use_buffering = use_buffering_; } void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) @@ -244,6 +246,17 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) { + if (use_buffering && sort_settings.read_in_order_max_bytes_to_buffer) + { + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + sort_settings.read_in_order_max_bytes_to_buffer, + limit_); + + pipeline.addTransform(std::move(transform)); + } + auto transform = std::make_shared( 
pipeline.getHeader(), pipeline.getNumStreams(), @@ -373,9 +386,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); if (need_finish_sorting) - { finishSorting(pipeline, prefix_description, result_description, limit); - } + return; } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 49dcf9f3121..57658b6dafb 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -28,6 +28,7 @@ public: TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; size_t max_block_bytes = 0; + size_t read_in_order_max_bytes_to_buffer = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); @@ -80,7 +81,7 @@ public: const SortDescription & getSortDescription() const { return result_description; } - void convertToFinishSorting(SortDescription prefix_description); + void convertToFinishSorting(SortDescription prefix_description, bool use_buffering_); Type getType() const { return type; } const Settings & getSettings() const { return sort_settings; } @@ -126,6 +127,7 @@ private: UInt64 limit; bool always_read_till_end = false; + bool use_buffering = false; Settings sort_settings; From e561f5cc7b30c41b7b44e0d7280cf343ac981758 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 30 May 2024 12:41:22 +0000 Subject: [PATCH 026/299] use buffering before merging sorted --- .../QueryPlan/BufferChunksTransform.cpp | 170 +++++------------- .../QueryPlan/BufferChunksTransform.h | 34 +--- .../Optimizations/optimizeReadInOrder.cpp | 21 +-- src/Processors/QueryPlan/SortingStep.cpp | 11 +- 4 files changed, 65 insertions(+), 171 deletions(-) diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp index 872f3090259..ac24a0816ec 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.cpp +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -1,156 +1,74 @@ #include -#include "Common/Logger.h" namespace DB { -BufferChunksTransform::BufferChunksTransform(const Block & header_, size_t num_ports_, size_t max_bytes_to_buffer_, size_t limit_) - : IProcessor(InputPorts(num_ports_, header_), OutputPorts(num_ports_, header_)) +BufferChunksTransform::BufferChunksTransform(const Block & header_, size_t max_bytes_to_buffer_, size_t limit_) + : IProcessor({header_}, {header_}) + , input(inputs.front()) + , output(outputs.front()) , max_bytes_to_buffer(max_bytes_to_buffer_) , limit(limit_) - , chunks(num_ports_) - , num_processed_rows(num_ports_) { - for (auto & input : inputs) - input_ports.push_back({.port = &input, .is_finished = false}); - - for (auto & output : outputs) - output_ports.push_back({.port = &output, .is_finished = false}); } -IProcessor::Status BufferChunksTransform::prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) +IProcessor::Status BufferChunksTransform::prepare() { - if (!is_reading_started) + if (output.isFinished()) { - for (auto & input : inputs) - input.setNeeded(); - - is_reading_started = true; - } - - for (const auto & idx : updated_outputs) - { - auto & input = input_ports[idx]; - auto & output = output_ports[idx]; - - if (output.port->isFinished()) - { - if (!output.is_finished) - { - output.is_finished = true; - ++num_finished_outputs; - } - } - else if (output.port->canPush()) - { - available_outputs.push(idx); - } - else if (num_buffered_bytes >= 
max_bytes_to_buffer) - { - input.port->setNotNeeded(); - } - } - - for (const auto & idx : updated_inputs) - { - auto & input = input_ports[idx]; - - if (input.port->isFinished()) - { - if (!input.is_finished) - { - input.is_finished = true; - ++num_finished_inputs; - } - } - else if (input.port->hasData() && num_buffered_bytes < max_bytes_to_buffer) - { - auto chunk = pullChunk(idx); - num_buffered_bytes += chunk.bytes(); - chunks[idx].push(std::move(chunk)); - } - } - - std::queue next_available_outputs; - bool pushed_directly = false; - - while (!available_outputs.empty()) - { - UInt64 idx = available_outputs.front(); - available_outputs.pop(); - - auto & input = input_ports[idx]; - auto & output = output_ports[idx]; - chassert(output.port->canPush()); - - if (!chunks[idx].empty()) - { - auto & chunk = chunks[idx].front(); - num_buffered_bytes -= chunk.bytes(); - output.port->push(std::move(chunk)); - chunks[idx].pop(); - } - else if (input.port->hasData()) - { - /// Process chunk without buffering if possible. - auto chunk = pullChunk(idx); - output.port->push(std::move(chunk)); - pushed_directly = true; - } - else if (input.is_finished) - { - output.port->finish(); - output.is_finished = true; - ++num_finished_outputs; - } - else - { - input.port->setNeeded(); - next_available_outputs.push(idx); - } - } - - available_outputs = std::move(next_available_outputs); - - if (num_finished_outputs == outputs.size()) - { - for (auto & input : inputs) - input.close(); - + chunks = {}; + input.close(); return Status::Finished; } - if (num_finished_inputs == inputs.size()) + if (output.canPush()) { - if (num_buffered_bytes == 0) - { - for (auto & output : outputs) - output.finish(); + input.setNeeded(); + if (!chunks.empty()) + { + auto chunk = std::move(chunks.front()); + chunks.pop(); + + num_buffered_bytes -= chunk.bytes(); + output.push(std::move(chunk)); + } + else if (input.hasData()) + { + auto chunk = pullChunk(); + output.push(std::move(chunk)); + } + else if (input.isFinished()) + { + output.finish(); return Status::Finished; } + } + if (input.hasData() && num_buffered_bytes < max_bytes_to_buffer) + { + auto chunk = pullChunk(); + num_buffered_bytes += chunk.bytes(); + chunks.push(std::move(chunk)); + } + + if (num_buffered_bytes >= max_bytes_to_buffer) + { + input.setNotNeeded(); return Status::PortFull; } - bool need_data = pushed_directly || num_buffered_bytes < max_bytes_to_buffer; - return need_data ? 
Status::NeedData : Status::PortFull; + input.setNeeded(); + return Status::NeedData; } -Chunk BufferChunksTransform::pullChunk(size_t input_idx) +Chunk BufferChunksTransform::pullChunk() { - auto & input = input_ports[input_idx]; - input.port->setNeeded(); + auto chunk = input.pull(); + num_processed_rows += chunk.getNumRows(); - auto chunk = input.port->pull(); - num_processed_rows[input_idx] += chunk.getNumRows(); - - if (limit && num_processed_rows[input_idx] >= limit) - { - input.port->close(); - input.is_finished = true; - ++num_finished_inputs; - } + if (limit && num_processed_rows >= limit) + input.close(); return chunk; } diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h index d741fb992c9..c912f280a8a 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.h +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -8,41 +8,23 @@ namespace DB class BufferChunksTransform : public IProcessor { public: - BufferChunksTransform(const Block & header_, size_t num_ports_, size_t max_bytes_to_buffer_, size_t limit_); + BufferChunksTransform(const Block & header_, size_t max_bytes_to_buffer_, size_t limit_); + Status prepare() override; String getName() const override { return "BufferChunks"; } - Status prepare(const PortNumbers & updated_inputs, const PortNumbers & updated_outputs) override; private: - Chunk pullChunk(size_t input_idx); + Chunk pullChunk(); + + InputPort & input; + OutputPort & output; size_t max_bytes_to_buffer; size_t limit; - struct InputPortWithStatus - { - InputPort * port; - bool is_finished; - }; - - struct OutputPortWithStatus - { - OutputPort * port; - bool is_finished; - }; - - std::vector> chunks; - std::vector num_processed_rows; - - std::vector input_ports; - std::vector output_ports; - std::queue available_outputs; - - bool is_reading_started = false; - - size_t num_finished_inputs = 0; - size_t num_finished_outputs = 0; + std::queue chunks; size_t num_buffered_bytes = 0; + size_t num_processed_rows = 0; }; } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index e87b6f7cf35..cb8023f603f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -915,9 +915,8 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto & union_node = node.children.front(); - const SortDescription * best_sort_descr = nullptr; - StepStack best_steps_to_update; bool use_buffering = false; + const SortDescription * max_sort_descr = nullptr; std::vector infos; infos.reserve(node.children.size()); @@ -928,17 +927,14 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) if (infos.back()) { - if (!best_sort_descr || best_sort_descr->size() < infos.back()->sort_description_for_merging.size()) - { - best_sort_descr = &infos.back()->sort_description_for_merging; - best_steps_to_update = steps_to_update; - } + if (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size()) + max_sort_descr = &infos.back()->sort_description_for_merging; use_buffering |= infos.back()->limit == 0; } } - if (!best_sort_descr || best_sort_descr->empty()) + if (!max_sort_descr || max_sort_descr->empty()) return; for (size_t i = 0; i < infos.size(); ++i) @@ -953,7 +949,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) auto limit = sorting->getLimit(); /// If 
we have limit, it's better to sort up to full description and apply limit. /// We cannot sort up to partial read-in-order description with limit cause result set can be wrong. - const auto & descr = limit ? sorting->getSortDescription() : *best_sort_descr; + const auto & descr = limit ? sorting->getSortDescription() : *max_sort_descr; additional_sorting = std::make_unique( child->step->getOutputStream(), descr, @@ -961,12 +957,12 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) sorting->getSettings(), false); } - else if (info->sort_description_for_merging.size() < best_sort_descr->size()) + else if (info->sort_description_for_merging.size() < max_sort_descr->size()) { additional_sorting = std::make_unique( child->step->getOutputStream(), info->sort_description_for_merging, - *best_sort_descr, + *max_sort_descr, sorting->getSettings().max_block_size, 0); /// TODO: support limit with ties } @@ -980,8 +976,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } - sorting->convertToFinishSorting(*best_sort_descr, use_buffering); - updateStepsDataStreams(best_steps_to_update); + sorting->convertToFinishSorting(*max_sort_descr, use_buffering); } else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update)) { diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index b5b3443deca..a853c908317 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -248,13 +248,12 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr { if (use_buffering && sort_settings.read_in_order_max_bytes_to_buffer) { - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - sort_settings.read_in_order_max_bytes_to_buffer, - limit_); + size_t bytes_to_buffer = sort_settings.read_in_order_max_bytes_to_buffer / pipeline.getNumStreams(); - pipeline.addTransform(std::move(transform)); + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, bytes_to_buffer, limit_); + }); } auto transform = std::make_shared( From 71cf78a91148a34df06ef3d16c27c9ca78ce4683 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 30 May 2024 14:30:15 +0000 Subject: [PATCH 027/299] add setting to changes --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3a0f2ca1e27..ec5a50b44b8 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"read_in_order_max_bytes_to_buffer", 0, 128 * 1024 * 1024, "Max bytes to buffer before merging while reading in order of primary key"}, }}, {"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"}, 
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, From e2f9ecbf69e023da2dd7b187c257385f1a39ee3e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 24 May 2024 16:26:54 +0100 Subject: [PATCH 028/299] move HT cache to a separate file --- src/Interpreters/Aggregator.cpp | 135 +---------------- src/Interpreters/Aggregator.h | 30 +--- src/Interpreters/HashTablesStatistics.h | 157 ++++++++++++++++++++ src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/Planner.cpp | 2 +- 5 files changed, 164 insertions(+), 162 deletions(-) create mode 100644 src/Interpreters/HashTablesStatistics.h diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 45b43ae2d3a..b7143731576 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -24,9 +24,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -78,115 +76,6 @@ namespace ErrorCodes namespace { -/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. - */ -class HashTablesStatistics -{ -public: - struct Entry - { - size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning - size_t median_size; // roughly the size we're going to preallocate on each thread - }; - - using Cache = DB::CacheBase; - using CachePtr = std::shared_ptr; - using Params = DB::Aggregator::Params::StatsCollectingParams; - - /// Collection and use of the statistics should be enabled. - std::optional getSizeHint(const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - if (const auto hint = cache->get(params.key)) - { - LOG_TRACE( - getLogger("Aggregator"), - "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", - params.key, - hint->sum_of_sizes, - hint->median_size); - return *hint; - } - return std::nullopt; - } - - /// Collection and use of the statistics should be enabled. - void update(size_t sum_of_sizes, size_t median_size, const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - const auto hint = cache->get(params.key); - // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong. 
- if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 - || hint->median_size < median_size) - { - LOG_TRACE( - getLogger("Aggregator"), - "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", - params.key, - sum_of_sizes, - median_size); - cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); - } - } - - std::optional getCacheStats() const - { - std::lock_guard lock(mutex); - if (hash_table_stats) - { - size_t hits = 0, misses = 0; - hash_table_stats->getStats(hits, misses); - return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; - } - return std::nullopt; - } - - static size_t calculateCacheKey(const DB::ASTPtr & select_query) - { - if (!select_query) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); - - const auto & select = select_query->as(); - - // It may happen in some corner cases like `select 1 as num group by num`. - if (!select.tables()) - return 0; - - SipHash hash; - hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto where = select.where()) - hash.update(where->getTreeHash(/*ignore_aliases=*/ true)); - if (const auto group_by = select.groupBy()) - hash.update(group_by->getTreeHash(/*ignore_aliases=*/ true)); - return hash.get64(); - } - -private: - CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) - { - if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) - hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); - return hash_table_stats; - } - - mutable std::mutex mutex; - CachePtr hash_table_stats; -}; - -HashTablesStatistics & getHashTablesStatistics() -{ - static HashTablesStatistics hash_tables_stats; - return hash_tables_stats; -} - bool worthConvertToTwoLevel( size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes) { @@ -217,7 +106,7 @@ void initDataVariantsWithSizeHint( const auto & stats_collecting_params = params.stats_collecting_params; if (stats_collecting_params.isCollectionAndUseEnabled()) { - if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params)) + if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) { const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; const auto lower_limit = hint->sum_of_sizes / max_threads; @@ -254,7 +143,7 @@ void initDataVariantsWithSizeHint( } /// Collection and use of the statistics should be enabled. -void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params) +void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::StatsCollectingParams & params) { if (!params.isCollectionAndUseEnabled()) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); @@ -265,7 +154,7 @@ void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, cons const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... 
std::nth_element(sizes.begin(), median_size, sizes.end()); const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull); - getHashTablesStatistics().update(sum_of_sizes, *median_size, params); + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); } DB::ColumnNumbers calculateKeysPositions(const DB::Block & header, const DB::Aggregator::Params & params) @@ -300,24 +189,6 @@ size_t getMinBytesForPrefetch() namespace DB { -std::optional getHashTablesCacheStatistics() -{ - return getHashTablesStatistics().getCacheStats(); -} - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default; - -Aggregator::Params::StatsCollectingParams::StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_) - : key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0) - , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) - , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_) -{ -} - Block Aggregator::getHeader(bool final) const { return params.getHeader(header, final); diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 406d28597cf..9c301d29a27 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -39,9 +39,10 @@ #include -#include #include #include +#include +#include namespace DB { @@ -128,24 +129,6 @@ public: const double min_hit_rate_to_use_consecutive_keys_optimization; - struct StatsCollectingParams - { - StatsCollectingParams(); - - StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_); - - bool isCollectionAndUseEnabled() const { return key != 0; } - void disable() { key = 0; } - - UInt64 key = 0; - const size_t max_entries_for_hash_table_stats = 0; - const size_t max_size_to_preallocate_for_aggregation = 0; - }; - StatsCollectingParams stats_collecting_params; Params( @@ -685,13 +668,4 @@ APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M - -struct HashTablesCacheStatistics -{ - size_t entries = 0; - size_t hits = 0; - size_t misses = 0; -}; - -std::optional getHashTablesCacheStatistics(); } diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h new file mode 100644 index 00000000000..4af9cf68817 --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.h @@ -0,0 +1,157 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct HashTablesCacheStatistics +{ + size_t entries = 0; + size_t hits = 0; + size_t misses = 0; +}; + +inline size_t calculateCacheKey(const DB::ASTPtr & select_query) +{ + if (!select_query) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); + + const auto & select = select_query->as(); + + // It may happen in some corner cases like `select 1 as num group by num`. 
+ if (!select.tables()) + return 0; + + SipHash hash; + hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true)); + if (const auto where = select.where()) + hash.update(where->getTreeHash(/*ignore_aliases=*/true)); + if (const auto group_by = select.groupBy()) + hash.update(group_by->getTreeHash(/*ignore_aliases=*/true)); + return hash.get64(); +} + +struct StatsCollectingParams +{ + StatsCollectingParams() = default; + + StatsCollectingParams( + const ASTPtr & select_query_, + bool collect_hash_table_stats_during_aggregation_, + size_t max_entries_for_hash_table_stats_, + size_t max_size_to_preallocate_for_aggregation_) + : key(collect_hash_table_stats_during_aggregation_ ? calculateCacheKey(select_query_) : 0) + , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) + , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_) + { + } + + bool isCollectionAndUseEnabled() const { return key != 0; } + void disable() { key = 0; } + + UInt64 key = 0; + const size_t max_entries_for_hash_table_stats = 0; + const size_t max_size_to_preallocate_for_aggregation = 0; +}; + +/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. + */ +class HashTablesStatistics +{ +public: + struct Entry + { + size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning + size_t median_size; // roughly the size we're going to preallocate on each thread + }; + + using Cache = DB::CacheBase; + using CachePtr = std::shared_ptr; + using Params = StatsCollectingParams; + + /// Collection and use of the statistics should be enabled. + std::optional getSizeHint(const Params & params) + { + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + if (const auto hint = cache->get(params.key)) + { + LOG_TRACE( + getLogger("Aggregator"), + "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", + params.key, + hint->sum_of_sizes, + hint->median_size); + return *hint; + } + return std::nullopt; + } + + /// Collection and use of the statistics should be enabled. + void update(size_t sum_of_sizes, size_t median_size, const Params & params) + { + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + const auto hint = cache->get(params.key); + // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong. 
+ if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 + || hint->median_size < median_size) + { + LOG_TRACE( + getLogger("Aggregator"), + "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", + params.key, + sum_of_sizes, + median_size); + cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); + } + } + + std::optional getCacheStats() const + { + std::lock_guard lock(mutex); + if (hash_table_stats) + { + size_t hits = 0, misses = 0; + hash_table_stats->getStats(hits, misses); + return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; + } + return std::nullopt; + } + +private: + CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) + { + if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) + hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); + return hash_table_stats; + } + + mutable std::mutex mutex; + CachePtr hash_table_stats; +}; + +inline HashTablesStatistics & getHashTablesStatistics() +{ + static HashTablesStatistics hash_tables_stats; + return hash_tables_stats; +} + +inline std::optional getHashTablesCacheStatistics() +{ + return getHashTablesStatistics().getCacheStats(); +} + +} diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ffe45d55643..ec9341cf9d5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2664,7 +2664,7 @@ static Aggregator::Params getAggregatorParams( size_t group_by_two_level_threshold, size_t group_by_two_level_threshold_bytes) { - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + const auto stats_collecting_params = StatsCollectingParams( query_ptr, settings.collect_hash_table_stats_during_aggregation, settings.max_entries_for_hash_table_stats, diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index b40e23a9553..8ed7004cd42 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -362,7 +362,7 @@ Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context const auto & query_context = planner_context->getQueryContext(); const Settings & settings = query_context->getSettingsRef(); - const auto stats_collecting_params = Aggregator::Params::StatsCollectingParams( + const auto stats_collecting_params = StatsCollectingParams( select_query_info.query, settings.collect_hash_table_stats_during_aggregation, settings.max_entries_for_hash_table_stats, From 1a6f89dd02514eeeaf4cae43538d6a512d85220d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 26 May 2024 18:17:18 +0100 Subject: [PATCH 029/299] cache hash table sizes in ConcurrentHJ --- src/Interpreters/Aggregator.cpp | 67 ++++++++++----------- src/Interpreters/Aggregator.h | 1 + src/Interpreters/ConcurrentHashJoin.cpp | 47 ++++++++++++++- src/Interpreters/ConcurrentHashJoin.h | 6 ++ src/Interpreters/ExpressionAnalyzer.cpp | 3 +- src/Interpreters/HashTablesStatistics.h | 65 +++++++++++--------- src/Interpreters/InterpreterSelectQuery.cpp | 7 ++- src/Planner/Planner.cpp | 3 +- src/Planner/PlannerJoins.cpp | 11 +++- 9 files changed, 137 insertions(+), 73 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index b7143731576..46c2d3c09f0 100644 --- a/src/Interpreters/Aggregator.cpp +++ 
b/src/Interpreters/Aggregator.cpp @@ -104,42 +104,22 @@ void initDataVariantsWithSizeHint( DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params) { const auto & stats_collecting_params = params.stats_collecting_params; - if (stats_collecting_params.isCollectionAndUseEnabled()) + const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; + if (auto hint = findSizeHint(stats_collecting_params, max_threads)) { - if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) - { - const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; - const auto lower_limit = hint->sum_of_sizes / max_threads; - const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads; - if (hint->median_size > upper_limit) - { - /// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway. - /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. - /// So let's just do nothing. - LOG_TRACE( - getLogger("Aggregator"), - "No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, " - "should be at least {}", - stats_collecting_params.max_size_to_preallocate_for_aggregation, - hint->median_size * max_threads); - } - /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 - else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) - { - const auto adjusted = std::max(lower_limit, hint->median_size); - if (worthConvertToTwoLevel( - params.group_by_two_level_threshold, - hint->sum_of_sizes, - /*group_by_two_level_threshold_bytes*/ 0, - /*result_size_bytes*/ 0)) - method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); - result.init(method_chosen, adjusted); - ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); - return; - } - } + if (worthConvertToTwoLevel( + params.group_by_two_level_threshold, + hint->sum_of_sizes, + /*group_by_two_level_threshold_bytes*/ 0, + /*result_size_bytes*/ 0)) + method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); + result.init(method_chosen, hint->median_size); + ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); + } + else + { + result.init(method_chosen); } - result.init(method_chosen); } /// Collection and use of the statistics should be enabled. @@ -3357,4 +3337,23 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } +UInt64 calculateCacheKey(const DB::ASTPtr & select_query) +{ + if (!select_query) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); + + const auto & select = select_query->as(); + + // It may happen in some corner cases like `select 1 as num group by num`. 
+ if (!select.tables()) + return 0; + + SipHash hash; + hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true)); + if (const auto where = select.where()) + hash.update(where->getTreeHash(/*ignore_aliases=*/true)); + if (const auto group_by = select.groupBy()) + hash.update(group_by->getTreeHash(/*ignore_aliases=*/true)); + return hash.get64(); +} } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 9c301d29a27..f4f1e9a1df3 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -657,6 +657,7 @@ private: Arena * arena); }; +UInt64 calculateCacheKey(const DB::ASTPtr & select_query); /** Get the aggregation variant by its type. */ template Method & getDataVariant(AggregatedDataVariants & variants); diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 53987694e46..85e67898569 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -29,6 +29,25 @@ extern const Metric ConcurrentHashJoinPoolThreadsActive; extern const Metric ConcurrentHashJoinPoolThreadsScheduled; } +namespace +{ + +void updateStatistics(const auto & hash_joins, const DB::StatsCollectingParams & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::vector sizes(hash_joins.size()); + for (size_t i = 0; i < hash_joins.size(); ++i) + sizes[i] = hash_joins[i]->data->getTotalRowCount(); + const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though... + std::nth_element(sizes.begin(), median_size, sizes.end()); + if (auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull)) + DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params); +} + +} + namespace DB { @@ -46,7 +65,12 @@ static UInt32 toPowerOfTwo(UInt32 x) } ConcurrentHashJoin::ConcurrentHashJoin( - ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) + ContextPtr context_, + std::shared_ptr table_join_, + size_t slots_, + const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, + bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(toPowerOfTwo(std::min(static_cast(slots_), 256))) @@ -55,6 +79,7 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentMetrics::ConcurrentHashJoinPoolThreadsActive, CurrentMetrics::ConcurrentHashJoinPoolThreadsScheduled, slots)) + , stats_collecting_params(stats_collecting_params_) { hash_joins.resize(slots); @@ -74,9 +99,13 @@ ConcurrentHashJoin::ConcurrentHashJoin( CurrentThread::attachToGroupIfDetached(thread_group); setThreadName("ConcurrentJoin"); + size_t reserve_size = 0; + if (auto hint = findSizeHint(stats_collecting_params, slots)) + reserve_size = hint->median_size; + auto inner_hash_join = std::make_shared(); inner_hash_join->data = std::make_unique( - table_join_, right_sample_block, any_take_last_row_, 0, fmt::format("concurrent{}", idx)); + table_join_, right_sample_block, any_take_last_row_, reserve_size, fmt::format("concurrent{}", idx)); /// Non zero `max_joined_block_rows` allows to process block partially and return not processed part. /// TODO: It's not handled properly in ConcurrentHashJoin case, so we set it to 0 to disable this feature. 
inner_hash_join->data->setMaxJoinedBlockRows(0); @@ -97,6 +126,8 @@ ConcurrentHashJoin::~ConcurrentHashJoin() { try { + updateStatistics(hash_joins, stats_collecting_params); + for (size_t i = 0; i < slots; ++i) { // Hash tables destruction may be very time-consuming. @@ -300,4 +331,16 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons return result; } +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression) +{ + IQueryTreeNode::HashState hash; + hash.update(right_table_expression->getTreeHash()); + chassert(table_join->oneDisjunct()); + for (const auto & name : table_join->getClauses().at(0).key_names_right) + hash.update(name); + LOG_DEBUG(&Poco::Logger::get("debug"), "table_join->getClauses()={}", fmt::join(table_join->getClauses()[0].key_names_right, ", ")); + LOG_DEBUG(&Poco::Logger::get("debug"), "right_table_expression->dumpTree()={}", right_table_expression->dumpTree()); + LOG_DEBUG(&Poco::Logger::get("debug"), "hash.get64()={}", hash.get64()); + return hash.get64(); +} } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index c797ff27ece..9f01117df3a 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -38,6 +40,7 @@ public: std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, + const StatsCollectingParams & stats_collecting_params_, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override; @@ -70,6 +73,8 @@ private: std::unique_ptr pool; std::vector> hash_joins; + StatsCollectingParams stats_collecting_params; + std::mutex totals_mutex; Block totals; @@ -77,4 +82,5 @@ private: Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); }; +UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d80d5cd5b93..477f66f241c 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -982,7 +982,8 @@ static std::shared_ptr tryCreateJoin( const auto & settings = context->getSettings(); if (analyzed_join->allowParallelHashJoin()) - return std::make_shared(context, analyzed_join, settings.max_threads, right_sample_block); + return std::make_shared( + context, analyzed_join, settings.max_threads, right_sample_block, StatsCollectingParams{}); return std::make_shared(analyzed_join, right_sample_block); } diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h index 4af9cf68817..4a13a0d3747 100644 --- a/src/Interpreters/HashTablesStatistics.h +++ b/src/Interpreters/HashTablesStatistics.h @@ -17,38 +17,14 @@ struct HashTablesCacheStatistics size_t misses = 0; }; -inline size_t calculateCacheKey(const DB::ASTPtr & select_query) -{ - if (!select_query) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null"); - - const auto & select = select_query->as(); - - // It may happen in some corner cases like `select 1 as num group by num`. 
- if (!select.tables()) - return 0; - - SipHash hash; - hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true)); - if (const auto where = select.where()) - hash.update(where->getTreeHash(/*ignore_aliases=*/true)); - if (const auto group_by = select.groupBy()) - hash.update(group_by->getTreeHash(/*ignore_aliases=*/true)); - return hash.get64(); -} - struct StatsCollectingParams { StatsCollectingParams() = default; - StatsCollectingParams( - const ASTPtr & select_query_, - bool collect_hash_table_stats_during_aggregation_, - size_t max_entries_for_hash_table_stats_, - size_t max_size_to_preallocate_for_aggregation_) - : key(collect_hash_table_stats_during_aggregation_ ? calculateCacheKey(select_query_) : 0) + StatsCollectingParams(UInt64 key_, bool enable_, size_t max_entries_for_hash_table_stats_, size_t max_size_to_preallocate_) + : key(enable_ ? key_ : 0) , max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_) - , max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_) + , max_size_to_preallocate(max_size_to_preallocate_) { } @@ -56,8 +32,8 @@ struct StatsCollectingParams void disable() { key = 0; } UInt64 key = 0; - const size_t max_entries_for_hash_table_stats = 0; - const size_t max_size_to_preallocate_for_aggregation = 0; + const size_t max_entries_for_hash_table_stats = 0; /// TODO: move to server settings + const size_t max_size_to_preallocate = 0; }; /** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. @@ -154,4 +130,35 @@ inline std::optional getHashTablesCacheStatistics() return getHashTablesStatistics().getCacheStats(); } +inline std::optional +findSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t max_threads) +{ + if (stats_collecting_params.isCollectionAndUseEnabled()) + { + if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) + { + const auto lower_limit = hint->sum_of_sizes / max_threads; + const auto upper_limit = stats_collecting_params.max_size_to_preallocate / max_threads; + if (hint->median_size > upper_limit) + { + /// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway. + /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. + /// So let's just do nothing. 
+ LOG_TRACE( + getLogger("HashTablesStatistics"), + "No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, " + "should be at least {}", + stats_collecting_params.max_size_to_preallocate, + hint->median_size * max_threads); + } + /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 + else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) + { + const auto adjusted = std::max(lower_limit, hint->median_size); + return HashTablesStatistics::Entry{hint->sum_of_sizes, adjusted}; + } + } + } + return std::nullopt; +} } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ec9341cf9d5..6476e3ab419 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -85,16 +85,17 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include #include #include -#include -#include namespace ProfileEvents @@ -2665,7 +2666,7 @@ static Aggregator::Params getAggregatorParams( size_t group_by_two_level_threshold_bytes) { const auto stats_collecting_params = StatsCollectingParams( - query_ptr, + calculateCacheKey(query_ptr), settings.collect_hash_table_stats_during_aggregation, settings.max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 8ed7004cd42..d18b4fec52e 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -363,7 +364,7 @@ Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context const Settings & settings = query_context->getSettingsRef(); const auto stats_collecting_params = StatsCollectingParams( - select_query_info.query, + calculateCacheKey(select_query_info.query), settings.collect_hash_table_stats_during_aggregation, settings.max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index c410b04f209..e88d0d95341 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -768,10 +768,8 @@ std::shared_ptr tryDirectJoin(const std::shared_ptr(table_join, right_table_expression_header, storage, right_table_expression_header_with_storage_column_names); } - } - static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression, @@ -805,7 +803,14 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, if (table_join->allowParallelHashJoin()) { auto query_context = planner_context->getQueryContext(); - return std::make_shared(query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header); + const auto & settings = query_context->getSettingsRef(); + StatsCollectingParams params{ + calculateCacheKey(table_join, right_table_expression), + settings.collect_hash_table_stats_during_aggregation, + settings.max_entries_for_hash_table_stats, + settings.max_size_to_preallocate_for_aggregation}; + return std::make_shared( + query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header, params); } return std::make_shared(table_join, right_table_expression_header); From 6a8bd46d3688544ed05c3d98a2e0042258cab88c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 31 May 2024 
15:51:49 +0000 Subject: [PATCH 030/299] add tests --- .../03168_read_in_order_buffering_1.reference | 6 +++ .../03168_read_in_order_buffering_1.sql | 45 +++++++++++++++++++ .../03168_read_in_order_buffering_2.reference | 0 .../03168_read_in_order_buffering_2.sql | 16 +++++++ 4 files changed, 67 insertions(+) create mode 100644 tests/queries/0_stateless/03168_read_in_order_buffering_1.reference create mode 100644 tests/queries/0_stateless/03168_read_in_order_buffering_1.sql create mode 100644 tests/queries/0_stateless/03168_read_in_order_buffering_2.reference create mode 100644 tests/queries/0_stateless/03168_read_in_order_buffering_2.sql diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference new file mode 100644 index 00000000000..306885a0974 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference @@ -0,0 +1,6 @@ +1 +0 +1 +0 +0 +0 diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql new file mode 100644 index 00000000000..02ffc9ecb7d --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t_read_in_order_1; + +CREATE TABLE t_read_in_order_1 (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 1024, index_granularity_bytes = '10M'; + +INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000); + +SET max_threads = 8; +SET optimize_read_in_order = 1; +SET read_in_order_max_bytes_to_buffer = '128M'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SET read_in_order_max_bytes_to_buffer = 0; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +DROP TABLE t_read_in_order_1; diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql new file mode 100644 index 00000000000..0631a2798dd --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -0,0 +1,16 @@ +-- Tags: long, no-random-settings + +DROP TABLE IF EXISTS t_read_in_order_2; + +CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(100000000); + +SET optimize_read_in_order = 1; +SET max_threads = 8; +SET read_in_order_max_bytes_to_buffer = '100M'; +SET max_memory_usage = '250M'; + +SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; + +DROP TABLE t_read_in_order_2; From 
410c19de06194881825a271bbc2eaee8d957228e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 3 Jun 2024 14:43:38 +0000 Subject: [PATCH 031/299] fix test --- tests/queries/0_stateless/03168_read_in_order_buffering_2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql index 0631a2798dd..7781871390c 100644 --- a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -8,7 +8,7 @@ INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(100000000); SET optimize_read_in_order = 1; SET max_threads = 8; -SET read_in_order_max_bytes_to_buffer = '100M'; +SET read_in_order_max_bytes_to_buffer = '80M'; SET max_memory_usage = '250M'; SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; From 8a1984c245ff15740910c8c127a6a69640ca4088 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 28 May 2024 22:29:44 +0100 Subject: [PATCH 032/299] add test fix tests --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/ConcurrentHashJoin.cpp | 7 ++ ...151_hash_table_sizes_stats_joins.reference | 10 +++ .../02151_hash_table_sizes_stats_joins.sh | 74 +++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference create mode 100755 tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 9bb7bece0f0..bf741272a62 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -555,6 +555,7 @@ The server successfully detected this situation and will download merged part fr M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ + M(HashJoinPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for hash join.") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \ diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 85e67898569..bde24941f1c 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -16,12 +16,18 @@ #include #include #include +#include #include #include #include #include #include +namespace ProfileEvents +{ +extern const Event HashJoinPreallocatedElementsInHashTables; +} + namespace CurrentMetrics { extern const Metric ConcurrentHashJoinPoolThreads; @@ -102,6 +108,7 @@ ConcurrentHashJoin::ConcurrentHashJoin( size_t reserve_size = 0; if (auto hint = findSizeHint(stats_collecting_params, slots)) reserve_size = hint->median_size; + ProfileEvents::increment(ProfileEvents::HashJoinPreallocatedElementsInHashTables, reserve_size); auto inner_hash_join = std::make_shared(); inner_hash_join->data = std::make_unique( diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference new file mode 
100644 index 00000000000..d3d171221e8 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh new file mode 100755 index 00000000000..4ce32e51615 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Tags: long, distributed, no-debug, no-tsan, no-msan, no-ubsan, no-asan, no-random-settings, no-random-merge-tree-settings + +# shellcheck disable=SC2154 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +opts=( + --join_algorithm='parallel_hash' +) + +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE t1(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t1 SELECT number, number FROM numbers_mt(1e6); + + CREATE TABLE t2(a UInt32, b UInt32) ENGINE=MergeTree ORDER BY (); + INSERT INTO t2 SELECT number, number FROM numbers_mt(1e6); +" + +queries_without_preallocation=() +queries_with_preallocation=() + +run_new_query() { + query_id1="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + # when we see a query for the first time we only collect it stats when execution ends. preallocation will happen only on the next run + queries_without_preallocation+=("$query_id1") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id1" -q "$1" --format Null + + query_id2="hash_table_sizes_stats_joins_$RANDOM$RANDOM" + queries_with_preallocation+=("$query_id2") + $CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id2" -q "$1" --format Null +} + +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.a" +# it only matters what columns from the right table are part of the join key, as soon as we change them - it is a new cache entry +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.a = y.b" +run_new_query "SELECT * FROM t1 AS x INNER JOIN t2 AS y USING (a, b)" + +# we already had a join on t2.a, so cache should be populated +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS y ON x.b = y.a" --format Null +# the same query with a different alias for the t2 +query_id="hash_table_sizes_stats_joins_$RANDOM$RANDOM" +queries_with_preallocation+=("$query_id") +$CLICKHOUSE_CLIENT "${opts[@]}" --query_id="$query_id" -q "SELECT * FROM t1 AS x INNER JOIN t2 AS z ON x.b = z.a" --format Null + +# now t1 is the right table +run_new_query "SELECT * FROM t2 AS x INNER JOIN t1 AS y ON x.a = y.a" + +################################## + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + +for i in "${!queries_without_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_without_preallocation[$i]}" -q " + SELECT COUNT(*) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() + AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0 AND type = 'QueryFinish' + " +done + +for i in "${!queries_with_preallocation[@]}"; do + $CLICKHOUSE_CLIENT --param_query_id="${queries_with_preallocation[$i]}" -q " + SELECT COUNT(*) + FROM system.query_log + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() + AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] 
> 0 AND type = 'QueryFinish' + " +done From 03352063e0361a07b24e5567b4024c4e89f080a9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 28 May 2024 23:00:19 +0100 Subject: [PATCH 033/299] better refactor --- src/Interpreters/Aggregator.cpp | 10 +- src/Interpreters/ConcurrentHashJoin.cpp | 15 +-- src/Interpreters/HashTablesStatistics.cpp | 112 ++++++++++++++++++++++ src/Interpreters/HashTablesStatistics.h | 110 ++------------------- 4 files changed, 133 insertions(+), 114 deletions(-) create mode 100644 src/Interpreters/HashTablesStatistics.cpp diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 46c2d3c09f0..e073b7a49b6 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -105,7 +106,7 @@ void initDataVariantsWithSizeHint( { const auto & stats_collecting_params = params.stats_collecting_params; const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1; - if (auto hint = findSizeHint(stats_collecting_params, max_threads)) + if (auto hint = getSizeHint(stats_collecting_params, /*tables_cnt=*/max_threads)) { if (worthConvertToTwoLevel( params.group_by_two_level_threshold, @@ -114,19 +115,19 @@ void initDataVariantsWithSizeHint( /*result_size_bytes*/ 0)) method_chosen = convertToTwoLevelTypeIfPossible(method_chosen); result.init(method_chosen, hint->median_size); - ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); } else { result.init(method_chosen); } + ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel()); } /// Collection and use of the statistics should be enabled. 
void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::StatsCollectingParams & params) { if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + return; std::vector sizes(data_variants.size()); for (size_t i = 0; i < data_variants.size(); ++i) @@ -2634,8 +2635,7 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData LOG_TRACE(log, "Merging aggregated data"); - if (params.stats_collecting_params.isCollectionAndUseEnabled()) - updateStatistics(data_variants, params.stats_collecting_params); + updateStatistics(data_variants, params.stats_collecting_params); ManyAggregatedDataVariants non_empty_data; non_empty_data.reserve(data_variants.size()); diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index bde24941f1c..4493a9f4dbd 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,7 @@ namespace void updateStatistics(const auto & hash_joins, const DB::StatsCollectingParams & params) { if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + return; std::vector sizes(hash_joins.size()); for (size_t i = 0; i < hash_joins.size(); ++i) @@ -106,7 +107,7 @@ ConcurrentHashJoin::ConcurrentHashJoin( setThreadName("ConcurrentJoin"); size_t reserve_size = 0; - if (auto hint = findSizeHint(stats_collecting_params, slots)) + if (auto hint = getSizeHint(stats_collecting_params, slots)) reserve_size = hint->median_size; ProfileEvents::increment(ProfileEvents::HashJoinPreallocatedElementsInHashTables, reserve_size); @@ -341,13 +342,13 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons UInt64 calculateCacheKey(std::shared_ptr & table_join, const QueryTreeNodePtr & right_table_expression) { IQueryTreeNode::HashState hash; + chassert(right_table_expression); hash.update(right_table_expression->getTreeHash()); - chassert(table_join->oneDisjunct()); - for (const auto & name : table_join->getClauses().at(0).key_names_right) + chassert(table_join && table_join->oneDisjunct()); + const auto keys + = NameOrderedSet{table_join->getClauses().at(0).key_names_right.begin(), table_join->getClauses().at(0).key_names_right.end()}; + for (const auto & name : keys) hash.update(name); - LOG_DEBUG(&Poco::Logger::get("debug"), "table_join->getClauses()={}", fmt::join(table_join->getClauses()[0].key_names_right, ", ")); - LOG_DEBUG(&Poco::Logger::get("debug"), "right_table_expression->dumpTree()={}", right_table_expression->dumpTree()); - LOG_DEBUG(&Poco::Logger::get("debug"), "hash.get64()={}", hash.get64()); return hash.get64(); } } diff --git a/src/Interpreters/HashTablesStatistics.cpp b/src/Interpreters/HashTablesStatistics.cpp new file mode 100644 index 00000000000..5ad06cdcea0 --- /dev/null +++ b/src/Interpreters/HashTablesStatistics.cpp @@ -0,0 +1,112 @@ +#include + +#include +#include + +namespace DB +{ + +std::optional HashTablesStatistics::getSizeHint(const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + if (const auto hint = 
cache->get(params.key)) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", + params.key, + hint->sum_of_sizes, + hint->median_size); + return *hint; + } + return std::nullopt; +} + +/// Collection and use of the statistics should be enabled. +void HashTablesStatistics::update(size_t sum_of_sizes, size_t median_size, const Params & params) +{ + if (!params.isCollectionAndUseEnabled()) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + + std::lock_guard lock(mutex); + const auto cache = getHashTableStatsCache(params, lock); + const auto hint = cache->get(params.key); + // We'll maintain the maximum among all the observed values until another prediction is much lower (that should indicate some change) + if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 + || hint->median_size < median_size) + { + LOG_TRACE( + getLogger("HashTablesStatistics"), + "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", + params.key, + sum_of_sizes, + median_size); + cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); + } +} + +std::optional HashTablesStatistics::getCacheStats() const +{ + std::lock_guard lock(mutex); + if (hash_table_stats) + { + size_t hits = 0, misses = 0; + hash_table_stats->getStats(hits, misses); + return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; + } + return std::nullopt; +} + +HashTablesStatistics::CachePtr HashTablesStatistics::getHashTableStatsCache(const Params & params, const std::lock_guard &) +{ + if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) + hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); + return hash_table_stats; +} + +HashTablesStatistics & getHashTablesStatistics() +{ + static HashTablesStatistics hash_tables_stats; + return hash_tables_stats; +} + +std::optional getHashTablesCacheStatistics() +{ + return getHashTablesStatistics().getCacheStats(); +} + +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt) +{ + if (stats_collecting_params.isCollectionAndUseEnabled()) + { + if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) + { + const auto lower_limit = hint->sum_of_sizes / tables_cnt; + const auto upper_limit = stats_collecting_params.max_size_to_preallocate / tables_cnt; + if (hint->median_size > upper_limit) + { + /// Since we cannot afford to preallocate as much as needed, we would likely have to do at least one resize anyway. + /// Though it still sounds better than N resizes, but in actuality we saw that one big resize (remember, HT-s grow exponentially) + /// plus worse cache locality since we're dealing with big HT-s from the beginning yields worse performance. + /// So let's just do nothing. 
+ LOG_TRACE( + getLogger("HashTablesStatistics"), + "No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, " + "should be at least {}", + stats_collecting_params.max_size_to_preallocate, + hint->median_size * tables_cnt); + } + /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 + else if ((tables_cnt > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) + { + return HashTablesStatistics::Entry{hint->sum_of_sizes, std::max(lower_limit, hint->median_size)}; + } + } + } + return std::nullopt; +} +} diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h index 4a13a0d3747..f4357c6e8cb 100644 --- a/src/Interpreters/HashTablesStatistics.h +++ b/src/Interpreters/HashTablesStatistics.h @@ -1,10 +1,6 @@ #pragma once -#include -#include #include -#include -#include namespace DB @@ -36,7 +32,7 @@ struct StatsCollectingParams const size_t max_size_to_preallocate = 0; }; -/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes. +/** Collects observed HashTable-s sizes to avoid redundant intermediate resizes. */ class HashTablesStatistics { @@ -52,113 +48,23 @@ public: using Params = StatsCollectingParams; /// Collection and use of the statistics should be enabled. - std::optional getSizeHint(const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); - - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - if (const auto hint = cache->get(params.key)) - { - LOG_TRACE( - getLogger("Aggregator"), - "An entry for key={} found in cache: sum_of_sizes={}, median_size={}", - params.key, - hint->sum_of_sizes, - hint->median_size); - return *hint; - } - return std::nullopt; - } + std::optional getSizeHint(const Params & params); /// Collection and use of the statistics should be enabled. - void update(size_t sum_of_sizes, size_t median_size, const Params & params) - { - if (!params.isCollectionAndUseEnabled()) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled."); + void update(size_t sum_of_sizes, size_t median_size, const Params & params); - std::lock_guard lock(mutex); - const auto cache = getHashTableStatsCache(params, lock); - const auto hint = cache->get(params.key); - // We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong. 
- if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2 - || hint->median_size < median_size) - { - LOG_TRACE( - getLogger("Aggregator"), - "Statistics updated for key={}: new sum_of_sizes={}, median_size={}", - params.key, - sum_of_sizes, - median_size); - cache->set(params.key, std::make_shared(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size})); - } - } - - std::optional getCacheStats() const - { - std::lock_guard lock(mutex); - if (hash_table_stats) - { - size_t hits = 0, misses = 0; - hash_table_stats->getStats(hits, misses); - return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses}; - } - return std::nullopt; - } + std::optional getCacheStats() const; private: - CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &) - { - if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) - hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); - return hash_table_stats; - } + CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard &); mutable std::mutex mutex; CachePtr hash_table_stats; }; -inline HashTablesStatistics & getHashTablesStatistics() -{ - static HashTablesStatistics hash_tables_stats; - return hash_tables_stats; -} +HashTablesStatistics & getHashTablesStatistics(); -inline std::optional getHashTablesCacheStatistics() -{ - return getHashTablesStatistics().getCacheStats(); -} +std::optional getHashTablesCacheStatistics(); -inline std::optional -findSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t max_threads) -{ - if (stats_collecting_params.isCollectionAndUseEnabled()) - { - if (auto hint = DB::getHashTablesStatistics().getSizeHint(stats_collecting_params)) - { - const auto lower_limit = hint->sum_of_sizes / max_threads; - const auto upper_limit = stats_collecting_params.max_size_to_preallocate / max_threads; - if (hint->median_size > upper_limit) - { - /// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway. - /// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown. - /// So let's just do nothing. 
- LOG_TRACE( - getLogger("HashTablesStatistics"), - "No space were preallocated in hash tables because 'max_size_to_preallocate' has too small value: {}, " - "should be at least {}", - stats_collecting_params.max_size_to_preallocate, - hint->median_size * max_threads); - } - /// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703 - else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000) - { - const auto adjusted = std::max(lower_limit, hint->median_size); - return HashTablesStatistics::Entry{hint->sum_of_sizes, adjusted}; - } - } - } - return std::nullopt; -} +std::optional getSizeHint(const DB::StatsCollectingParams & stats_collecting_params, size_t tables_cnt); } From 985a4badd871ace80c26423240b064bb7e658c44 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 3 Jun 2024 22:40:19 +0100 Subject: [PATCH 034/299] fix style fix style fix style --- src/Interpreters/HashTablesStatistics.cpp | 5 +++++ src/Interpreters/HashTablesStatistics.h | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/HashTablesStatistics.cpp b/src/Interpreters/HashTablesStatistics.cpp index 5ad06cdcea0..91473ef7bbb 100644 --- a/src/Interpreters/HashTablesStatistics.cpp +++ b/src/Interpreters/HashTablesStatistics.cpp @@ -6,6 +6,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + std::optional HashTablesStatistics::getSizeHint(const Params & params) { if (!params.isCollectionAndUseEnabled()) diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h index f4357c6e8cb..73dd3c097d4 100644 --- a/src/Interpreters/HashTablesStatistics.h +++ b/src/Interpreters/HashTablesStatistics.h @@ -2,7 +2,6 @@ #include - namespace DB { From 44e4495e5bddbe044a47cf26f3996dd344a12e6e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 3 Jun 2024 22:20:49 +0100 Subject: [PATCH 035/299] move max_entries_for_hash_table_stats to server settings --- src/Core/ServerSettings.h | 1 + src/Core/Settings.h | 2 +- src/Interpreters/HashTablesStatistics.cpp | 4 ++-- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/Planner.cpp | 2 +- src/Planner/PlannerJoins.cpp | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 45f235116ab..dd840e9bda3 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -146,6 +146,7 @@ namespace DB M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. 
Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index dc61a049de8..a9c96597fa1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -699,7 +699,6 @@ class IColumn; M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ \ M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ - M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \ \ M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \ @@ -976,6 +975,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_entries_for_hash_table_stats, 10'000) \ /* ---- */ \ MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \ MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ diff --git a/src/Interpreters/HashTablesStatistics.cpp b/src/Interpreters/HashTablesStatistics.cpp index 91473ef7bbb..d66f1bbd1d3 100644 --- a/src/Interpreters/HashTablesStatistics.cpp +++ b/src/Interpreters/HashTablesStatistics.cpp @@ -68,8 +68,8 @@ std::optional HashTablesStatistics::getCacheStats() c HashTablesStatistics::CachePtr HashTablesStatistics::getHashTableStatsCache(const Params & params, const std::lock_guard &) { - if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats) - hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats); + if (!hash_table_stats) + hash_table_stats = std::make_shared(params.max_entries_for_hash_table_stats * sizeof(Entry)); return hash_table_stats; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6476e3ab419..5f1af2f24bf 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2668,7 +2668,7 @@ static Aggregator::Params getAggregatorParams( const auto stats_collecting_params = StatsCollectingParams( calculateCacheKey(query_ptr), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + context.getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); return Aggregator::Params diff --git a/src/Planner/Planner.cpp 
b/src/Planner/Planner.cpp index d18b4fec52e..5f7a8fdbdde 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -366,7 +366,7 @@ Aggregator::Params getAggregatorParams(const PlannerContextPtr & planner_context const auto stats_collecting_params = StatsCollectingParams( calculateCacheKey(select_query_info.query), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + query_context->getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation); auto aggregate_descriptions = aggregation_analysis_result.aggregate_descriptions; diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index e88d0d95341..b98d2ec4da7 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -807,7 +807,7 @@ static std::shared_ptr tryCreateJoin(JoinAlgorithm algorithm, StatsCollectingParams params{ calculateCacheKey(table_join, right_table_expression), settings.collect_hash_table_stats_during_aggregation, - settings.max_entries_for_hash_table_stats, + query_context->getServerSettings().max_entries_for_hash_table_stats, settings.max_size_to_preallocate_for_aggregation}; return std::make_shared( query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header, params); From 4d49be0145fa642dabdd62a5730938f63f3b6d7a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 4 Jun 2024 22:36:00 +0100 Subject: [PATCH 036/299] fix --- src/Planner/PlannerJoins.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index b98d2ec4da7..cfcb43cb98d 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -43,6 +43,8 @@ #include #include +#include + namespace DB { From 3a4592c6285a6014a2c01010a5641274adfda7d2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 6 Jun 2024 13:52:41 +0100 Subject: [PATCH 037/299] fix test --- .../0_stateless/02151_hash_table_sizes_stats_joins.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh index 4ce32e51615..fb0e4315652 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh @@ -57,7 +57,8 @@ $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" for i in "${!queries_without_preallocation[@]}"; do $CLICKHOUSE_CLIENT --param_query_id="${queries_without_preallocation[$i]}" -q " - SELECT COUNT(*) + -- the old analyzer is not supported + SELECT COUNT(*) * getSetting('allow_experimental_analyzer') FROM system.query_log WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0 AND type = 'QueryFinish' @@ -66,7 +67,8 @@ done for i in "${!queries_with_preallocation[@]}"; do $CLICKHOUSE_CLIENT --param_query_id="${queries_with_preallocation[$i]}" -q " - SELECT COUNT(*) + -- the old analyzer is not supported + SELECT COUNT(*) * getSetting('allow_experimental_analyzer') FROM system.query_log WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] > 0 AND type = 'QueryFinish' From b17feefc580de21115d84e4fdec7952e6c2de3c2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 6 Jun 2024 16:07:09 +0000 
Subject: [PATCH 038/299] fix test --- tests/queries/0_stateless/03168_read_in_order_buffering_2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql index 7781871390c..a706965f3ff 100644 --- a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -1,4 +1,4 @@ --- Tags: long, no-random-settings +-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan DROP TABLE IF EXISTS t_read_in_order_2; From bdd8bcc0d9b68474ca10df52772babc5aa1a20d4 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 7 Jun 2024 15:51:13 +0000 Subject: [PATCH 039/299] add some log --- src/Databases/DatabaseAtomic.cpp | 7 +++++++ src/Storages/StorageReplicatedMergeTree.cpp | 19 ++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..d431eb5c1b7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -112,6 +112,13 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & table_name_to_path.erase(name); detached_tables.emplace(table->getStorageID().uuid, table); not_in_use = cleanupDetachedTables(); + + if (!not_in_use.empty()) + { + not_in_use.clear(); + LOG_DEBUG(log, "Finish removing non using detached tables"); + } + return table; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e18e66d7af9..68bb5916d7c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,20 +5,21 @@ #include #include +#include +#include #include #include #include #include +#include #include #include #include #include #include +#include #include #include -#include -#include -#include #include @@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; + LOG_TRACE(log, "Start preparing for shutdown"); + try { auto settings_ptr = getSettings(); @@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() stopBeingLeader(); if (attach_thread) + { attach_thread->shutdown(); + LOG_TRACE(log, "Attach thread shutdowned"); + } + restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again @@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } + + LOG_TRACE(log, "Finish preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5332,6 +5341,9 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; + const auto storage_name = getStorageID().getNameForLogs(); + LOG_TRACE(log, "Shutdown started, table={}", storage_name); + flushAndPrepareForShutdown(); if (!shutdown_deadline.has_value()) @@ -5374,6 +5386,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } + LOG_TRACE(log, "Shutdown finished, table={}", storage_name); } From a357e228a28d988d4781fcee0c7c9683721df40d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 11 Jun 2024 00:39:09 +0100 Subject: [PATCH 040/299] fix test --- 
.../0_stateless/02151_hash_table_sizes_stats_joins.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh index fb0e4315652..6d715604d93 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_joins.sh @@ -58,19 +58,17 @@ $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" for i in "${!queries_without_preallocation[@]}"; do $CLICKHOUSE_CLIENT --param_query_id="${queries_without_preallocation[$i]}" -q " -- the old analyzer is not supported - SELECT COUNT(*) * getSetting('allow_experimental_analyzer') + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0, 1)) FROM system.query_log - WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() - AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] = 0 AND type = 'QueryFinish' + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' " done for i in "${!queries_with_preallocation[@]}"; do $CLICKHOUSE_CLIENT --param_query_id="${queries_with_preallocation[$i]}" -q " -- the old analyzer is not supported - SELECT COUNT(*) * getSetting('allow_experimental_analyzer') + SELECT sum(if(getSetting('allow_experimental_analyzer'), ProfileEvents['HashJoinPreallocatedElementsInHashTables'] > 0, 1)) FROM system.query_log - WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() - AND ProfileEvents['HashJoinPreallocatedElementsInHashTables'] > 0 AND type = 'QueryFinish' + WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase() AND type = 'QueryFinish' " done From ada56bdeeb46a903dbb4756bd61cc213b2d4b5dd Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 11 Jun 2024 13:13:56 +0000 Subject: [PATCH 041/299] fix test --- tests/queries/0_stateless/03168_read_in_order_buffering_2.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql index a706965f3ff..7ce07a55d5d 100644 --- a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -1,4 +1,4 @@ --- Tags: long, no-random-settings, no-tsan, no-asan, no-msan +-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan, no-s3-storage DROP TABLE IF EXISTS t_read_in_order_2; From 8af89e6e6d919cf4f0c1eb4a5372ab49dfd9b144 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 13 Jun 2024 13:22:25 +0000 Subject: [PATCH 042/299] apply comments --- src/Databases/DatabaseAtomic.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index d431eb5c1b7..b30b05bb7a7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -116,7 +116,7 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & if (!not_in_use.empty()) { not_in_use.clear(); - LOG_DEBUG(log, "Finish removing non using detached tables"); + LOG_DEBUG(log, "Finished removing not used detached tables"); } return table; diff --git 
a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 68bb5916d7c..9b914e3de8f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,17 +5,15 @@ #include #include -#include -#include #include #include #include #include -#include #include #include #include #include +#include #include #include #include @@ -5287,7 +5285,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (attach_thread) { attach_thread->shutdown(); - LOG_TRACE(log, "Attach thread shutdowned"); + LOG_TRACE(log, "The attach thread is shutdown"); } @@ -5303,7 +5301,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() throw; } - LOG_TRACE(log, "Finish preparing for shutdown"); + LOG_TRACE(log, "Finished preparing for shutdown"); } void StorageReplicatedMergeTree::partialShutdown() @@ -5342,7 +5340,7 @@ void StorageReplicatedMergeTree::shutdown(bool) return; const auto storage_name = getStorageID().getNameForLogs(); - LOG_TRACE(log, "Shutdown started, table={}", storage_name); + LOG_TRACE(log, "Shutdown started"); flushAndPrepareForShutdown(); @@ -5386,7 +5384,7 @@ void StorageReplicatedMergeTree::shutdown(bool) /// Wait for all of them std::lock_guard lock(data_parts_exchange_ptr->rwlock); } - LOG_TRACE(log, "Shutdown finished, table={}", storage_name); + LOG_TRACE(log, "Shutdown finished"); } From f7eac01b822c94184a16dfda1685d95f05c5cc8a Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 13 Jun 2024 13:31:52 +0000 Subject: [PATCH 043/299] up includes --- src/Storages/StorageReplicatedMergeTree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b33514907f9..a1f4a40a0ab 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5,10 +5,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include From c5fdc87c1e6dd8c1d1216e5599042fee682c23f3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 17 Jun 2024 18:15:29 +0000 Subject: [PATCH 044/299] use rows threshold --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Processors/QueryPlan/BufferChunksTransform.cpp | 12 ++++++------ src/Processors/QueryPlan/BufferChunksTransform.h | 6 +++--- src/Processors/QueryPlan/SortingStep.cpp | 8 +++----- src/Processors/QueryPlan/SortingStep.h | 2 +- .../0_stateless/03168_read_in_order_buffering_1.sql | 4 ++-- .../0_stateless/03168_read_in_order_buffering_2.sql | 9 +++++---- 8 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 121be813e31..b3e0ecd9e9c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -547,7 +547,7 @@ class IColumn; M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ - M(UInt64, read_in_order_max_bytes_to_buffer, 128 * 1024 * 1024, "Max bytes to buffer before merging while reading in order of primary key. 
The higher value increases parallelism of query execution", 0) \ + M(Bool, read_in_order_use_buffering, true, "Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution", 0) \ M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index f94959fbd74..328ffe01a02 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -96,7 +96,7 @@ static const std::map= max_bytes_to_buffer) + if (num_buffered_rows >= max_rows_to_buffer) { input.setNotNeeded(); return Status::PortFull; diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h index c912f280a8a..a7e9d413c64 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.h +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -8,7 +8,7 @@ namespace DB class BufferChunksTransform : public IProcessor { public: - BufferChunksTransform(const Block & header_, size_t max_bytes_to_buffer_, size_t limit_); + BufferChunksTransform(const Block & header_, size_t max_rows_to_buffer_, size_t limit_); Status prepare() override; String getName() const override { return "BufferChunks"; } @@ -19,11 +19,11 @@ private: InputPort & input; OutputPort & output; - size_t max_bytes_to_buffer; + size_t max_rows_to_buffer; size_t limit; std::queue chunks; - size_t num_buffered_bytes = 0; + size_t num_buffered_rows = 0; size_t num_processed_rows = 0; }; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index a853c908317..a06910bef7f 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -38,7 +38,7 @@ SortingStep::Settings::Settings(const Context & context) tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; max_block_bytes = settings.prefer_external_sort_block_bytes; - read_in_order_max_bytes_to_buffer = settings.read_in_order_max_bytes_to_buffer; + read_in_order_use_buffering = settings.read_in_order_use_buffering; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -246,13 +246,11 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr /// If there are several streams, then we merge them into one if (pipeline.getNumStreams() > 1) { - if (use_buffering && sort_settings.read_in_order_max_bytes_to_buffer) + if (use_buffering && sort_settings.read_in_order_use_buffering) { - size_t bytes_to_buffer = sort_settings.read_in_order_max_bytes_to_buffer / pipeline.getNumStreams(); - pipeline.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, bytes_to_buffer, limit_); + return std::make_shared(header, sort_settings.max_block_bytes, limit_); }); } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h 
index 57658b6dafb..b4a49394a13 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -28,7 +28,7 @@ public: TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; size_t max_block_bytes = 0; - size_t read_in_order_max_bytes_to_buffer = 0; + size_t read_in_order_use_buffering = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql index 02ffc9ecb7d..75025dcadc8 100644 --- a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql @@ -8,7 +8,7 @@ INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000); SET max_threads = 8; SET optimize_read_in_order = 1; -SET read_in_order_max_bytes_to_buffer = '128M'; +SET read_in_order_use_buffering = 1; SELECT count() FROM ( @@ -25,7 +25,7 @@ SELECT count() FROM EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 ) WHERE explain LIKE '%BufferChunks%'; -SET read_in_order_max_bytes_to_buffer = 0; +SET read_in_order_use_buffering = 0; SELECT count() FROM ( diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql index 7ce07a55d5d..1d3a75412e0 100644 --- a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -4,12 +4,13 @@ DROP TABLE IF EXISTS t_read_in_order_2; CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; -INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(100000000); +INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(10000000); +OPTIMIZE TABLE t_read_in_order_2 FINAL; SET optimize_read_in_order = 1; -SET max_threads = 8; -SET read_in_order_max_bytes_to_buffer = '80M'; -SET max_memory_usage = '250M'; +SET max_threads = 4; +SET read_in_order_use_buffering = 1; +SET max_memory_usage = '100M'; SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; From 148a3c80cbeaafe52834f931b3bcc627a3f60888 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 17 Jun 2024 20:44:53 +0000 Subject: [PATCH 045/299] better thresholds --- .../QueryPlan/BufferChunksTransform.cpp | 18 +++++++++++++----- .../QueryPlan/BufferChunksTransform.h | 8 +++++++- src/Processors/QueryPlan/SortingStep.cpp | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp index a34f0be16ef..283a0acf172 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.cpp +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -3,11 +3,16 @@ namespace DB { -BufferChunksTransform::BufferChunksTransform(const Block & header_, size_t max_rows_to_buffer_, size_t limit_) +BufferChunksTransform::BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_) : IProcessor({header_}, {header_}) , input(inputs.front()) , output(outputs.front()) , max_rows_to_buffer(max_rows_to_buffer_) + , max_bytes_to_buffer(max_bytes_to_buffer_) , limit(limit_) { } @@ -30,7 +35,9 @@ IProcessor::Status BufferChunksTransform::prepare() auto chunk = std::move(chunks.front()); chunks.pop(); - num_buffered_rows -= chunk.bytes(); + num_buffered_rows -= 
chunk.getNumRows(); + num_buffered_bytes -= chunk.bytes(); + output.push(std::move(chunk)); } else if (input.hasData()) @@ -45,14 +52,15 @@ IProcessor::Status BufferChunksTransform::prepare() } } - if (input.hasData() && num_buffered_rows < max_rows_to_buffer) + if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer)) { auto chunk = pullChunk(); - num_buffered_rows += chunk.bytes(); + num_buffered_rows += chunk.getNumRows(); + num_buffered_bytes += chunk.bytes(); chunks.push(std::move(chunk)); } - if (num_buffered_rows >= max_rows_to_buffer) + if (num_buffered_rows >= max_rows_to_buffer && num_buffered_bytes >= max_bytes_to_buffer) { input.setNotNeeded(); return Status::PortFull; diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h index a7e9d413c64..84c35431364 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.h +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -8,7 +8,11 @@ namespace DB class BufferChunksTransform : public IProcessor { public: - BufferChunksTransform(const Block & header_, size_t max_rows_to_buffer_, size_t limit_); + BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_); Status prepare() override; String getName() const override { return "BufferChunks"; } @@ -20,10 +24,12 @@ private: OutputPort & output; size_t max_rows_to_buffer; + size_t max_bytes_to_buffer; size_t limit; std::queue chunks; size_t num_buffered_rows = 0; + size_t num_buffered_bytes = 0; size_t num_processed_rows = 0; }; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index a06910bef7f..1c40f84d23d 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -250,7 +250,7 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr { pipeline.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, sort_settings.max_block_bytes, limit_); + return std::make_shared(header, sort_settings.max_block_size, sort_settings.max_block_bytes, limit_); }); } From cc45847b9f75ecf0c19c32861a48d8e50f833f78 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 18 Jun 2024 19:14:28 +0100 Subject: [PATCH 046/299] new check --- src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 79c0e6ad262..326b4455596 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -443,6 +443,9 @@ void DefaultCoordinator::doHandleInitialAllRangesAnnouncement(InitialAllRangesAn ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size()); ++stats[replica_num].number_of_requests; + + if (replica_status[replica_num].is_announcement_received) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Duplicate announcement received for replica number {}", replica_num); replica_status[replica_num].is_announcement_received = true; LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count); From a7ffb0e8148b0e1cd3abe05f5320af62728f65b1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 18 Jun 2024 19:14:46 +0100 Subject: [PATCH 047/299] add 
test --- ...03173_parallel_replicas_join_bug.reference | 7 ++ .../03173_parallel_replicas_join_bug.sql | 67 +++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference create mode 100644 tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference new file mode 100644 index 00000000000..b23d6b02bc1 --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference @@ -0,0 +1,7 @@ +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +a1451105-722e-4fe7-bfaa-65ad2ae249c2 diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql new file mode 100644 index 00000000000..3dee67fbf3f --- /dev/null +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql @@ -0,0 +1,67 @@ +CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); + +CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); + +CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); +INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); + +SET allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; + +SELECT + id, + whatever +FROM ids AS l +INNER JOIN view( + SELECT * + FROM merge(currentDatabase(), 'data*') +) AS s ON l.id = s.id +WHERE status IN ['CREATED', 'CREATING'] +ORDER BY event_time DESC; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; + +with +results1 as ( + SELECT id + FROM data t1 + inner join ids t2 + on t1.id = t2.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 t1 inner join results2 t2 using (id); + +with +results1 as ( + SELECT t1.id + FROM data t1 + inner join ids t2 on t1.id = t2.id + left join data t3 on t2.id = t3.id +), +results2 as ( + SELECT id + FROM ids t1 + inner join data t2 + on t1.id = t2.id +) +select * from results1 union all select * from results2; + From ff9c64fa4e83f48c343ea920ec88ccb770b113d1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 18 Jun 2024 21:00:44 +0100 Subject: [PATCH 048/299] fix --- src/Planner/PlannerJoinTree.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 6ec460b0894..86faec29760 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -857,12 +857,23 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres from_stage = 
storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); + auto context_for_read = Context::createCopy(query_context); + + /// It is just a safety check needed until we have a proper sending plan to replicas. + /// If we have a non-trivial storage like View it might create its own Planner inside read(), run findTableForParallelReplicas() + /// and find some other table that might be used for reading with parallel replicas. It will lead to errors. + const bool other_table_already_chosen_for_reading_with_parallel_replicas + = planner_context->getGlobalPlannerContext()->parallel_replicas_table + && !table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower; + if (other_table_already_chosen_for_reading_with_parallel_replicas) + context_for_read->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + storage->read( query_plan, columns_names, storage_snapshot, table_expression_query_info, - query_context, + context_for_read, from_stage, max_block_size, max_streams); From 0791677268446b6b9aeb42fd4d7e01db91bc42c8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 18 Jun 2024 22:35:46 +0100 Subject: [PATCH 049/299] fix? --- src/Planner/PlannerJoinTree.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 86faec29760..0af7bfea0b0 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -857,8 +857,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres from_stage = storage->getQueryProcessingStage( query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info); - auto context_for_read = Context::createCopy(query_context); - /// It is just a safety check needed until we have a proper sending plan to replicas. /// If we have a non-trivial storage like View it might create its own Planner inside read(), run findTableForParallelReplicas() /// and find some other table that might be used for reading with parallel replicas. It will lead to errors. 
@@ -866,14 +864,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres = planner_context->getGlobalPlannerContext()->parallel_replicas_table && !table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower; if (other_table_already_chosen_for_reading_with_parallel_replicas) - context_for_read->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); storage->read( query_plan, columns_names, storage_snapshot, table_expression_query_info, - context_for_read, + query_context, from_stage, max_block_size, max_streams); From ed44e4cf6b9f66fed04a4267b72cb375c9eae295 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 18 Jun 2024 23:17:50 +0100 Subject: [PATCH 050/299] fix test --- ...ql => 03173_parallel_replicas_join_bug.sh} | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) rename tests/queries/0_stateless/{03173_parallel_replicas_join_bug.sql => 03173_parallel_replicas_join_bug.sh} (59%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh old mode 100644 new mode 100755 similarity index 59% rename from tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql rename to tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index 3dee67fbf3f..4638609b00c --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sql +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -1,12 +1,22 @@ -CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); -INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); +#!/usr/bin/env bash -CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); -INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh -CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); -INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); +$CLICKHOUSE_CLIENT -nq " + CREATE TABLE ids (id UUID, whatever String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO ids VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', 'whatever'); + + CREATE TABLE data (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-01', 'CREATED'); + + CREATE TABLE data2 (id UUID, event_time DateTime, status String) Engine=MergeTree ORDER BY tuple(); + INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); +" + +$CLICKHOUSE_CLIENT -nq " SET allow_experimental_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; SELECT @@ -15,7 +25,7 @@ SELECT FROM ids AS l INNER JOIN view( SELECT * - FROM merge(currentDatabase(), 'data*') + FROM merge($CLICKHOUSE_DATABASE, 'data.*') ) AS s ON l.id = s.id WHERE status IN ['CREATED', 'CREATING'] ORDER BY event_time DESC; @@ -64,4 +74,4 @@ results2 as ( on t1.id = t2.id ) select * from results1 union all select * from results2; - +" From 96fd928bced6c14e9de98ad14b77b370ed14de8e Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 19 Jun 2024 08:59:48 +0000 Subject: [PATCH 051/299] remove unused var --- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a1f4a40a0ab..61a492c1f63 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5341,7 +5341,6 @@ void StorageReplicatedMergeTree::shutdown(bool) if (shutdown_called.exchange(true)) return; - const auto storage_name = getStorageID().getNameForLogs(); LOG_TRACE(log, "Shutdown started"); flushAndPrepareForShutdown(); From b1f45da3eb72f0aae9976c64919bdbc7a4353bc6 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 18:20:35 +0000 Subject: [PATCH 052/299] Implement binary encoding for ClickHouse data types --- docs/en/interfaces/formats.md | 4 + .../operations/settings/settings-formats.md | 24 + .../data-types/data-types-binary-encoding.md | 104 +++ .../domains/data-types-binary-encoding.md | 0 src/Columns/ColumnDynamic.cpp | 48 +- src/Common/FieldBinaryEncoding.cpp | 388 ++++++++++ src/Common/FieldBinaryEncoding.h | 43 ++ src/Common/IntervalKind.h | 23 +- src/Core/Field.cpp | 16 +- src/Core/Settings.h | 4 + .../tests/gtest_fields_binary_enciding.cpp | 65 ++ src/DataTypes/DataTypeAggregateFunction.h | 3 +- .../DataTypeCustomSimpleAggregateFunction.cpp | 13 + .../DataTypeCustomSimpleAggregateFunction.h | 5 + src/DataTypes/DataTypeNested.h | 2 + src/DataTypes/DataTypesBinaryEncoding.cpp | 677 ++++++++++++++++++ src/DataTypes/DataTypesBinaryEncoding.h | 117 +++ src/DataTypes/Serializations/ISerialization.h | 6 + .../Serializations/SerializationArray.cpp | 8 +- .../Serializations/SerializationDynamic.cpp | 67 +- .../Serializations/SerializationMap.cpp | 4 +- .../Serializations/SerializationString.cpp | 16 +- .../SerializationVariantElement.cpp | 2 +- .../data_type_deserialization_fuzzer.cpp | 4 +- .../gtest_data_types_binary_encoding.cpp | 123 ++++ 
src/Formats/FormatFactory.cpp | 8 +- src/Formats/FormatSettings.h | 12 +- src/Formats/NativeReader.cpp | 36 +- src/Formats/NativeReader.h | 12 +- src/Formats/NativeWriter.cpp | 45 +- src/Formats/NativeWriter.h | 4 +- src/Formats/SchemaInferenceUtils.cpp | 92 ++- src/Interpreters/Context.cpp | 3 - .../Formats/Impl/BinaryRowInputFormat.cpp | 24 +- .../Formats/Impl/BinaryRowOutputFormat.cpp | 11 +- src/Processors/Formats/Impl/NativeFormat.cpp | 21 +- src/Server/TCPHandler.cpp | 3 + src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- ...172_dynamic_binary_serialization.reference | 48 ++ .../03172_dynamic_binary_serialization.sh | 61 ++ ...native_with_binary_encoded_types.reference | 102 +++ ...ry_and_native_with_binary_encoded_types.sh | 63 ++ 43 files changed, 2128 insertions(+), 187 deletions(-) create mode 100644 docs/en/sql-reference/data-types/data-types-binary-encoding.md create mode 100644 docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md create mode 100644 src/Common/FieldBinaryEncoding.cpp create mode 100644 src/Common/FieldBinaryEncoding.h create mode 100644 src/Core/tests/gtest_fields_binary_enciding.cpp create mode 100644 src/DataTypes/DataTypesBinaryEncoding.cpp create mode 100644 src/DataTypes/DataTypesBinaryEncoding.h create mode 100644 src/DataTypes/tests/gtest_data_types_binary_encoding.cpp create mode 100644 tests/queries/0_stateless/03172_dynamic_binary_serialization.reference create mode 100755 tests/queries/0_stateless/03172_dynamic_binary_serialization.sh create mode 100644 tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference create mode 100755 tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index fdbfb742a10..b71e58b8b0e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1533,6 +1533,10 @@ the columns from input data will be mapped to the columns from the table by thei Otherwise, the first row will be skipped. If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. +If setting [output_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#output_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be written using [binary encoding](../../sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes output format. +If setting [input_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#input_format_binary_encode_types_in_binary_format) is set to 1, +the types in header will be read using [binary encoding](../../sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes input format. ::: ## RowBinaryWithDefaults {#rowbinarywithdefaults} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6aae1ea62e5..6709d5a8ab9 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1907,6 +1907,18 @@ The maximum allowed size for String in RowBinary format. 
It prevents allocating Default value: `1GiB`. +### output_format_binary_encode_types_in_binary_format {#output_format_binary_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes output format. + +Disabled by default. + +### input_format_binary_decode_types_in_binary_format {#input_format_binary_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in RowBinaryWithNamesAndTypes input format. + +Disabled by default. + ## Native format settings {#native-format-settings} ### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} @@ -1914,3 +1926,15 @@ Default value: `1GiB`. Allow types conversion in Native input format between columns from input data and requested columns. Enabled by default. + +### output_format_native_encode_types_in_binary_format {#output_format_native_encode_types_in_binary_format} + +Write data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native output format. + +Disabled by default. + +### input_format_native_decode_types_in_binary_format {#input_format_native_decode_types_in_binary_format} + +Read data types in [binary format](../../sql-reference/data-types/data-types-binary-encoding.md) instead of type names in Native input format. + +Disabled by default. \ No newline at end of file diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md new file mode 100644 index 00000000000..ebcb480ea0a --- /dev/null +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -0,0 +1,104 @@ +--- +slug: /en/sql-reference/data-types/dynamic +sidebar_position: 56 +sidebar_label: Data types binary encoding specification. 
+--- + + +# Data types binary encoding specification + +| ClickHouse data type | Binary encoding | +|--------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Nothing` | `0x00` | +| `UInt8` | `0x01` | +| `UInt16` | `0x02` | +| `UInt32` | `0x03` | +| `UInt64` | `0x04` | +| `UInt128` | `0x05` | +| `UInt256` | `0x06` | +| `Int8` | `0x07` | +| `Int16` | `0x08` | +| `Int32` | `0x09` | +| `Int64` | `0x0A` | +| `Int128` | `0x0B` | +| `Int256` | `0x0C` | +| `Float32` | `0x0D` | +| `Float64` | `0x0E` | +| `Date` | `0x0F` | +| `Date32` | `0x10` | +| `DateTime` | `0x11` | +| `DateTime64(P)` | `0x12` | +| `String` | `0x13` | +| `FixedString(N)` | `0x14` | +| `Enum8` | `0x15...` | +| `Enum16` | `0x16...>` | +| `Decimal32(P, S)` | `0x17` | +| `Decimal64(P, S)` | `0x18` | +| `Decimal128(P, S)` | `0x19` | +| `Decimal256(P, S)` | `0x1A` | +| `UUID` | `0x1B` | +| `Array(T)` | `0x1C` | +| `Tuple(T1, ..., TN)` | `0x1D...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x1E...` | +| `Set` | `0x1F` | +| `Interval` | `0x20` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | +| `Nullable(T)` | `0x21` | +| `Function` | `0x22...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x23......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `LowCardinality(T)` | `0x24` | +| `Map(K, V)` | `0x25` | +| `Object('schema_format')` | `0x26` | +| `IPv4` | `0x27` | +| `IPv6` | `0x28` | +| `Variant(T1, ..., TN)` | `0x29...` | +| `Dynamic` | `0x2A` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2B` | +| `Bool` | `0x2C` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2D......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x2E...` | + + +### Interval kind binary encoding + +| Interval kind | Binary encoding | +|---------------|-----------------| +| `Nanosecond` | `0x00` | +| `Microsecond` | `0x01` | +| `Millisecond` | `0x02` | +| `Second` | `0x03` | +| `Minute` | `0x04` | +| `Hour` | `0x05` | +| `Day` | `0x06` | +| `Week` | `0x07` | +| `Month` | `0x08` | +| `Quarter` | `0x09` | +| `Year` | `0x1A` | + +### Aggregate function parameter binary encoding + +| Parameter type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | diff --git a/docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md 
b/docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3c147b6f123..d56999ce5a2 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -481,7 +483,7 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co /// We cannot use Variant serialization here as it serializes discriminator + value, /// but Dynamic doesn't have fixed mapping discriminator <-> variant type /// as different Dynamic column can have different Variants. - /// Instead, we serialize null bit + variant type name (size + bytes) + value. + /// Instead, we serialize null bit + variant type in binary format (size + bytes) + value. const auto & variant_col = assert_cast(*variant_column); auto discr = variant_col.globalDiscriminatorAt(n); StringRef res; @@ -495,14 +497,15 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co return res; } - const auto & variant_name = variant_info.variant_names[discr]; - size_t variant_name_size = variant_name.size(); - char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); + String variant_type_binary_data = encodeDataType(variant_type); + size_t variant_type_binary_data_size = variant_type_binary_data.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(), begin); memcpy(pos, &null_bit, sizeof(UInt8)); - memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); - memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + memcpy(pos + sizeof(UInt8), &variant_type_binary_data_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_type_binary_data.data(), variant_type_binary_data.size()); res.data = pos; - res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + res.size = sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(); auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); res.data = value_ref.data - res.size; @@ -521,13 +524,15 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) return pos; } - /// Read variant type name. - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; + /// Read variant type in binary format. + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto variant_type = decodeDataType(variant_type_binary_data); + auto variant_name = variant_type->getName(); /// If we already have such variant, just deserialize it into corresponding variant column. 
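    /// The lookup key is the full type name (e.g. "Array(UInt64)") recovered from the binary-encoded variant type.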
auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it != variant_info.variant_name_to_discriminator.end()) @@ -537,7 +542,6 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) } /// If we don't have such variant, add it. - auto variant_type = DataTypeFactory::instance().get(variant_name); if (likely(addNewVariant(variant_type))) { auto discr = variant_info.variant_name_to_discriminator[variant_name]; @@ -563,13 +567,13 @@ const char * ColumnDynamic::skipSerializedInArena(const char * pos) const if (null_bit) return pos; - const size_t variant_name_size = unalignedLoad(pos); - pos += sizeof(variant_name_size); - String variant_name; - variant_name.resize(variant_name_size); - memcpy(variant_name.data(), pos, variant_name_size); - pos += variant_name_size; - auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + const size_t variant_type_binary_data_size = unalignedLoad(pos); + pos += sizeof(variant_type_binary_data_size); + String variant_type_binary_data; + variant_type_binary_data.resize(variant_type_binary_data_size); + memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); + pos += variant_type_binary_data_size; + auto tmp_variant_column = decodeDataType(variant_type_binary_data)->createColumn(); return tmp_variant_column->skipSerializedInArena(pos); } diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp new file mode 100644 index 00000000000..7e0d815368c --- /dev/null +++ b/src/Common/FieldBinaryEncoding.cpp @@ -0,0 +1,388 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + +namespace +{ + +enum class FieldBinaryTypeIndex: uint8_t +{ + Null = 0x00, + UInt64 = 0x01, + Int64 = 0x02, + UInt128 = 0x03, + Int128 = 0x04, + UInt256 = 0x05, + Int256 = 0x06, + Float64 = 0x07, + Decimal32 = 0x08, + Decimal64 = 0x09, + Decimal128 = 0x0A, + Decimal256 = 0x0B, + String = 0x0C, + Array = 0x0D, + Tuple = 0x0E, + Map = 0x0F, + IPv4 = 0x10, + IPv6 = 0x11, + UUID = 0x12, + Bool = 0x13, + Object = 0x14, + AggregateFunctionState = 0x15, + + NegativeInfinity = 0xFE, + PositiveInfinity = 0xFF, +}; + +class FieldVisitorEncodeBinary +{ +public: + void operator() (const Null & x, WriteBuffer & buf) const; + void operator() (const UInt64 & x, WriteBuffer & buf) const; + void operator() (const UInt128 & x, WriteBuffer & buf) const; + void operator() (const UInt256 & x, WriteBuffer & buf) const; + void operator() (const Int64 & x, WriteBuffer & buf) const; + void operator() (const Int128 & x, WriteBuffer & buf) const; + void operator() (const Int256 & x, WriteBuffer & buf) const; + void operator() (const UUID & x, WriteBuffer & buf) const; + void operator() (const IPv4 & x, WriteBuffer & buf) const; + void operator() (const IPv6 & x, WriteBuffer & buf) const; + void operator() (const Float64 & x, WriteBuffer & buf) const; + void operator() (const String & x, WriteBuffer & buf) const; + void operator() (const Array & x, WriteBuffer & buf) const; + void operator() (const Tuple & x, WriteBuffer & buf) const; + void operator() (const Map & x, WriteBuffer & buf) const; + void operator() (const Object & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + void operator() (const DecimalField & x, WriteBuffer & buf) const; + 
void operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const; + [[noreturn]] void operator() (const CustomType & x, WriteBuffer & buf) const; + void operator() (const bool & x, WriteBuffer & buf) const; +}; + +void FieldVisitorEncodeBinary::operator() (const Null & x, WriteBuffer & buf) const +{ + if (x.isNull()) + writeBinary(UInt8(FieldBinaryTypeIndex::Null), buf); + else if (x.isPositiveInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::PositiveInfinity), buf); + else if (x.isNegativeInfinity()) + writeBinary(UInt8(FieldBinaryTypeIndex::NegativeInfinity), buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt64), buf); + writeVarUInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int64), buf); + writeVarInt(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Float64 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Float64), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const String & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::String), buf); + writeStringBinary(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int128 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int128), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UInt256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UInt256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Int256 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Int256), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const UUID & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::UUID), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv4 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv4), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const IPv6 & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::IPv6), buf); + writeBinaryLittleEndian(x, buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal32), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal64), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal128), buf); + writeVarUInt(x.getScale(), buf); + writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const DecimalField & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Decimal256), buf); + writeVarUInt(x.getScale(), buf); + 
writeBinaryLittleEndian(x.getValue(), buf); +} + +void FieldVisitorEncodeBinary::operator() (const AggregateFunctionStateData & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::AggregateFunctionState), buf); + writeStringBinary(x.name, buf); + writeStringBinary(x.data, buf); +} + +void FieldVisitorEncodeBinary::operator() (const Array & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Array), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Tuple & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Tuple), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, x[i]); +} + +void FieldVisitorEncodeBinary::operator() (const Map & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Map), buf); + size_t size = x.size(); + writeVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + { + const Tuple & key_and_value = x[i].get(); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[0]); + Field::dispatch([&buf] (const auto & value) { FieldVisitorEncodeBinary()(value, buf); }, key_and_value[1]); + } +} + +void FieldVisitorEncodeBinary::operator() (const Object & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Object), buf); + + size_t size = x.size(); + writeVarUInt(size, buf); + for (const auto & [key, value] : x) + { + writeStringBinary(key, buf); + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, value); + } +} + +void FieldVisitorEncodeBinary::operator()(const bool & x, WriteBuffer & buf) const +{ + writeBinary(UInt8(FieldBinaryTypeIndex::Bool), buf); + writeBinary(static_cast(x), buf); +} + +[[noreturn]] void FieldVisitorEncodeBinary::operator()(const CustomType &, WriteBuffer &) const +{ + /// TODO: Support binary encoding/decoding for custom types somehow. 
+ throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of Field with custom type is not supported"); +} + +template +Field decodeBigInteger(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +DecimalField decodeDecimal(ReadBuffer & buf) +{ + UInt32 scale; + readVarUInt(scale, buf); + T value; + readBinaryLittleEndian(value, buf); + return DecimalField(value, scale); +} + +template +T decodeValueLittleEndian(ReadBuffer & buf) +{ + T value; + readBinaryLittleEndian(value, buf); + return value; +} + +template +T decodeArrayLikeField(ReadBuffer & buf) +{ + size_t size; + readVarUInt(size, buf); + T value; + for (size_t i = 0; i != size; ++i) + value.push_back(decodeField(buf)); + return value; +} + +} +void encodeField(const Field & x, WriteBuffer & buf) +{ + Field::dispatch([&buf] (const auto & val) { FieldVisitorEncodeBinary()(val, buf); }, x); +} + +Field decodeField(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (FieldBinaryTypeIndex(type)) + { + case FieldBinaryTypeIndex::Null: + return Null(); + case FieldBinaryTypeIndex::PositiveInfinity: + return POSITIVE_INFINITY; + case FieldBinaryTypeIndex::NegativeInfinity: + return NEGATIVE_INFINITY; + case FieldBinaryTypeIndex::Int64: + { + Int64 value; + readVarInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::UInt64: + { + UInt64 value; + readVarUInt(value, buf); + return value; + } + case FieldBinaryTypeIndex::Int128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt128: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Int256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::UInt256: + return decodeBigInteger(buf); + case FieldBinaryTypeIndex::Float64: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case FieldBinaryTypeIndex::String: + { + String value; + readStringBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::UUID: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv4: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::IPv6: + return decodeValueLittleEndian(buf); + case FieldBinaryTypeIndex::Bool: + { + bool value; + readBinary(value, buf); + return value; + } + case FieldBinaryTypeIndex::Array: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Tuple: + return decodeArrayLikeField(buf); + case FieldBinaryTypeIndex::Map: + { + size_t size; + readVarUInt(size, buf); + Map map; + for (size_t i = 0; i != size; ++i) + { + Tuple key_and_value; + key_and_value.push_back(decodeField(buf)); + key_and_value.push_back(decodeField(buf)); + map.push_back(key_and_value); + } + return map; + } + case FieldBinaryTypeIndex::Object: + { + size_t size; + readVarUInt(size, buf); + Object value; + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + value[name] = decodeField(buf); + } + return value; + } + case FieldBinaryTypeIndex::AggregateFunctionState: + { + String name; + readStringBinary(name, buf); + String data; + readStringBinary(data, buf); + return AggregateFunctionStateData{.name = name, .data = data}; + } + } + + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown Field type: {0:#04x}", UInt64(type)); +} + +} diff --git 
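encodeField and decodeField above give every Field one tag byte from FieldBinaryTypeIndex followed by a type-specific payload: integers go through writeVarUInt/writeVarInt, strings are length-prefixed, decimals carry their scale, and composite Fields recurse element by element. The standalone sketch below reproduces a few of those payloads; it assumes the usual LEB128 scheme for writeVarUInt and the zigzag mapping for writeVarInt, which is worth double-checking against the real VarInt helpers.

#include <cstdint>
#include <string>
#include <vector>

// LEB128 unsigned varint (a stand-in for writeVarUInt).
static void putVarUInt(uint64_t x, std::vector<uint8_t> & out)
{
    do
    {
        uint8_t byte = x & 0x7F;
        x >>= 7;
        if (x)
            byte |= 0x80;
        out.push_back(byte);
    } while (x);
}

// Signed varint via zigzag (assumed to match writeVarInt).
static void putVarInt(int64_t x, std::vector<uint8_t> & out)
{
    putVarUInt((static_cast<uint64_t>(x) << 1) ^ static_cast<uint64_t>(x >> 63), out);
}

int main()
{
    std::vector<uint8_t> buf;

    // Field(UInt64(42)): tag 0x01, then the value as a varint -> 0x01 0x2A.
    buf.push_back(0x01);
    putVarUInt(42, buf);

    // Field(Int64(-42)): tag 0x02, then zigzag(-42) = 83 -> 0x02 0x53.
    buf.push_back(0x02);
    putVarInt(-42, buf);

    // Field(String("hi")): tag 0x0C, varint length, raw bytes -> 0x0C 0x02 'h' 'i'.
    buf.push_back(0x0C);
    const std::string s = "hi";
    putVarUInt(s.size(), buf);
    buf.insert(buf.end(), s.begin(), s.end());

    // Field(DecimalField<Decimal32>(4242, 3)): tag 0x08, scale as a varint,
    // then the raw value little-endian -> 0x08 0x03 0x92 0x10 0x00 0x00.
    buf.push_back(0x08);
    putVarUInt(3, buf);
    for (uint32_t value = 4242, i = 0; i < 4; ++i)
        buf.push_back(static_cast<uint8_t>(value >> (8 * i)));

    // Field(Array{UInt64(1), UInt64(2)}): tag 0x0D, element count, then each
    // element encoded exactly like a top-level Field -> 0x0D 0x02 0x01 0x01 0x01 0x02.
    buf.push_back(0x0D);
    putVarUInt(2, buf);
    buf.push_back(0x01); putVarUInt(1, buf);
    buf.push_back(0x01); putVarUInt(2, buf);

    return buf.size() == 20 ? 0 : 1;
}

decodeField simply reads the tag and reverses each of these steps, which is what the round-trip unit test added further down in this patch relies on.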
a/src/Common/FieldBinaryEncoding.h b/src/Common/FieldBinaryEncoding.h new file mode 100644 index 00000000000..aa6694cb03e --- /dev/null +++ b/src/Common/FieldBinaryEncoding.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +namespace DB +{ + +/** +Binary encoding for Fields: +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| Field type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive infinity` | `0xFF` | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +*/ + +void encodeField(const Field &, WriteBuffer & buf); +Field decodeField(ReadBuffer & buf); + +} diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index f8e1fe87276..d66ea6018d4 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -7,19 +7,20 @@ namespace DB /// Kind of a temporal interval. struct IntervalKind { + /// note: The order and numbers are important and used in binary encoding, append new interval kinds to the end of list. 
enum class Kind : uint8_t { - Nanosecond, - Microsecond, - Millisecond, - Second, - Minute, - Hour, - Day, - Week, - Month, - Quarter, - Year, + Nanosecond = 0x00, + Microsecond = 0x01, + Millisecond = 0x02, + Second = 0x03, + Minute = 0x04, + Hour = 0x05, + Day = 0x06, + Week = 0x07, + Month = 0x08, + Quarter = 0x09, + Year = 0x0A, }; Kind kind = Kind::Second; diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 0e5b1bac000..fb820ad2b56 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -1,11 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -21,6 +22,7 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_FROM_FIELD_DUMP; extern const int DECIMAL_OVERFLOW; extern const int INCORRECT_DATA; + extern const int UNSUPPORTED_METHOD; } template diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bda403b1b40..23c5d7fc1a2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1105,6 +1105,8 @@ class IColumn; M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ + M(Bool, input_format_native_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in Native input format", 0) \ + M(Bool, output_format_native_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in Native output format", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ @@ -1124,6 +1126,8 @@ class IColumn; M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ M(UInt64, format_binary_max_string_size, 1_GiB, "The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit", 0) \ M(UInt64, format_binary_max_array_size, 1_GiB, "The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 
0 means there is no limit", 0) \ + M(Bool, input_format_binary_decode_types_in_binary_format, false, "Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format", 0) \ + M(Bool, output_format_binary_encode_types_in_binary_format, false, "Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format ", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ diff --git a/src/Core/tests/gtest_fields_binary_enciding.cpp b/src/Core/tests/gtest_fields_binary_enciding.cpp new file mode 100644 index 00000000000..087caf746bb --- /dev/null +++ b/src/Core/tests/gtest_fields_binary_enciding.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; +} + + +void check(const Field & field) +{ +// std::cerr << "Check " << toString(field) << "\n"; + WriteBufferFromOwnString ostr; + encodeField(field, ostr); + ReadBufferFromString istr(ostr.str()); + Field decoded_field = decodeField(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(field, decoded_field); +} + +GTEST_TEST(FieldBinaryEncoding, EncodeAndDecode) +{ + check(Null()); + check(POSITIVE_INFINITY); + check(NEGATIVE_INFINITY); + check(true); + check(UInt64(42)); + check(Int64(-42)); + check(UInt128(42)); + check(Int128(-42)); + check(UInt256(42)); + check(Int256(-42)); + check(UUID(42)); + check(IPv4(42)); + check(IPv6(42)); + check(Float64(42.42)); + check(String("Hello, World!")); + check(Array({Field(UInt64(42)), Field(UInt64(43))})); + check(Tuple({Field(UInt64(42)), Field(Null()), Field(UUID(42)), Field(String("Hello, World!"))})); + check(Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}})); + check(Object({{String("key_1"), Field(UInt64(42))}, {String("key_2"), Field(UInt64(43))}})); + check(DecimalField(4242, 3)); + check(DecimalField(4242, 3)); + check(DecimalField(Int128(4242), 3)); + check(DecimalField(Int256(4242), 3)); + check(AggregateFunctionStateData{.name="some_name", .data="some_data"}); + try + { + check(CustomType()); + } + catch (const Exception & e) + { + ASSERT_EQ(e.code(), ErrorCodes::UNSUPPORTED_METHOD); + } + + check(Array({ + Tuple({Field(UInt64(42)), Map({Tuple{Field(UInt64(42)), Field(String("str_42"))}, Tuple{Field(UInt64(43)), Field(String("str_43"))}}), Field(UUID(42)), Field(String("Hello, World!"))}), + Tuple({Field(UInt64(43)), Map({Tuple{Field(UInt64(43)), Field(String("str_43"))}, Tuple{Field(UInt64(44)), Field(String("str_44"))}}), Field(UUID(43)), Field(String("Hello, World 2!"))}) + })); +} + diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 8b4b3d6ee4c..52ed151107e 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -25,7 +25,6 @@ private: mutable std::optional version; String getNameImpl(bool with_version) const; - size_t getVersion() const; public: static constexpr bool is_parametric = true; @@ -39,6 +38,8 @@ public: { } + size_t getVersion() const; + String getFunctionName() const; AggregateFunctionPtr getFunction() const { return function; } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index cae9622bcb9..d3b3adc4965 
100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -165,6 +165,19 @@ static std::pair create(const ASTPtr & argum return std::make_pair(storage_type, std::make_unique(std::move(custom_name), nullptr)); } +String DataTypeCustomSimpleAggregateFunction::getFunctionName() const +{ + return function->getName(); +} + +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters) +{ + auto custom_desc = std::make_unique( + std::make_unique(function, argument_types, parameters)); + + return DataTypeFactory::instance().getCustom(std::move(custom_desc)); +} + void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory) { factory.registerDataTypeCustom("SimpleAggregateFunction", create); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index bdabb465fe5..303da86979a 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -40,8 +40,13 @@ public: : function(function_), argument_types(argument_types_), parameters(parameters_) {} AggregateFunctionPtr getFunction() const { return function; } + String getFunctionName() const; + const DataTypes & getArgumentsDataTypes() const { return argument_types; } + const Array & getParameters() const { return parameters; } String getName() const override; static void checkSupportedFunctions(const AggregateFunctionPtr & function); }; +DataTypePtr createSimpleAggregateFunctionType(const AggregateFunctionPtr & function, const DataTypes & argument_types, const Array & parameters); + } diff --git a/src/DataTypes/DataTypeNested.h b/src/DataTypes/DataTypeNested.h index 1ad06477a6e..102e6c293cc 100644 --- a/src/DataTypes/DataTypeNested.h +++ b/src/DataTypes/DataTypeNested.h @@ -19,6 +19,8 @@ public: } String getName() const override; + const DataTypes & getElements() const { return elems; } + const Names & getNames() const { return names; } }; DataTypePtr createNested(const DataTypes & types, const Names & names); diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp new file mode 100644 index 00000000000..4c42f650798 --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -0,0 +1,677 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int UNKNOWN_TYPE; +} + +namespace +{ + +enum class BinaryTypeIndex : uint8_t +{ + Nothing = 0x00, + UInt8 = 0x01, + UInt16 = 0x02, + UInt32 = 0x03, + UInt64 = 0x04, + UInt128 = 0x05, + UInt256 = 0x06, + Int8 = 0x07, + Int16 = 0x08, + Int32 = 0x09, + Int64 = 0x0A, + Int128 = 0x0B, + Int256 = 0x0C, + Float32 = 0x0D, + Float64 = 0x0E, + Date = 0x0F, + Date32 = 0x10, + DateTime = 0x11, + DateTime64 = 0x12, + String = 0x13, + FixedString = 0x14, + Enum8 = 0x15, + Enum16 = 0x16, + Decimal32 = 0x17, + Decimal64 = 0x18, + Decimal128 = 0x19, + Decimal256 = 0x1A, + UUID = 0x1B, + Array = 0x1C, + UnnamedTuple = 0x1D, + NamedTuple = 0x1E, + Set = 0x1F, + Interval = 0x20, + Nullable = 0x21, + 
Function = 0x22, + AggregateFunction = 0x23, + LowCardinality = 0x24, + Map = 0x25, + Object = 0x26, + IPv4 = 0x27, + IPv6 = 0x28, + Variant = 0x29, + Dynamic = 0x2A, + Custom = 0x2B, + Bool = 0x2C, + SimpleAggregateFunction = 0x2D, + Nested = 0x2E, +}; + +BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) +{ + /// By default custom types don't have their own BinaryTypeIndex. + if (type->hasCustomName()) + { + /// Some widely used custom types have separate BinaryTypeIndex for better serialization. + /// Right now it's Bool, SimpleAggregateFunction and Nested types. + /// TODO: Consider adding BinaryTypeIndex for more custom types. + + if (isBool(type)) + return BinaryTypeIndex::Bool; + + if (typeid_cast(type->getCustomName())) + return BinaryTypeIndex::SimpleAggregateFunction; + + if (isNested(type)) + return BinaryTypeIndex::Nested; + + return BinaryTypeIndex::Custom; + } + + switch (type->getTypeId()) + { + case TypeIndex::Nothing: + return BinaryTypeIndex::Nothing; + case TypeIndex::UInt8: + return BinaryTypeIndex::UInt8; + case TypeIndex::UInt16: + return BinaryTypeIndex::UInt16; + case TypeIndex::UInt32: + return BinaryTypeIndex::UInt32; + case TypeIndex::UInt64: + return BinaryTypeIndex::UInt64; + case TypeIndex::UInt128: + return BinaryTypeIndex::UInt128; + case TypeIndex::UInt256: + return BinaryTypeIndex::UInt256; + case TypeIndex::Int8: + return BinaryTypeIndex::Int8; + case TypeIndex::Int16: + return BinaryTypeIndex::Int16; + case TypeIndex::Int32: + return BinaryTypeIndex::Int32; + case TypeIndex::Int64: + return BinaryTypeIndex::Int64; + case TypeIndex::Int128: + return BinaryTypeIndex::Int128; + case TypeIndex::Int256: + return BinaryTypeIndex::Int256; + case TypeIndex::Float32: + return BinaryTypeIndex::Float32; + case TypeIndex::Float64: + return BinaryTypeIndex::Float64; + case TypeIndex::Date: + return BinaryTypeIndex::Date; + case TypeIndex::Date32: + return BinaryTypeIndex::Date32; + case TypeIndex::DateTime: + return BinaryTypeIndex::DateTime; + case TypeIndex::DateTime64: + return BinaryTypeIndex::DateTime64; + case TypeIndex::String: + return BinaryTypeIndex::String; + case TypeIndex::FixedString: + return BinaryTypeIndex::FixedString; + case TypeIndex::Enum8: + return BinaryTypeIndex::Enum8; + case TypeIndex::Enum16: + return BinaryTypeIndex::Enum16; + case TypeIndex::Decimal32: + return BinaryTypeIndex::Decimal32; + case TypeIndex::Decimal64: + return BinaryTypeIndex::Decimal64; + case TypeIndex::Decimal128: + return BinaryTypeIndex::Decimal128; + case TypeIndex::Decimal256: + return BinaryTypeIndex::Decimal256; + case TypeIndex::UUID: + return BinaryTypeIndex::UUID; + case TypeIndex::Array: + return BinaryTypeIndex::Array; + case TypeIndex::Tuple: + { + const auto & tuple_type = assert_cast(*type); + if (tuple_type.haveExplicitNames()) + return BinaryTypeIndex::NamedTuple; + return BinaryTypeIndex::UnnamedTuple; + } + case TypeIndex::Set: + return BinaryTypeIndex::Set; + case TypeIndex::Interval: + return BinaryTypeIndex::Interval; + case TypeIndex::Nullable: + return BinaryTypeIndex::Nullable; + case TypeIndex::Function: + return BinaryTypeIndex::Function; + case TypeIndex::AggregateFunction: + return BinaryTypeIndex::AggregateFunction; + case TypeIndex::LowCardinality: + return BinaryTypeIndex::LowCardinality; + case TypeIndex::Map: + return BinaryTypeIndex::Map; + case TypeIndex::Object: + return BinaryTypeIndex::Object; + case TypeIndex::IPv4: + return BinaryTypeIndex::IPv4; + case TypeIndex::IPv6: + return BinaryTypeIndex::IPv6; + case 
TypeIndex::Variant: + return BinaryTypeIndex::Variant; + case TypeIndex::Dynamic: + return BinaryTypeIndex::Dynamic; + /// JSONPaths is used only during schema inference and cannot be used anywhere else. + case TypeIndex::JSONPaths: + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type JSONPaths is not supported"); + } +} + +template +void encodeEnumValues(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & enum_type = assert_cast &>(*type); + const auto & values = enum_type.getValues(); + writeVarUInt(values.size(), buf); + for (const auto & [name, value] : values) + { + writeStringBinary(name, buf); + writeBinaryLittleEndian(value, buf); + } +} + +template +DataTypePtr decodeEnum(ReadBuffer & buf) +{ + typename DataTypeEnum::Values values; + size_t size; + readVarUInt(size, buf); + for (size_t i = 0; i != size; ++i) + { + String name; + readStringBinary(name, buf); + T value; + readBinaryLittleEndian(value, buf); + values.emplace_back(name, value); + } + + return std::make_shared>(values); +} + +template +void encodeDecimal(const DataTypePtr & type, WriteBuffer & buf) +{ + const auto & decimal_type = assert_cast &>(*type); + /// Both precision and scale should be less than 76, so we can decode it in 1 byte. + writeBinary(UInt8(decimal_type.getPrecision()), buf); + writeBinary(UInt8(decimal_type.getScale()), buf); +} + +template +DataTypePtr decodeDecimal(ReadBuffer & buf) +{ + UInt8 precision; + readBinary(precision, buf); + UInt8 scale; + readBinary(scale, buf); + return std::make_shared>(precision, scale); +} + +void encodeAggregateFunction(const String & function_name, const Array & parameters, const DataTypes & arguments_types, WriteBuffer & buf) +{ + writeStringBinary(function_name, buf); + writeVarUInt(parameters.size(), buf); + for (const auto & param : parameters) + encodeField(param, buf); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); +} + +std::tuple decodeAggregateFunction(ReadBuffer & buf) +{ + String function_name; + readStringBinary(function_name, buf); + size_t num_parameters; + readVarUInt(num_parameters, buf); + Array parameters; + parameters.reserve(num_parameters); + for (size_t i = 0; i != num_parameters; ++i) + parameters.push_back(decodeField(buf)); + size_t num_arguments; + readVarUInt(num_arguments, buf); + DataTypes arguments_types; + arguments_types.reserve(num_arguments); + for (size_t i = 0; i != num_arguments; ++i) + arguments_types.push_back(decodeDataType(buf)); + AggregateFunctionProperties properties; + auto action = NullsAction::EMPTY; + auto function = AggregateFunctionFactory::instance().get(function_name, action, arguments_types, parameters, properties); + return {function, parameters, arguments_types}; +} + +} + +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) +{ + /// First, write the BinaryTypeIndex byte. + auto binary_type_index = getBinaryTypeIndex(type); + buf.write(UInt8(binary_type_index)); + /// Then, write additional information depending on the data type. + switch (binary_type_index) + { + case BinaryTypeIndex::DateTime64: + { + const auto & datetime64_type = assert_cast(*type); + /// Maximum scale for DateTime64 is 9, so we can write it as 1 byte. 
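encodeAggregateFunction and decodeAggregateFunction above carry the function name, its parameters as binary Fields, and its argument types; together with the 0x23 tag and the version varint written in the AggregateFunction case of encodeDataType, the whole descriptor stays compact. As a rough illustration (assuming a serialization version of 0 and single-byte varints), AggregateFunction(sum, UInt64) comes out as the nine bytes built in this standalone sketch:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    // AggregateFunction(sum, UInt64), assuming serialization version 0.
    std::vector<uint8_t> descr;
    descr.push_back(0x23);                        // AggregateFunction tag
    descr.push_back(0x00);                        // version (varint)
    const std::string name = "sum";
    descr.push_back(static_cast<uint8_t>(name.size()));
    descr.insert(descr.end(), name.begin(), name.end());
    descr.push_back(0x00);                        // number of parameters
    descr.push_back(0x01);                        // number of argument types
    descr.push_back(0x04);                        // UInt64 argument

    for (uint8_t byte : descr)
        std::printf("%02X ", byte);               // 23 00 03 73 75 6D 00 01 09... prints: 23 00 03 73 75 6D 00 01 04
    std::printf("\n");
    return 0;
}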
+ buf.write(UInt8(datetime64_type.getScale())); + break; + } + case BinaryTypeIndex::FixedString: + { + const auto & fixed_string_type = assert_cast(*type); + writeVarUInt(fixed_string_type.getN(), buf); + break; + } + case BinaryTypeIndex::Enum8: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Enum16: + { + encodeEnumValues(type, buf); + break; + } + case BinaryTypeIndex::Decimal32: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal64: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal128: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Decimal256: + { + encodeDecimal(type, buf); + break; + } + case BinaryTypeIndex::Array: + { + const auto & array_type = assert_cast(*type); + encodeDataType(array_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::NamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & types = tuple_type.getElements(); + const auto & names = tuple_type.getElementNames(); + writeVarUInt(types.size(), buf); + for (size_t i = 0; i != types.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(types[i], buf); + } + break; + } + case BinaryTypeIndex::UnnamedTuple: + { + const auto & tuple_type = assert_cast(*type); + const auto & types = tuple_type.getElements(); + writeVarUInt(types.size(), buf); + for (size_t i = 0; i != types.size(); ++i) + encodeDataType(types[i], buf); + break; + } + case BinaryTypeIndex::Interval: + { + const auto & interval_type = assert_cast(*type); + writeBinary(UInt8(interval_type.getKind().kind), buf); + break; + } + case BinaryTypeIndex::Nullable: + { + const auto & nullable_type = assert_cast(*type); + encodeDataType(nullable_type.getNestedType(), buf); + break; + } + case BinaryTypeIndex::Function: + { + const auto & function_type = assert_cast(*type); + const auto & arguments_types = function_type.getArgumentTypes(); + const auto & return_type = function_type.getReturnType(); + writeVarUInt(arguments_types.size(), buf); + for (const auto & argument_type : arguments_types) + encodeDataType(argument_type, buf); + encodeDataType(return_type, buf); + break; + } + case BinaryTypeIndex::LowCardinality: + { + const auto & low_cardinality_type = assert_cast(*type); + encodeDataType(low_cardinality_type.getDictionaryType(), buf); + break; + } + case BinaryTypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + encodeDataType(map_type.getKeyType(), buf); + encodeDataType(map_type.getValueType(), buf); + break; + } + case BinaryTypeIndex::Object: + { + const auto & object_deprecated_type = assert_cast(*type); + writeBinary(object_deprecated_type.hasNullableSubcolumns(), buf); + writeStringBinary(object_deprecated_type.getSchemaFormat(), buf); + break; + } + case BinaryTypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + writeVarUInt(variants.size(), buf); + for (const auto & variant : variants) + encodeDataType(variant, buf); + break; + } + case BinaryTypeIndex::AggregateFunction: + { + const auto & aggregate_function_type = assert_cast(*type); + writeVarUInt(aggregate_function_type.getVersion(), buf); + encodeAggregateFunction(aggregate_function_type.getFunctionName(), aggregate_function_type.getParameters(), aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & simple_aggregate_function_type = assert_cast(*type->getCustomName()); + 
encodeAggregateFunction(simple_aggregate_function_type.getFunctionName(), simple_aggregate_function_type.getParameters(), simple_aggregate_function_type.getArgumentsDataTypes(), buf); + break; + } + case BinaryTypeIndex::Nested: + { + const auto & nested_type = assert_cast(*type->getCustomName()); + const auto & elements = nested_type.getElements(); + const auto & names = nested_type.getNames(); + writeVarUInt(elements.size(), buf); + for (size_t i = 0; i != elements.size(); ++i) + { + writeStringBinary(names[i], buf); + encodeDataType(elements[i], buf); + } + break; + } + case BinaryTypeIndex::Custom: + { + const auto & type_name = type->getName(); + writeStringBinary(type_name, buf); + break; + } + default: + break; + } +} + +String encodeDataType(const DataTypePtr & type) +{ + WriteBufferFromOwnString buf; + encodeDataType(type, buf); + return buf.str(); +} + +DataTypePtr decodeDataType(ReadBuffer & buf) +{ + UInt8 type; + readBinary(type, buf); + switch (BinaryTypeIndex(type)) + { + case BinaryTypeIndex::Nothing: + return std::make_shared(); + case BinaryTypeIndex::UInt8: + return std::make_shared(); + case BinaryTypeIndex::Bool: + return DataTypeFactory::instance().get("Bool"); + case BinaryTypeIndex::UInt16: + return std::make_shared(); + case BinaryTypeIndex::UInt32: + return std::make_shared(); + case BinaryTypeIndex::UInt64: + return std::make_shared(); + case BinaryTypeIndex::UInt128: + return std::make_shared(); + case BinaryTypeIndex::UInt256: + return std::make_shared(); + case BinaryTypeIndex::Int8: + return std::make_shared(); + case BinaryTypeIndex::Int16: + return std::make_shared(); + case BinaryTypeIndex::Int32: + return std::make_shared(); + case BinaryTypeIndex::Int64: + return std::make_shared(); + case BinaryTypeIndex::Int128: + return std::make_shared(); + case BinaryTypeIndex::Int256: + return std::make_shared(); + case BinaryTypeIndex::Float32: + return std::make_shared(); + case BinaryTypeIndex::Float64: + return std::make_shared(); + case BinaryTypeIndex::Date: + return std::make_shared(); + case BinaryTypeIndex::Date32: + return std::make_shared(); + case BinaryTypeIndex::DateTime: + return std::make_shared(); + case BinaryTypeIndex::DateTime64: + { + UInt8 scale; + readBinary(scale, buf); + return std::make_shared(scale); + } + case BinaryTypeIndex::String: + return std::make_shared(); + case BinaryTypeIndex::FixedString: + { + UInt64 size; + readVarUInt(size, buf); + return std::make_shared(size); + } + case BinaryTypeIndex::Enum8: + return decodeEnum(buf); + case BinaryTypeIndex::Enum16: + return decodeEnum(buf); + case BinaryTypeIndex::Decimal32: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal64: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal128: + return decodeDecimal(buf); + case BinaryTypeIndex::Decimal256: + return decodeDecimal(buf); + case BinaryTypeIndex::UUID: + return std::make_shared(); + case BinaryTypeIndex::Array: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::NamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + Names names; + names.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return std::make_shared(elements, names); + } + case BinaryTypeIndex::UnnamedTuple: + { + size_t size; + readVarUInt(size, buf); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + 
elements.push_back(decodeDataType(buf)); + return std::make_shared(elements); + } + case BinaryTypeIndex::Set: + return std::make_shared(); + case BinaryTypeIndex::Interval: + { + UInt8 kind; + readBinary(kind, buf); + return std::make_shared(IntervalKind(IntervalKind::Kind(kind))); + } + case BinaryTypeIndex::Nullable: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Function: + { + size_t arguments_size; + readVarUInt(arguments_size, buf); + DataTypes arguments; + arguments.reserve(arguments_size); + for (size_t i = 0; i != arguments_size; ++i) + arguments.push_back(decodeDataType(buf)); + auto return_type = decodeDataType(buf); + return std::make_shared(arguments, return_type); + } + case BinaryTypeIndex::LowCardinality: + return std::make_shared(decodeDataType(buf)); + case BinaryTypeIndex::Map: + { + auto key_type = decodeDataType(buf); + auto value_type = decodeDataType(buf); + return std::make_shared(key_type, value_type); + } + case BinaryTypeIndex::Object: + { + bool has_nullable_subcolumns; + readBinary(has_nullable_subcolumns, buf); + String schema_format; + readStringBinary(schema_format, buf); + return std::make_shared(schema_format, has_nullable_subcolumns); + } + case BinaryTypeIndex::IPv4: + return std::make_shared(); + case BinaryTypeIndex::IPv6: + return std::make_shared(); + case BinaryTypeIndex::Variant: + { + size_t size; + readVarUInt(size, buf); + DataTypes variants; + variants.reserve(size); + for (size_t i = 0; i != size; ++i) + variants.push_back(decodeDataType(buf)); + return std::make_shared(variants); + } + case BinaryTypeIndex::Dynamic: + return std::make_shared(); + case BinaryTypeIndex::AggregateFunction: + { + size_t version; + readVarUInt(version, buf); + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return std::make_shared(function, arguments_types, parameters, version); + } + case BinaryTypeIndex::SimpleAggregateFunction: + { + const auto & [function, parameters, arguments_types] = decodeAggregateFunction(buf); + return createSimpleAggregateFunctionType(function, arguments_types, parameters); + } + case BinaryTypeIndex::Nested: + { + size_t size; + readVarUInt(size, buf); + Names names; + names.reserve(size); + DataTypes elements; + elements.reserve(size); + for (size_t i = 0; i != size; ++i) + { + names.emplace_back(); + readStringBinary(names.back(), buf); + elements.push_back(decodeDataType(buf)); + } + + return createNested(elements, names); + } + case BinaryTypeIndex::Custom: + { + String type_name; + readStringBinary(type_name, buf); + return DataTypeFactory::instance().get(type_name); + } + } + + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown type code: {0:#04x}", UInt64(type)); +} + +DataTypePtr decodeDataType(const String & data) +{ + ReadBufferFromString buf(data); + return decodeDataType(buf); +} + +} diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h new file mode 100644 index 00000000000..b4ed500f185 --- /dev/null +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -0,0 +1,117 @@ +#pragma once + +#include + +namespace DB +{ + +/** + +Binary encoding for ClickHouse data types: +|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ClickHouse data type | Binary encoding | 
+|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Nothing` | `0x00` | +| `UInt8` | `0x01` | +| `UInt16` | `0x02` | +| `UInt32` | `0x03` | +| `UInt64` | `0x04` | +| `UInt128` | `0x05` | +| `UInt256` | `0x06` | +| `Int8` | `0x07` | +| `Int16` | `0x08` | +| `Int32` | `0x09` | +| `Int64` | `0x0A` | +| `Int128` | `0x0B` | +| `Int256` | `0x0C` | +| `Float32` | `0x0D` | +| `Float64` | `0x0E` | +| `Date` | `0x0F` | +| `Date32` | `0x10` | +| `DateTime` | `0x11` | +| `DateTime64(P)` | `0x12` | +| `String` | `0x13` | +| `FixedString(N)` | `0x14` | +| `Enum8` | `0x15...` | +| `Enum16` | `0x16...>` | +| `Decimal32(P, S)` | `0x17` | +| `Decimal64(P, S)` | `0x18` | +| `Decimal128(P, S)` | `0x19` | +| `Decimal256(P, S)` | `0x1A` | +| `UUID` | `0x1B` | +| `Array(T)` | `0x1C` | +| `Tuple(T1, ..., TN)` | `0x1D...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x1E...` | +| `Set` | `0x1F` | +| `Interval` | `0x20` | +| `Nullable(T)` | `0x21` | +| `Function` | `0x22...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x23......` | +| `LowCardinality(T)` | `0x24` | +| `Map(K, V)` | `0x25` | +| `Object('schema_format')` | `0x26` | +| `IPv4` | `0x27` | +| `IPv6` | `0x28` | +| `Variant(T1, ..., TN)` | `0x29...` | +| `Dynamic` | `0x2A` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2B` | +| `Bool` | `0x2C` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2D......` | +| `Nested(name1 T1, ..., nameN TN)` | `0x2E...` | +|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + +Interval kind binary encoding: +|---------------|-----------------| +| Interval kind | Binary encoding | +|---------------|-----------------| +| `Nanosecond` | `0x00` | +| `Microsecond` | `0x01` | +| `Millisecond` | `0x02` | +| `Second` | `0x03` | +| `Minute` | `0x04` | +| `Hour` | `0x05` | +| `Day` | `0x06` | +| `Week` | `0x07` | +| `Month` | `0x08` | +| `Quarter` | `0x09` | +| `Year` | `0x1A` | +|---------------|-----------------| + +Aggregate function parameter binary encoding (binary encoding of a Field, see src/Common/FieldBinaryEncoding.h): +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| Parameter type | Binary encoding | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `Null` | `0x00` | +| `UInt64` | `0x01` | +| `Int64` | `0x02` | +| `UInt128` | `0x03` | +| `Int128` | `0x04` | +| `UInt128` | `0x05` | +| `Int128` | `0x06` | +| `Float64` | `0x07` | +| `Decimal32` | `0x08` | +| `Decimal64` | `0x09` | +| `Decimal128` | `0x0A` | +| `Decimal256` | `0x0B` | +| `String` | `0x0C` | +| `Array` | `0x0D...` | +| `Tuple` | `0x0E...` | +| `Map` | `0x0F...` | +| `IPv4` | `0x10` | +| `IPv6` | `0x11` | +| `UUID` | `0x12` | +| `Bool` | `0x13` | +| `Object` | `0x14...` | +| `AggregateFunctionState` | `0x15` | +| `Negative infinity` | `0xFE` | +| `Positive 
infinity` | `0xFF` | +|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +*/ + +String encodeDataType(const DataTypePtr & type); +void encodeDataType(const DataTypePtr & type, WriteBuffer & buf); + +DataTypePtr decodeDataType(const String & data); +DataTypePtr decodeDataType(ReadBuffer & buf); + +} diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 914ff9cf4a2..f1ccb13f97f 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -256,6 +256,9 @@ public: bool position_independent_encoding = true; + /// True if data type names should be serialized in binary encoding. + bool data_types_binary_encoding = false; + enum class DynamicStatisticsMode { NONE, /// Don't write statistics. @@ -275,6 +278,9 @@ public: bool position_independent_encoding = true; + /// True if data type names should be deserialized in binary encoding. + bool data_types_binary_encoding = false; + bool native_format = false; /// If not zero, may be used to avoid reallocations while reading column of String type. diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index ac7b8f4d084..b7d43332085 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -42,13 +42,13 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, con { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Array(); Array & arr = field.get(); @@ -82,13 +82,13 @@ void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); IColumn & nested_column = column_array.getData(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6351ff0ca0b..7609ffc91ca 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -109,7 +111,10 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( const auto & variant_column = column_dynamic.getVariantColumn(); /// Write internal Variant type name. - writeStringBinary(dynamic_state->variant_type->getName(), *stream); + if (settings.data_types_binary_encoding) + encodeDataType(dynamic_state->variant_type, *stream); + else + writeStringBinary(dynamic_state->variant_type->getName(), *stream); /// Write statistics in prefix if needed. 
if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) @@ -178,9 +183,16 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); /// Read internal Variant type name. - String data_type_name; - readStringBinary(data_type_name, *structure_stream); - structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + if (settings.data_types_binary_encoding) + { + structure_state->variant_type = decodeDataType(*structure_stream); + } + else + { + String data_type_name; + readStringBinary(data_type_name, *structure_stream); + structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + } const auto * variant_type = typeid_cast(structure_state->variant_type.get()); if (!variant_type) throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); @@ -280,33 +292,27 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - UInt8 null_bit = field.isNull(); - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. + if (field.isNull()) + { + encodeDataType(std::make_shared(), ostr); return; + } auto field_type = applyVisitor(FieldToDataType(), field); - auto field_type_name = field_type->getName(); - writeVarUInt(field_type_name.size(), ostr); - writeString(field_type_name, ostr); + encodeDataType(field_type, ostr); field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); } void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto field_type = decodeDataType(istr); + if (isNothing(field_type)) { field = Null(); return; } - size_t field_type_name_size; - readVarUInt(field_type_name_size, istr); - String field_type_name(field_type_name_size, 0); - istr.readStrict(field_type_name.data(), field_type_name_size); - auto field_type = DataTypeFactory::instance().get(field_type_name); field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); } @@ -317,15 +323,15 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu const auto & variant_column = dynamic_column.getVariantColumn(); auto global_discr = variant_column.globalDiscriminatorAt(row_num); - UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; - writeBinary(null_bit, ostr); - if (null_bit) + /// Serialize NULL as Nothing type with no value. 
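For single values, SerializationDynamic::serializeBinary above now writes the binary type descriptor followed by the value in that type's own binary serialization, and a NULL collapses to the one-byte Nothing descriptor. A small sketch of the resulting byte streams (illustration only; String's value format is its usual varint length plus data):

#include <cstdint>
#include <vector>

int main()
{
    // Dynamic NULL: only the Nothing descriptor, no value bytes at all.
    std::vector<uint8_t> null_row = {0x00};

    // Dynamic holding the String "abc": the String descriptor (0x13), then the
    // value in String's binary format (one varint length byte plus the data).
    std::vector<uint8_t> string_row = {0x13, 0x03, 'a', 'b', 'c'};

    // The previous format spent a null bit plus the full type name on every value;
    // the descriptor form costs one byte for most types plus any type parameters.
    return null_row.size() + string_row.size() == 6 ? 0 : 1;
}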
+ if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + encodeDataType(std::make_shared(), ostr); return; + } const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); - const auto & variant_type_name = variant_info.variant_names[global_discr]; - writeVarUInt(variant_type_name.size(), ostr); - writeString(variant_type_name, ostr); + encodeDataType(variant_type, ostr); variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); } @@ -346,30 +352,23 @@ static void deserializeVariant( void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { auto & dynamic_column = assert_cast(column); - UInt8 null_bit; - readBinary(null_bit, istr); - if (null_bit) + auto variant_type = decodeDataType(istr); + if (isNothing(variant_type)) { dynamic_column.insertDefault(); return; } - size_t variant_type_name_size; - readVarUInt(variant_type_name_size, istr); - String variant_type_name(variant_type_name_size, 0); - istr.readStrict(variant_type_name.data(), variant_type_name_size); - + auto variant_type_name = variant_type->getName(); const auto & variant_info = dynamic_column.getVariantInfo(); auto it = variant_info.variant_name_to_discriminator.find(variant_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { - const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } /// We don't have this variant yet. Let's try to add it. - auto variant_type = DataTypeFactory::instance().get(variant_type_name); if (dynamic_column.addNewVariant(variant_type)) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 70fe5182ade..0bef3c7d79d 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -55,13 +55,13 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const { size_t size; readVarUInt(size, istr); - if (settings.max_binary_array_size && size > settings.max_binary_array_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large map size: {}. The maximum is: {}. 
To increase the maximum, use setting " "format_binary_max_array_size", size, - settings.max_binary_array_size); + settings.binary.max_binary_string_size); field = Map(); Map & map = field.get(); map.reserve(size); diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 9e39ab23709..9e523d0d745 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -33,13 +33,13 @@ namespace ErrorCodes void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const String & s = field.get(); - if (settings.max_binary_string_size && s.size() > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size() > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size(), - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size(), ostr); writeString(s, ostr); @@ -50,13 +50,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co { UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); field = String(); String & s = field.get(); @@ -68,13 +68,13 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, co void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const StringRef & s = assert_cast(column).getDataAt(row_num); - if (settings.max_binary_string_size && s.size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && s.size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " "format_binary_max_string_size", s.size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); writeVarUInt(s.size, ostr); writeString(s, ostr); @@ -89,13 +89,13 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr, UInt64 size; readVarUInt(size, istr); - if (settings.max_binary_string_size && size > settings.max_binary_string_size) + if (settings.binary.max_binary_string_size && size > settings.binary.max_binary_string_size) throw Exception( ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size: {}. The maximum is: {}. 
To increase the maximum, use setting " "format_binary_max_string_size", size, - settings.max_binary_string_size); + settings.binary.max_binary_string_size); size_t old_chars_size = data.size(); size_t offset = old_chars_size + size + 1; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 1f9a81ac671..ec0b4019c2f 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -146,7 +146,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } /// If we started to read a new column, reinitialize variant column in deserialization state. - if (!variant_element_state->variant || result_column->empty()) + if (!variant_element_state->variant || mutable_column->empty()) { variant_element_state->variant = mutable_column->cloneEmpty(); diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 0ae325871fb..033a6ea8a4a 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -72,8 +72,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) DataTypePtr type = DataTypeFactory::instance().get(data_type); FormatSettings settings; - settings.max_binary_string_size = 100; - settings.max_binary_array_size = 100; + settings.binary.max_binary_string_size = 100; + settings.binary.max_binary_string_size = 100; Field field; type->getDefaultSerialization()->deserializeBinary(field, in, settings); diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp new file mode 100644 index 00000000000..4459e2558b6 --- /dev/null +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +namespace DB::ErrorCodes +{ +extern const int UNSUPPORTED_METHOD; +} + + +void check(const DataTypePtr & type) +{ +// std::cerr << "Check " << type->getName() << "\n"; + WriteBufferFromOwnString ostr; + encodeDataType(type, ostr); + ReadBufferFromString istr(ostr.str()); + DataTypePtr decoded_type = decodeDataType(istr); + ASSERT_TRUE(istr.eof()); + ASSERT_EQ(type->getName(), decoded_type->getName()); + ASSERT_TRUE(type->equals(*decoded_type)); +} + +GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) +{ + registerAggregateFunctions(); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared()); + check(std::make_shared(3)); + check(std::make_shared()); + check(std::make_shared(10)); + check(DataTypeFactory::instance().get("Enum8('a' = 1, 'b' = 2, 'c' = 3, 'd' = -128)")); + check(DataTypeFactory::instance().get("Enum16('a' = 1, 'b' = 2, 'c' = 3, 'd' = -1000)")); + 
check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared(3, 6)); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Array(UInt32)")); + check(DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, UUID)")); + check(DataTypeFactory::instance().get("Tuple(UInt32, String, Tuple(UUID, Date, IPv4))")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)")); + check(DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 Tuple(c4 UUID, c5 Date, c6 IPv4))")); + check(std::make_shared()); + check(std::make_shared(IntervalKind::Kind::Nanosecond)); + check(std::make_shared(IntervalKind::Kind::Microsecond)); + check(DataTypeFactory::instance().get("Nullable(UInt32)")); + check(DataTypeFactory::instance().get("Nullable(Nothing)")); + check(DataTypeFactory::instance().get("Nullable(UUID)")); + check(std::make_shared( + DataTypes{ + std::make_shared(), + std::make_shared(), + DataTypeFactory::instance().get("Array(Array(Array(UInt32)))")}, + DataTypeFactory::instance().get("Tuple(c1 UInt32, c2 String, c3 UUID)"))); + DataTypes argument_types = {std::make_shared()}; + Array parameters = {Field(0.1), Field(0.2)}; + AggregateFunctionProperties properties; + AggregateFunctionPtr function = AggregateFunctionFactory::instance().get("quantiles", NullsAction::EMPTY, argument_types, parameters, properties); + check(std::make_shared(function, argument_types, parameters)); + check(std::make_shared(function, argument_types, parameters, 2)); + check(DataTypeFactory::instance().get("AggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(quantiles(0.5, 0.9), UInt64)")); + check(DataTypeFactory::instance().get("AggregateFunction(sequenceMatch('(?1)(?2)'), Date, UInt8, UInt8)")); + check(DataTypeFactory::instance().get("AggregateFunction(sumMapFiltered([1, 4, 8]), Array(UInt64), Array(UInt64))")); + check(DataTypeFactory::instance().get("LowCardinality(UInt32)")); + check(DataTypeFactory::instance().get("LowCardinality(Nullable(String))")); + check(DataTypeFactory::instance().get("Map(String, UInt32)")); + check(DataTypeFactory::instance().get("Map(String, Map(String, Map(String, UInt32)))")); + check(std::make_shared()); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Variant(String, UInt32, Date32)")); + check(std::make_shared()); + check(DataTypeFactory::instance().get("Bool")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(sum, UInt64)")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(maxMap, Tuple(Array(UInt32), Array(UInt32)))")); + check(DataTypeFactory::instance().get("SimpleAggregateFunction(groupArrayArray(19), Array(UInt64))")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b UInt32)")); + check(DataTypeFactory::instance().get("Nested(a UInt32, b Nested(c String, d Nested(e Date)))")); + check(DataTypeFactory::instance().get("Ring")); + check(DataTypeFactory::instance().get("Point")); + check(DataTypeFactory::instance().get("Polygon")); + check(DataTypeFactory::instance().get("MultiPolygon")); + check(DataTypeFactory::instance().get("Tuple(Map(LowCardinality(String), Array(AggregateFunction(2, quantiles(0.1, 0.2), Float32))), Array(Array(Tuple(UInt32, Tuple(a Map(String, String), b Nullable(Date), c Variant(Tuple(g String, d Array(UInt32)), Date, Map(String, String)))))))")); +} diff --git 
a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index e9a405aa796..a51eb975180 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -265,9 +265,13 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; - format_settings.max_binary_string_size = settings.format_binary_max_string_size; - format_settings.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.max_binary_string_size = settings.format_binary_max_string_size; + format_settings.binary.max_binary_array_size = settings.format_binary_max_array_size; + format_settings.binary.encode_types_in_binary_format = settings.output_format_binary_encode_types_in_binary_format; + format_settings.binary.decode_types_in_binary_format = settings.input_format_binary_decode_types_in_binary_format; format_settings.native.allow_types_conversion = settings.input_format_native_allow_types_conversion; + format_settings.native.encode_types_in_binary_format = settings.output_format_native_encode_types_in_binary_format; + format_settings.native.decode_types_in_binary_format = settings.input_format_native_decode_types_in_binary_format; format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 421ed4d112d..69a0c64b3b0 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -106,8 +106,6 @@ struct FormatSettings UInt64 input_allow_errors_num = 0; Float32 input_allow_errors_ratio = 0; - UInt64 max_binary_string_size = 1_GiB; - UInt64 max_binary_array_size = 1_GiB; UInt64 client_protocol_version = 0; UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; @@ -121,6 +119,14 @@ struct FormatSettings ZSTD }; + struct + { + UInt64 max_binary_string_size = 1_GiB; + UInt64 max_binary_array_size = 1_GiB; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; + } binary{}; + struct { UInt64 row_group_size = 1000000; @@ -454,6 +460,8 @@ struct FormatSettings struct { bool allow_types_conversion = true; + bool encode_types_in_binary_format = false; + bool decode_types_in_binary_format = false; } native{}; struct diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index fa5d41d6536..45be0402dc4 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -31,8 +32,8 @@ namespace ErrorCodes } -NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) - : istr(istr_), server_revision(server_revision_) +NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_) + : istr(istr_), server_revision(server_revision_), format_settings(format_settings_) { } @@ -40,16 +41,12 @@ NativeReader::NativeReader( ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_, - bool null_as_default_, - bool 
allow_types_conversion_, + std::optionalformat_settings_, BlockMissingValues * block_missing_values_) : istr(istr_) , header(header_) , server_revision(server_revision_) - , skip_unknown_columns(skip_unknown_columns_) - , null_as_default(null_as_default_) - , allow_types_conversion(allow_types_conversion_) + , format_settings(std::move(format_settings_)) , block_missing_values(block_missing_values_) { } @@ -83,13 +80,14 @@ void NativeReader::resetParser() use_index = false; } -void NativeReader::readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) +static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, const std::optional & format_settings, size_t rows, double avg_value_size_hint) { ISerialization::DeserializeBinaryBulkSettings settings; settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; settings.avg_value_size_hint = avg_value_size_hint; settings.position_independent_encoding = false; settings.native_format = true; + settings.data_types_binary_encoding = format_settings && format_settings->native.decode_types_in_binary_format; ISerialization::DeserializeBinaryBulkStatePtr state; @@ -167,8 +165,16 @@ Block NativeReader::read() /// Type String type_name; - readBinary(type_name, istr); - column.type = data_type_factory.get(type_name); + if (format_settings && format_settings->native.decode_types_in_binary_format) + { + column.type = decodeDataType(istr); + type_name = column.type->getName(); + } + else + { + readBinary(type_name, istr); + column.type = data_type_factory.get(type_name); + } setVersionToAggregateFunctions(column.type, true, server_revision); @@ -203,7 +209,7 @@ Block NativeReader::read() double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i]; if (rows) /// If no rows, nothing to read. - readData(*serialization, read_column, istr, rows, avg_value_size_hint); + readData(*serialization, read_column, istr, format_settings, rows, avg_value_size_hint); column.column = std::move(read_column); @@ -214,12 +220,12 @@ Block NativeReader::read() { auto & header_column = header.getByName(column.name); - if (null_as_default) + if (format_settings && format_settings->null_as_default) insertNullAsDefaultIfNeeded(column, header_column, header.getPositionByName(column.name), block_missing_values); if (!header_column.type->equals(*column.type)) { - if (allow_types_conversion) + if (format_settings && format_settings->native.allow_types_conversion) { try { @@ -246,7 +252,7 @@ Block NativeReader::read() } else { - if (!skip_unknown_columns) + if (format_settings && !format_settings->skip_unknown_fields) throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column with name {} found while reading data in Native format", column.name); use_in_result = false; } diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 3cec4afd997..97b6ea22b15 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -20,7 +20,7 @@ class NativeReader { public: /// If a non-zero server_revision is specified, additional block information may be expected and read. - NativeReader(ReadBuffer & istr_, UInt64 server_revision_); + NativeReader(ReadBuffer & istr_, UInt64 server_revision_, std::optional format_settings_ = std::nullopt); /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. 
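For orientation between these two hunks: a minimal sketch of how the reworked NativeWriter/NativeReader constructors are meant to be driven. It assumes the optional parameter is std::optional<FormatSettings> (the template argument is not spelled out here) and that the Block and buffer objects come from surrounding code; it is an illustration under those assumptions, not code from the patch itself.

#include <Core/Block.h>
#include <Formats/FormatSettings.h>
#include <Formats/NativeReader.h>
#include <Formats/NativeWriter.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>

using namespace DB;

/// Round-trip a block through the Native format with binary type encoding enabled.
static Block roundTripWithBinaryTypes(const Block & header, const Block & block, WriteBuffer & out, ReadBuffer & in)
{
    FormatSettings settings;
    settings.native.encode_types_in_binary_format = true;  /// writer uses encodeDataType() instead of writing the type name
    settings.native.decode_types_in_binary_format = true;  /// reader uses decodeDataType() instead of parsing the name

    NativeWriter writer(out, /*client_revision_=*/ 0, header, settings);  /// settings now come before remove_low_cardinality_
    writer.write(block);
    writer.flush();

    NativeReader reader(in, header, /*server_revision_=*/ 0, settings);
    return reader.read();
}

Passing std::nullopt (the default) keeps the previous behaviour of writing and parsing textual type names, which is what the TCPHandler.cpp hunks further down do explicitly.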
@@ -28,9 +28,7 @@ public: ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, - bool skip_unknown_columns_ = false, - bool null_as_default_ = false, - bool allow_types_conversion_ = false, + std::optional format_settings_ = std::nullopt, BlockMissingValues * block_missing_values_ = nullptr); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. @@ -38,8 +36,6 @@ public: IndexForNativeFormat::Blocks::const_iterator index_block_it_, IndexForNativeFormat::Blocks::const_iterator index_block_end_); - static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint); - Block getHeader() const; void resetParser(); @@ -50,9 +46,7 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; - bool skip_unknown_columns = false; - bool null_as_default = false; - bool allow_types_conversion = false; + std::optional format_settings = std::nullopt; BlockMissingValues * block_missing_values = nullptr; bool use_index = false; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index b150561a5fc..3c87e489b1c 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -25,10 +26,20 @@ namespace ErrorCodes NativeWriter::NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_, - IndexForNativeFormat * index_, size_t initial_size_of_file_) - : ostr(ostr_), client_revision(client_revision_), header(header_), - index(index_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_) + WriteBuffer & ostr_, + UInt64 client_revision_, + const Block & header_, + std::optional format_settings_, + bool remove_low_cardinality_, + IndexForNativeFormat * index_, + size_t initial_size_of_file_) + : ostr(ostr_) + , client_revision(client_revision_) + , header(header_) + , index(index_) + , initial_size_of_file(initial_size_of_file_) + , remove_low_cardinality(remove_low_cardinality_) + , format_settings(std::move(format_settings_)) { if (index) { @@ -45,7 +56,7 @@ void NativeWriter::flush() } -static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) +static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, const std::optional & format_settings, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) @@ -57,6 +68,7 @@ static void writeData(const ISerialization & serialization, const ColumnPtr & co settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; + settings.data_types_binary_encoding = format_settings && format_settings->native.encode_types_in_binary_format; ISerialization::SerializeBinaryBulkStatePtr state; serialization.serializeBinaryBulkStatePrefix(*full_column, settings, state); @@ -121,15 +133,22 @@ size_t NativeWriter::write(const Block & block) setVersionToAggregateFunctions(column.type, include_version, include_version ? 
std::optional(client_revision) : std::nullopt); /// Type - String type_name = column.type->getName(); + if (format_settings && format_settings->native.encode_types_in_binary_format) + { + encodeDataType(column.type, ostr); + } + else + { + String type_name = column.type->getName(); - /// For compatibility, we will not send explicit timezone parameter in DateTime data type - /// to older clients, that cannot understand it. - if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE - && startsWith(type_name, "DateTime(")) - type_name = "DateTime"; + /// For compatibility, we will not send explicit timezone parameter in DateTime data type + /// to older clients, that cannot understand it. + if (client_revision < DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE + && startsWith(type_name, "DateTime(")) + type_name = "DateTime"; - writeStringBinary(type_name, ostr); + writeStringBinary(type_name, ostr); + } /// Serialization. Dynamic, if client supports it. SerializationPtr serialization; @@ -161,7 +180,7 @@ size_t NativeWriter::write(const Block & block) /// Data if (rows) /// Zero items of data is always represented as zero number of bytes. - writeData(*serialization, column.column, ostr, 0, 0); + writeData(*serialization, column.column, ostr, format_settings, 0, 0); if (index) { diff --git a/src/Formats/NativeWriter.h b/src/Formats/NativeWriter.h index 7bb377d2e4a..b4903243d45 100644 --- a/src/Formats/NativeWriter.h +++ b/src/Formats/NativeWriter.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -23,7 +24,7 @@ public: /** If non-zero client_revision is specified, additional block information can be written. */ NativeWriter( - WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_ = false, + WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, std::optional format_settings_ = std::nullopt, bool remove_low_cardinality_ = false, IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0); Block getHeader() const { return header; } @@ -44,6 +45,7 @@ private: CompressedWriteBuffer * ostr_concrete = nullptr; bool remove_low_cardinality; + std::optional format_settings; }; } diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 31faea2e13e..6cbcae2bebe 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -879,11 +879,11 @@ namespace } template - bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional) + bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) { if (is_json || settings.try_infer_exponent_floats) - return tryReadFloatTextExt(value, buf, has_fractional); - return tryReadFloatTextExtNoExponent(value, buf, has_fractional); + return tryReadFloatText(value, buf); + return tryReadFloatTextNoExponent(value, buf); } template @@ -893,31 +893,46 @@ namespace return nullptr; Float64 tmp_float; - bool has_fractional; if (settings.try_infer_integers) { /// If we read from String, we can do it in a more efficient way. if (auto * string_buf = dynamic_cast(&buf)) { /// Remember the pointer to the start of the number to rollback to it. - /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. 
char * number_start = buf.position(); - - /// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e' - /// But, for now it is true - if (tryReadFloat(tmp_float, buf, settings, has_fractional) && has_fractional) - return std::make_shared(); - Int64 tmp_int; - buf.position() = number_start; - if (tryReadIntText(tmp_int, buf)) - return std::make_shared(); + bool read_int = tryReadIntText(tmp_int, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_int ? std::make_shared() : nullptr; - /// In case of Int64 overflow we can try to infer UInt64. - UInt64 tmp_uint; + char * int_end = buf.position(); + /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; - if (tryReadIntText(tmp_uint, buf)) - return std::make_shared(); + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_uint ? std::make_shared() : nullptr; + + uint_end = buf.position(); + buf.position() = number_start; + } + + if (tryReadFloat(tmp_float, buf, settings)) + { + if (read_int && buf.position() == int_end) + return std::make_shared(); + if (read_uint && buf.position() == uint_end) + return std::make_shared(); + return std::make_shared(); + } return nullptr; } @@ -927,22 +942,36 @@ namespace /// and then as float. PeekableReadBuffer peekable_buf(buf); PeekableReadBufferCheckpoint checkpoint(peekable_buf); - - if (tryReadFloat(tmp_float, peekable_buf, settings, has_fractional) && has_fractional) - return std::make_shared(); - peekable_buf.rollbackToCheckpoint(/* drop= */ false); - Int64 tmp_int; - if (tryReadIntText(tmp_int, peekable_buf)) - return std::make_shared(); - peekable_buf.rollbackToCheckpoint(/* drop= */ true); + bool read_int = tryReadIntText(tmp_int, peekable_buf); + auto * int_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + bool read_uint = false; + char * uint_end = nullptr; /// In case of Int64 overflow we can try to infer UInt64. - UInt64 tmp_uint; - if (tryReadIntText(tmp_uint, peekable_buf)) - return std::make_shared(); + if (!read_int) + { + PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, peekable_buf); + uint_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + } + + if (tryReadFloat(tmp_float, peekable_buf, settings)) + { + /// Float parsing reads no fewer bytes than integer parsing, + /// so position of the buffer is either the same, or further. + /// If it's the same, then it's integer. 
+ if (read_int && peekable_buf.position() == int_end) + return std::make_shared(); + if (read_uint && peekable_buf.position() == uint_end) + return std::make_shared(); + return std::make_shared(); + } } - else if (tryReadFloat(tmp_float, buf, settings, has_fractional)) + else if (tryReadFloat(tmp_float, buf, settings)) { return std::make_shared(); } @@ -975,8 +1004,7 @@ namespace buf.position() = buf.buffer().begin(); Float64 tmp; - bool has_fractional; - if (tryReadFloat(tmp, buf, settings, has_fractional) && buf.eof()) + if (tryReadFloat(tmp, buf, settings) && buf.eof()) return std::make_shared(); return nullptr; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f4433cd8288..db5c5a37125 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -611,8 +610,6 @@ struct ContextSharedPart : boost::noncopyable LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); - NamedCollectionFactory::instance().shutdown(); - delete_async_insert_queue.reset(); SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index ac5da172210..c5336f3bcc7 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -55,10 +56,25 @@ std::vector BinaryFormatReader::readNames() template std::vector BinaryFormatReader::readTypes() { - auto types = readHeaderRow(); - for (const auto & type_name : types) - read_data_types.push_back(DataTypeFactory::instance().get(type_name)); - return types; + read_data_types.reserve(read_columns); + Names type_names; + if (format_settings.binary.decode_types_in_binary_format) + { + type_names.reserve(read_columns); + for (size_t i = 0; i < read_columns; ++i) + { + read_data_types.push_back(decodeDataType(*in)); + type_names.push_back(read_data_types.back()->getName()); + } + } + else + { + type_names = readHeaderRow(); + for (const auto & type_name : type_names) + read_data_types.push_back(DataTypeFactory::instance().get(type_name)); + } + + return type_names; } template diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index ff904f61d22..d4c2348d080 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,15 @@ void BinaryRowOutputFormat::writePrefix() if (with_types) { - for (size_t i = 0; i < columns; ++i) + if (format_settings.binary.encode_types_in_binary_format) { - writeStringBinary(header.safeGetByPosition(i).type->getName(), out); + for (size_t i = 0; i < columns; ++i) + encodeDataType(header.safeGetByPosition(i).type, out); + } + else + { + for (size_t i = 0; i < columns; ++i) + writeStringBinary(header.safeGetByPosition(i).type->getName(), out); } } } diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index a7a49ab6a8c..38fac60eef6 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -21,9 +21,7 @@ public: buf, header_, 0, - settings.skip_unknown_fields, - settings.null_as_default, - 
settings.native.allow_types_conversion, + settings, settings.defaults_for_omitted_fields ? &block_missing_values : nullptr)) , header(header_) {} @@ -72,9 +70,9 @@ private: class NativeOutputFormat final : public IOutputFormat { public: - NativeOutputFormat(WriteBuffer & buf, const Block & header, UInt64 client_protocol_version = 0) + NativeOutputFormat(WriteBuffer & buf, const Block & header, const FormatSettings & settings, UInt64 client_protocol_version = 0) : IOutputFormat(header, buf) - , writer(buf, client_protocol_version, header) + , writer(buf, client_protocol_version, header, settings) { } @@ -103,14 +101,17 @@ private: class NativeSchemaReader : public ISchemaReader { public: - explicit NativeSchemaReader(ReadBuffer & in_) : ISchemaReader(in_) {} + explicit NativeSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) : ISchemaReader(in_), settings(settings_) {} NamesAndTypesList readSchema() override { - auto reader = NativeReader(in, 0); + auto reader = NativeReader(in, 0, settings); auto block = reader.read(); return block.getNamesAndTypesList(); } + +private: + const FormatSettings settings; }; @@ -134,16 +135,16 @@ void registerOutputFormatNative(FormatFactory & factory) const Block & sample, const FormatSettings & settings) { - return std::make_shared(buf, sample, settings.client_protocol_version); + return std::make_shared(buf, sample, settings, settings.client_protocol_version); }); } void registerNativeSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &) + factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf); + return std::make_shared(buf, settings); }); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e3a820340ad..c8f86b1c2c9 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2086,6 +2086,7 @@ void TCPHandler::initBlockOutput(const Block & block) *state.maybe_compressed_out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2100,6 +2101,7 @@ void TCPHandler::initLogsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } @@ -2114,6 +2116,7 @@ void TCPHandler::initProfileEventsBlockOutput(const Block & block) *out, client_tcp_protocol_version, block.cloneEmpty(), + std::nullopt, !query_settings.low_cardinality_allow_in_native_format); } } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..57da72d06ed 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -408,7 +408,7 @@ namespace auto data_file_path = temp_dir / fs::path{file_paths[data_bin_pos]}.filename(); auto data_out_compressed = temp_disk->writeFile(data_file_path); auto data_out = std::make_unique(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), max_compress_block_size); - NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), false, &index}; + NativeWriter block_out{*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &index}; for (const auto & block : *blocks) block_out.write(block); data_out->finalize(); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f0c5103d657..e456665a615 100644 --- a/src/Storages/StorageStripeLog.cpp +++ 
b/src/Storages/StorageStripeLog.cpp @@ -193,7 +193,7 @@ public: storage.saveFileSizes(lock); size_t initial_data_size = storage.file_checker.getFileSize(storage.data_file_path); - block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), false, &storage.indices, initial_data_size); + block_out = std::make_unique(*data_out, 0, metadata_snapshot->getSampleBlock(), std::nullopt, false, &storage.indices, initial_data_size); } String getName() const override { return "StripeLogSink"; } diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference new file mode 100644 index 00000000000..26bd3326d5c --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference @@ -0,0 +1,48 @@ +\N None +42 UInt8 +-42 Int8 +42 UInt16 +-42 Int16 +42 UInt32 +-42 Int32 +42 UInt64 +-42 Int64 +42 UInt128 +-42 Int128 +42 UInt256 +-42 Int256 +42.42 Float32 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00.000000 DateTime64(6) +Hello, World! String +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(a UInt32, b String, c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh new file mode 100755 index 00000000000..b078211f088 --- /dev/null +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=255)) engine=Memory" + +$CLICKHOUSE_CLIENT -q "insert into test select 0, NULL" +$CLICKHOUSE_CLIENT -q "insert into test select 1, materialize(42)::UInt8" +$CLICKHOUSE_CLIENT -q "insert into test select 2, materialize(-42)::Int8" +$CLICKHOUSE_CLIENT -q "insert into test select 3, materialize(42)::UInt16" +$CLICKHOUSE_CLIENT -q "insert into test select 4, materialize(-42)::Int16" +$CLICKHOUSE_CLIENT -q "insert into test select 5, materialize(42)::UInt32" +$CLICKHOUSE_CLIENT -q "insert into test select 6, materialize(-42)::Int32" +$CLICKHOUSE_CLIENT -q "insert into test select 7, materialize(42)::UInt64" +$CLICKHOUSE_CLIENT -q "insert into test select 8, materialize(-42)::Int64" +$CLICKHOUSE_CLIENT -q "insert into test select 9, materialize(42)::UInt128" +$CLICKHOUSE_CLIENT -q "insert into test select 10, materialize(-42)::Int128" +$CLICKHOUSE_CLIENT -q "insert into test select 11, materialize(42)::UInt256" +$CLICKHOUSE_CLIENT -q "insert into test select 12, materialize(-42)::Int256" +$CLICKHOUSE_CLIENT -q "insert into test select 13, materialize(42.42)::Float32" +$CLICKHOUSE_CLIENT -q "insert into test select 14, materialize(42.42)::Float64" +$CLICKHOUSE_CLIENT -q "insert into test select 15, materialize('2020-01-01')::Date" +$CLICKHOUSE_CLIENT -q "insert into test select 16, materialize('2020-01-01')::Date32" +$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +$CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('aaaaa')::FixedString(5)" +$CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +$CLICKHOUSE_CLIENT -q "insert into test select 22, materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +$CLICKHOUSE_CLIENT -q "insert into test select 23, materialize(42.42)::Decimal32(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 24, materialize(42.42)::Decimal64(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 25, materialize(42.42)::Decimal128(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 26, materialize(42.42)::Decimal256(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 27, materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +$CLICKHOUSE_CLIENT -q "insert into test select 28, materialize([1, 2, 3])::Array(UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 29, materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +$CLICKHOUSE_CLIENT -q "insert into test select 30, materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 31, materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 32, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 33, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 34, quantileState(0.5)(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 35, sumSimpleState(42::UInt64)" +$CLICKHOUSE_CLIENT -q 
"insert into test select 36, toLowCardinality('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 37, materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +$CLICKHOUSE_CLIENT -q "insert into test select 38, materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +$CLICKHOUSE_CLIENT -q "insert into test select 39, materialize('127.0.0.0')::IPv4" +$CLICKHOUSE_CLIENT -q "insert into test select 40, materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +$CLICKHOUSE_CLIENT -q "insert into test select 41, materialize(true)::Bool" +$CLICKHOUSE_CLIENT -q "insert into test select 42, materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +$CLICKHOUSE_CLIENT -q "insert into test select 43, materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +$CLICKHOUSE_CLIENT -q "insert into test select 44, materialize((0, 0))::Point" +$CLICKHOUSE_CLIENT -q "insert into test select 45, materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +$CLICKHOUSE_CLIENT -q "insert into test select 46, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +$CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" + +$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id" +$CLICKHOUSE_CLIENT -q "drop table test" + diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference new file mode 100644 index 00000000000..0bc257adf23 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -0,0 +1,102 @@ +42 UInt8 +42 UInt8 +\N Nullable(Nothing) +\N Nullable(Nothing) +42 UInt8 +42 UInt8 +-42 Int8 +-42 Int8 +42 UInt16 +42 UInt16 +-42 Int16 +-42 Int16 +42 UInt32 +42 UInt32 +-42 Int32 +-42 Int32 +42 UInt64 +42 UInt64 +-42 Int64 +-42 Int64 +42 UInt128 +42 UInt128 +-42 Int128 +-42 Int128 +42 UInt256 +42 UInt256 +-42 Int256 +-42 Int256 +42.42 Float32 +42.42 Float32 +42.42 Float64 +42.42 Float64 +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date32 +2020-01-01 Date32 +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6) +Hello, World! String +Hello, World! 
String +aaaaa FixedString(5) +aaaaa FixedString(5) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +a Enum16(\'c\' = -1280, \'a\' = 1, \'b\' = 2) +42.42 Decimal(9, 3) +42.42 Decimal(9, 3) +42.42 Decimal(18, 3) +42.42 Decimal(18, 3) +42.42 Decimal(38, 3) +42.42 Decimal(38, 3) +42.42 Decimal(76, 3) +42.42 Decimal(76, 3) +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +984ac60f-4d08-4ef1-9c62-d82f343fbc90 UUID +[1,2,3] Array(UInt64) +[1,2,3] Array(UInt64) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +[[[1],[2]],[[3,4,5]]] Array(Array(Array(UInt64))) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(UInt32, String, Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,'str',42.42) Tuple(\n a UInt32,\n b String,\n c Float32) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(UInt32, Tuple(String, Tuple(Float32, Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +(1,('str',(42.42,-30))) Tuple(\n a UInt32,\n b Tuple(\n c String,\n d Tuple(\n e Float32,\n f Int8))) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +\0 \0\0\0\0\0\0\0\0\0\0\0\0\06364136223846793005 0 123459*\0\0\0\0\0\0\0 AggregateFunction(quantile(0.5), UInt64) +42 SimpleAggregateFunction(sum, UInt64) +42 SimpleAggregateFunction(sum, UInt64) +Hello, World! LowCardinality(String) +Hello, World! LowCardinality(String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:'str1',2:'str2'} Map(UInt64, String) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +{1:{1:{1:'str1'}},2:{2:{2:'str2'}}} Map(UInt64, Map(UInt64, Map(UInt64, String))) +127.0.0.0 IPv4 +127.0.0.0 IPv4 +2001:db8:cafe:1::1 IPv6 +2001:db8:cafe:1::1 IPv6 +true Bool +true Bool +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(1,2),(3,4)] Nested(a UInt32, b UInt32) +[(0,0),(10,0),(10,10),(0,10)] Ring +[(0,0),(10,0),(10,10),(0,10)] Ring +(0,0) Point +(0,0) Point +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] Polygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] MultiPolygon +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +[{42:(1,[(2,{1:2})])}] Array(Map(UInt8, Tuple(UInt8, Array(Tuple(UInt8, Map(UInt8, UInt8)))))) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) +[{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh new file mode 100755 index 00000000000..a1e8aa99548 --- /dev/null +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function test +{ + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" +} + +test "materialize(42)::UInt8" +test "NULL" +test "materialize(42)::UInt8" +test "materialize(-42)::Int8" +test "materialize(42)::UInt16" +test "materialize(-42)::Int16" +test "materialize(42)::UInt32" +test "materialize(-42)::Int32" +test "materialize(42)::UInt64" +test "materialize(-42)::Int64" +test "materialize(42)::UInt128" +test "materialize(-42)::Int128" +test "materialize(42)::UInt256" +test "materialize(-42)::Int256" +test "materialize(42.42)::Float32" +test "materialize(42.42)::Float64" +test "materialize('2020-01-01')::Date" +test "materialize('2020-01-01')::Date32" +test "materialize('2020-01-01 00:00:00')::DateTime" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +test "materialize('Hello, World!')" +test "materialize('aaaaa')::FixedString(5)" +test "materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +test "materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +test "materialize(42.42)::Decimal32(3)" +test "materialize(42.42)::Decimal64(3)" +test "materialize(42.42)::Decimal128(3)" +test "materialize(42.42)::Decimal256(3)" +test "materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +test "materialize([1, 2, 3])::Array(UInt64)" +test "materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +test "materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +test "materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +test "materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +test "quantileState(0.5)(42::UInt64)" +test "sumSimpleState(42::UInt64)" +test "toLowCardinality('Hello, World!')" +test "materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +test "materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +test "materialize('127.0.0.0')::IPv4" +test "materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +test "materialize(true)::Bool" +test "materialize([tuple(1, 2), tuple(3, 4)])::Nested(a UInt32, b UInt32)" +test "materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +test "materialize((0, 0))::Point" +test "materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +test "materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" +test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" From 
b9fbbbb28496692a44b77528f552b81097110a8d Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 18:23:52 +0000 Subject: [PATCH 053/299] Update settings changes history --- src/Core/Settings.h | 6 +--- src/Core/SettingsChangesHistory.h | 9 ++--- src/Formats/FormatFactory.cpp | 2 -- src/Formats/FormatSettings.h | 2 -- src/Interpreters/Context.cpp | 60 +++---------------------------- src/Server/TCPHandler.cpp | 24 +++++-------- 6 files changed, 19 insertions(+), 84 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f718acf9b25..23c5d7fc1a2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -31,7 +31,6 @@ class IColumn; * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. */ -// clang-format off #define COMMON_SETTINGS(M, ALIAS) \ M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ @@ -934,7 +933,6 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ - M(Bool, uniform_snowflake_conversion_functions, true, "Enables functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID while disabling functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -1150,9 +1148,7 @@ class IColumn; M(UInt64, output_format_pretty_max_value_width, 10000, "Maximum width of value to display in Pretty formats. If greater - it will be cut.", 0) \ M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ - M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. 
Available charsets: ASCII, UTF-8 (default one).", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ + M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 895db9c7ca0..e9da55e66c5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -75,7 +75,6 @@ namespace SettingsChangesHistory using SettingsChanges = std::vector; } -// clang-format off /// History of settings changes that controls some backward incompatible changes /// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done /// in this version. This history contains both changes to existing settings and newly added settings. @@ -86,6 +85,11 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, + {"input_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, + {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, + {"input_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"} + }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -102,7 +106,6 @@ static const std::map access_control TSA_GUARDED_BY(mutex); mutable OnceFlag resource_manager_initialized; mutable ResourceManagerPtr resource_manager; @@ -832,7 +830,6 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) auto res = std::shared_ptr(new Context); res->shared = shared_part; res->query_access_info = std::make_shared(); - res->query_privileges_info = std::make_shared(); return res; } @@ -1425,7 +1422,7 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { 
checkAccessImpl(elements); } -std::shared_ptr Context::getAccess() const +std::shared_ptr Context::getAccess() const { /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() @@ -1442,14 +1439,14 @@ std::shared_ptr Context::getAccess() const { SharedLockGuard lock(mutex); if (access && !need_recalculate_access) - return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. + return access; /// No need to recalculate access rights. params.emplace(get_params()); if (access && (access->getParams() == *params)) { need_recalculate_access = false; - return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. + return access; /// No need to recalculate access rights. } } @@ -1469,7 +1466,7 @@ std::shared_ptr Context::getAccess() const } } - return std::make_shared(res, shared_from_this()); + return res; } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const @@ -1561,36 +1558,11 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getWorkloadClassifier() const { std::lock_guard lock(mutex); - // NOTE: Workload cannot be changed after query start, and getWorkloadClassifier() should not be called before proper `workload` is set if (!classifier) classifier = getResourceManager()->acquire(getSettingsRef().workload); return classifier; } -String Context::getMergeWorkload() const -{ - SharedLockGuard lock(shared->mutex); - return shared->merge_workload; -} - -void Context::setMergeWorkload(const String & value) -{ - std::lock_guard lock(shared->mutex); - shared->merge_workload = value; -} - -String Context::getMutationWorkload() const -{ - SharedLockGuard lock(shared->mutex); - return shared->mutation_workload; -} - -void Context::setMutationWorkload(const String & value) -{ - std::lock_guard lock(shared->mutex); - shared->mutation_workload = value; -} - Scalars Context::getScalars() const { @@ -1855,15 +1827,6 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String } } -void Context::addQueryPrivilegesInfo(const String & privilege, bool granted) const -{ - std::lock_guard lock(query_privileges_info->mutex); - if (granted) - query_privileges_info->used_privileges.emplace(privilege); - else - query_privileges_info->missing_privileges.emplace(privilege); -} - static bool findIdentifier(const ASTFunction * function) { if (!function || !function->arguments) @@ -2545,21 +2508,6 @@ void Context::makeQueryContext() local_read_query_throttler.reset(); local_write_query_throttler.reset(); backups_query_throttler.reset(); - query_privileges_info = std::make_shared(*query_privileges_info); -} - -void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings) -{ - makeQueryContext(); - classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; -} - -void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) -{ - makeQueryContext(); - classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes - settings.workload = merge_tree_settings.mutation_workload.value.empty() ? 
getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d340fbe7e77..c8f86b1c2c9 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,8 +1,9 @@ -#include -#include -#include +#include "Interpreters/AsynchronousInsertQueue.h" +#include "Interpreters/SquashingTransform.h" +#include "Parsers/ASTInsertQuery.h" #include #include +#include #include #include #include @@ -245,6 +246,7 @@ TCPHandler::~TCPHandler() void TCPHandler::runImpl() { setThreadName("TCPHandler"); + ThreadStatus thread_status; extractConnectionSettingsFromContext(server.context()); @@ -884,16 +886,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); + SquashingTransform squashing(0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - squashing.header = state.block_for_insert; - auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if (planned_chunk.hasChunkInfo()) + auto result = squashing.add(std::move(state.block_for_insert)); + if (result) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -902,12 +901,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - Chunk result_chunk; - if (planned_chunk.hasChunkInfo()) - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.add({}); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } From 7e7dd78844fb3000e46742bf4884f5166f2abe30 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 18:31:41 +0000 Subject: [PATCH 054/299] Revert unrelated changes --- src/Core/Field.cpp | 10 ++- src/Formats/SchemaInferenceUtils.cpp | 96 ++++++++++------------------ 2 files changed, 38 insertions(+), 68 deletions(-) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index fb820ad2b56..0e5b1bac000 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -1,12 +1,11 @@ -#include -#include #include -#include -#include #include +#include #include +#include #include -#include +#include +#include #include #include #include @@ -22,7 +21,6 @@ namespace ErrorCodes extern const int CANNOT_RESTORE_FROM_FIELD_DUMP; extern const int DECIMAL_OVERFLOW; extern const int INCORRECT_DATA; - extern const int UNSUPPORTED_METHOD; } template diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 6cbcae2bebe..31faea2e13e 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -879,11 +879,11 @@ namespace } template - bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) + bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional) { if (is_json || settings.try_infer_exponent_floats) - return tryReadFloatText(value, buf); - return tryReadFloatTextNoExponent(value, buf); + return tryReadFloatTextExt(value, buf, 
has_fractional); + return tryReadFloatTextExtNoExponent(value, buf, has_fractional); } template @@ -893,46 +893,31 @@ namespace return nullptr; Float64 tmp_float; + bool has_fractional; if (settings.try_infer_integers) { /// If we read from String, we can do it in a more efficient way. if (auto * string_buf = dynamic_cast(&buf)) { /// Remember the pointer to the start of the number to rollback to it. - char * number_start = buf.position(); - Int64 tmp_int; - bool read_int = tryReadIntText(tmp_int, buf); - /// If we reached eof, it cannot be float (it requires no less data than integer) - if (buf.eof()) - return read_int ? std::make_shared() : nullptr; - - char * int_end = buf.position(); /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. - buf.position() = number_start; + char * number_start = buf.position(); - bool read_uint = false; - char * uint_end = nullptr; - /// In case of Int64 overflow we can try to infer UInt64. - if (!read_int) - { - UInt64 tmp_uint; - read_uint = tryReadIntText(tmp_uint, buf); - /// If we reached eof, it cannot be float (it requires no less data than integer) - if (buf.eof()) - return read_uint ? std::make_shared() : nullptr; - - uint_end = buf.position(); - buf.position() = number_start; - } - - if (tryReadFloat(tmp_float, buf, settings)) - { - if (read_int && buf.position() == int_end) - return std::make_shared(); - if (read_uint && buf.position() == uint_end) - return std::make_shared(); + /// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e' + /// But, for now it is true + if (tryReadFloat(tmp_float, buf, settings, has_fractional) && has_fractional) return std::make_shared(); - } + + Int64 tmp_int; + buf.position() = number_start; + if (tryReadIntText(tmp_int, buf)) + return std::make_shared(); + + /// In case of Int64 overflow we can try to infer UInt64. + UInt64 tmp_uint; + buf.position() = number_start; + if (tryReadIntText(tmp_uint, buf)) + return std::make_shared(); return nullptr; } @@ -942,36 +927,22 @@ namespace /// and then as float. PeekableReadBuffer peekable_buf(buf); PeekableReadBufferCheckpoint checkpoint(peekable_buf); - Int64 tmp_int; - bool read_int = tryReadIntText(tmp_int, peekable_buf); - auto * int_end = peekable_buf.position(); - peekable_buf.rollbackToCheckpoint(true); - bool read_uint = false; - char * uint_end = nullptr; - /// In case of Int64 overflow we can try to infer UInt64. - if (!read_int) - { - PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); - UInt64 tmp_uint; - read_uint = tryReadIntText(tmp_uint, peekable_buf); - uint_end = peekable_buf.position(); - peekable_buf.rollbackToCheckpoint(true); - } - - if (tryReadFloat(tmp_float, peekable_buf, settings)) - { - /// Float parsing reads no fewer bytes than integer parsing, - /// so position of the buffer is either the same, or further. - /// If it's the same, then it's integer. - if (read_int && peekable_buf.position() == int_end) - return std::make_shared(); - if (read_uint && peekable_buf.position() == uint_end) - return std::make_shared(); + if (tryReadFloat(tmp_float, peekable_buf, settings, has_fractional) && has_fractional) return std::make_shared(); - } + peekable_buf.rollbackToCheckpoint(/* drop= */ false); + + Int64 tmp_int; + if (tryReadIntText(tmp_int, peekable_buf)) + return std::make_shared(); + peekable_buf.rollbackToCheckpoint(/* drop= */ true); + + /// In case of Int64 overflow we can try to infer UInt64. 
+ UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, peekable_buf)) + return std::make_shared(); } - else if (tryReadFloat(tmp_float, buf, settings)) + else if (tryReadFloat(tmp_float, buf, settings, has_fractional)) { return std::make_shared(); } @@ -1004,7 +975,8 @@ namespace buf.position() = buf.buffer().begin(); Float64 tmp; - if (tryReadFloat(tmp, buf, settings) && buf.eof()) + bool has_fractional; + if (tryReadFloat(tmp, buf, settings, has_fractional) && buf.eof()) return std::make_shared(); return nullptr; From 785f3ac5c9297dc70cfd5b0dbb9790b869551436 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 18:40:49 +0000 Subject: [PATCH 055/299] Revert unrelated changes --- .../domains/data-types-binary-encoding.md | 0 src/Core/Settings.h | 4 ++ src/Core/SettingsChangesHistory.h | 4 ++ src/Formats/FormatFactory.cpp | 2 + src/Formats/FormatSettings.h | 2 + src/Formats/NativeReader.cpp | 2 +- src/Interpreters/Context.cpp | 64 +++++++++++++++++-- src/Server/TCPHandler.cpp | 24 ++++--- 8 files changed, 87 insertions(+), 15 deletions(-) delete mode 100644 docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md diff --git a/docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/domains/data-types-binary-encoding.md deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 23c5d7fc1a2..65146a65a0f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -31,6 +31,7 @@ class IColumn; * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. */ +// clang-format off #define COMMON_SETTINGS(M, ALIAS) \ M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ @@ -933,6 +934,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ + M(Bool, uniform_snowflake_conversion_functions, true, "Enables functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID while disabling functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
@@ -1149,6 +1151,8 @@ class IColumn; M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, "Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.", 0) \ M(UInt64Auto, output_format_pretty_color, "auto", "Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.", 0) \ M(String, output_format_pretty_grid_charset, "UTF-8", "Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names, true, "Display column names in the footer if there are 999 or more rows.", 0) \ + M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, "Sets the minimum threshold value of rows for which to enable displaying column names in the footer. 50 (default)", 0) \ M(UInt64, output_format_parquet_row_group_size, 1000000, "Target row group size in rows.", 0) \ M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, "Target row group size in bytes, before compression.", 0) \ M(Bool, output_format_parquet_string_as_string, true, "Use Parquet String type instead of Binary for String columns.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e9da55e66c5..e48503bb705 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -75,6 +75,7 @@ namespace SettingsChangesHistory using SettingsChanges = std::vector; } +// clang-format off /// History of settings changes that controls some backward incompatible changes /// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done /// in this version. This history contains both changes to existing settings and newly added settings. @@ -106,6 +107,7 @@ static const std::mapformat_settings_, + std::optional format_settings_, BlockMissingValues * block_missing_values_) : istr(istr_) , header(header_) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index db5c5a37125..90c52d683c2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -280,6 +281,8 @@ struct ContextSharedPart : boost::noncopyable String default_profile_name; /// Default profile name used for default values. 
String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String merge_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all merges + String mutation_workload TSA_GUARDED_BY(mutex); /// Workload setting value that is used by all mutations std::unique_ptr access_control TSA_GUARDED_BY(mutex); mutable OnceFlag resource_manager_initialized; mutable ResourceManagerPtr resource_manager; @@ -610,6 +613,8 @@ struct ContextSharedPart : boost::noncopyable LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); + NamedCollectionFactory::instance().shutdown(); + delete_async_insert_queue.reset(); SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); @@ -830,6 +835,7 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part) auto res = std::shared_ptr(new Context); res->shared = shared_part; res->query_access_info = std::make_shared(); + res->query_privileges_info = std::make_shared(); return res; } @@ -1422,7 +1428,7 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } -std::shared_ptr Context::getAccess() const +std::shared_ptr Context::getAccess() const { /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired. auto get_params = [this]() @@ -1439,14 +1445,14 @@ std::shared_ptr Context::getAccess() const { SharedLockGuard lock(mutex); if (access && !need_recalculate_access) - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. params.emplace(get_params()); if (access && (access->getParams() == *params)) { need_recalculate_access = false; - return access; /// No need to recalculate access rights. + return std::make_shared(access, shared_from_this()); /// No need to recalculate access rights. 
} } @@ -1466,7 +1472,7 @@ std::shared_ptr Context::getAccess() const } } - return res; + return std::make_shared(res, shared_from_this()); } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const @@ -1558,11 +1564,36 @@ ResourceManagerPtr Context::getResourceManager() const ClassifierPtr Context::getWorkloadClassifier() const { std::lock_guard lock(mutex); + // NOTE: Workload cannot be changed after query start, and getWorkloadClassifier() should not be called before proper `workload` is set if (!classifier) classifier = getResourceManager()->acquire(getSettingsRef().workload); return classifier; } +String Context::getMergeWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->merge_workload; +} + +void Context::setMergeWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->merge_workload = value; +} + +String Context::getMutationWorkload() const +{ + SharedLockGuard lock(shared->mutex); + return shared->mutation_workload; +} + +void Context::setMutationWorkload(const String & value) +{ + std::lock_guard lock(shared->mutex); + shared->mutation_workload = value; +} + Scalars Context::getScalars() const { @@ -1827,6 +1858,15 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String } } +void Context::addQueryPrivilegesInfo(const String & privilege, bool granted) const +{ + std::lock_guard lock(query_privileges_info->mutex); + if (granted) + query_privileges_info->used_privileges.emplace(privilege); + else + query_privileges_info->missing_privileges.emplace(privilege); +} + static bool findIdentifier(const ASTFunction * function) { if (!function || !function->arguments) @@ -2508,6 +2548,21 @@ void Context::makeQueryContext() local_read_query_throttler.reset(); local_write_query_throttler.reset(); backups_query_throttler.reset(); + query_privileges_info = std::make_shared(*query_privileges_info); +} + +void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.merge_workload.value.empty() ? getMergeWorkload() : merge_tree_settings.merge_workload; +} + +void Context::makeQueryContextForMutate(const MergeTreeSettings & merge_tree_settings) +{ + makeQueryContext(); + classifier.reset(); // It is assumed that there are no active queries running using this classifier, otherwise this will lead to crashes + settings.workload = merge_tree_settings.mutation_workload.value.empty() ? 
getMutationWorkload() : merge_tree_settings.mutation_workload; } void Context::makeSessionContext() @@ -3943,7 +3998,6 @@ std::shared_ptr Context::getQueryThreadLog() const std::shared_ptr Context::getQueryViewsLog() const { SharedLockGuard lock(shared->mutex); - if (!shared->system_logs) return {}; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index c8f86b1c2c9..d340fbe7e77 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,9 +1,8 @@ -#include "Interpreters/AsynchronousInsertQueue.h" -#include "Interpreters/SquashingTransform.h" -#include "Parsers/ASTInsertQuery.h" +#include +#include +#include #include #include -#include #include #include #include @@ -246,7 +245,6 @@ TCPHandler::~TCPHandler() void TCPHandler::runImpl() { setThreadName("TCPHandler"); - ThreadStatus thread_status; extractConnectionSettingsFromContext(server.context()); @@ -886,13 +884,16 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - SquashingTransform squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,7 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto result = squashing.add({}); + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } From ae072260bcc016a2a38f28aaaced8e690f85ed36 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 19:22:10 +0000 Subject: [PATCH 056/299] Fix style --- src/Common/FieldBinaryEncoding.cpp | 3 ++- src/DataTypes/DataTypesBinaryEncoding.cpp | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Common/FieldBinaryEncoding.cpp b/src/Common/FieldBinaryEncoding.cpp index 7e0d815368c..6c1a8496fe6 100644 --- a/src/Common/FieldBinaryEncoding.cpp +++ b/src/Common/FieldBinaryEncoding.cpp @@ -9,9 +9,10 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; + extern const int INCORRECT_DATA; } -namespace +namespace { enum class FieldBinaryTypeIndex: uint8_t diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp index 4c42f650798..ea62c7b1286 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.cpp +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -39,7 +39,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int UNKNOWN_TYPE; + extern const int INCORRECT_DATA; } namespace @@ -275,7 +275,7 @@ void encodeAggregateFunction(const String & function_name, const Array & paramet encodeDataType(argument_type, buf); } -std::tuple 
decodeAggregateFunction(ReadBuffer & buf) +std::tuple decodeAggregateFunction(ReadBuffer & buf) { String function_name; readStringBinary(function_name, buf); @@ -665,7 +665,7 @@ DataTypePtr decodeDataType(ReadBuffer & buf) } } - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown type code: {0:#04x}", UInt64(type)); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown type code: {0:#04x}", UInt64(type)); } DataTypePtr decodeDataType(const String & data) From b2c22b07c5b191527e6384d17a662cb35d23e58b Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 21 Jun 2024 19:27:48 +0000 Subject: [PATCH 057/299] Update docs --- docs/en/sql-reference/data-types/data-types-binary-encoding.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index ebcb480ea0a..58bc0ed8428 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -1,5 +1,5 @@ --- -slug: /en/sql-reference/data-types/dynamic +slug: /en/sql-reference/data-types/data-types-binary-encoding sidebar_position: 56 sidebar_label: Data types binary encoding specification. --- From a0c8106a36515dd0073d7d738720b00b8ac910da Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 21 Jun 2024 22:15:26 +0200 Subject: [PATCH 058/299] Fix settings names --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e48503bb705..a18f442140e 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,9 +87,9 @@ namespace SettingsChangesHistory static const std::map settings_changes_history = { {"24.7", {{"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, - {"input_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, + {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, {"output_format_native_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in Native output format"}, - {"input_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"} + {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in Native output format"} }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, From 5044a89f3f08e68ea2c8589b75b9c93cd2a71be2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 21 Jun 2024 22:48:47 +0200 Subject: [PATCH 059/299] Update SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index a18f442140e..4a19baf2492 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -89,7 +89,7 @@ static const std::map Date: Mon, 24 Jun 2024 13:25:25 +0000 Subject: [PATCH 060/299] Add separate encoding for datetimes with timezones, fix encoding for Dynamic type --- .../data-types/data-types-binary-encoding.md | 60 +++--- src/DataTypes/DataTypeDynamic.cpp | 2 +- src/DataTypes/DataTypesBinaryEncoding.cpp | 124 ++++++++---- src/DataTypes/DataTypesBinaryEncoding.h | 182 +++++++++--------- .../gtest_data_types_binary_encoding.cpp | 9 +- ...172_dynamic_binary_serialization.reference | 6 +- .../03172_dynamic_binary_serialization.sh | 6 +- ...native_with_binary_encoded_types.reference | 12 ++ ...ry_and_native_with_binary_encoded_types.sh | 6 + 9 files changed, 242 insertions(+), 165 deletions(-) diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index 58bc0ed8428..ba1a4fa44c5 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -27,35 +27,37 @@ sidebar_label: Data types binary encoding specification. | `Date` | `0x0F` | | `Date32` | `0x10` | | `DateTime` | `0x11` | -| `DateTime64(P)` | `0x12` | -| `String` | `0x13` | -| `FixedString(N)` | `0x14` | -| `Enum8` | `0x15...` | -| `Enum16` | `0x16...>` | -| `Decimal32(P, S)` | `0x17` | -| `Decimal64(P, S)` | `0x18` | -| `Decimal128(P, S)` | `0x19` | -| `Decimal256(P, S)` | `0x1A` | -| `UUID` | `0x1B` | -| `Array(T)` | `0x1C` | -| `Tuple(T1, ..., TN)` | `0x1D...` | -| `Tuple(name1 T1, ..., nameN TN)` | `0x1E...` | -| `Set` | `0x1F` | -| `Interval` | `0x20` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | -| `Nullable(T)` | `0x21` | -| `Function` | `0x22...` | -| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x23......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | -| `LowCardinality(T)` | `0x24` | -| `Map(K, V)` | `0x25` | -| `Object('schema_format')` | `0x26` | -| `IPv4` | `0x27` | -| `IPv6` | `0x28` | -| `Variant(T1, ..., TN)` | `0x29...` | -| `Dynamic` | `0x2A` | -| `Custom type` (`Ring`, `Polygon`, etc) | `0x2B` | -| `Bool` | `0x2C` | -| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2D......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | -| `Nested(name1 T1, ..., nameN TN)` | `0x2E...` | +| `DateTime(time_zone)` | `0x12` | +| `DateTime64(P)` | `0x13` | +| `DateTime64(P, time_zone)` | `0x14` | +| `String` | `0x15` | +| `FixedString(N)` | `0x16` | +| `Enum8` | `0x17...` | +| `Enum16` | `0x18...>` | +| `Decimal32(P, S)` | `0x19` | +| `Decimal64(P, S)` | `0x1A` | +| `Decimal128(P, S)` | `0x1B` | +| `Decimal256(P, S)` | `0x1C` | +| `UUID` | `0x1D` | +| `Array(T)` | `0x1E` | +| `Tuple(T1, ..., TN)` | `0x1F...` | +| `Tuple(name1 T1, ..., nameN TN)` | `0x20...` | +| `Set` | `0x21` | +| `Interval` | `0x22` (see [interval kind binary encoding](#interval-kind-binary-encoding)) | +| `Nullable(T)` | `0x23` | +| `Function` | `0x24...` | +| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `LowCardinality(T)` | 
`0x26` | +| `Map(K, V)` | `0x27` | +| `Object('schema_format')` | `0x28` | +| `IPv4` | `0x29` | +| `IPv6` | `0x2A` | +| `Variant(T1, ..., TN)` | `0x2B...` | +| `Dynamic(max_types=N)` | `0x2C` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2D` | +| `Bool` | `0x2E` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2F......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x30...` | ### Interval kind binary encoding diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..ba263c2f7fa 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -69,7 +69,7 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); return std::make_shared(literal->value.get()); diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp index ea62c7b1286..02a58897529 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.cpp +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -64,36 +64,38 @@ enum class BinaryTypeIndex : uint8_t Float64 = 0x0E, Date = 0x0F, Date32 = 0x10, - DateTime = 0x11, - DateTime64 = 0x12, - String = 0x13, - FixedString = 0x14, - Enum8 = 0x15, - Enum16 = 0x16, - Decimal32 = 0x17, - Decimal64 = 0x18, - Decimal128 = 0x19, - Decimal256 = 0x1A, - UUID = 0x1B, - Array = 0x1C, - UnnamedTuple = 0x1D, - NamedTuple = 0x1E, - Set = 0x1F, - Interval = 0x20, - Nullable = 0x21, - Function = 0x22, - AggregateFunction = 0x23, - LowCardinality = 0x24, - Map = 0x25, - Object = 0x26, - IPv4 = 0x27, - IPv6 = 0x28, - Variant = 0x29, - Dynamic = 0x2A, - Custom = 0x2B, - Bool = 0x2C, - SimpleAggregateFunction = 0x2D, - Nested = 0x2E, + DateTimeUTC = 0x11, + DateTimeWithTimezone = 0x12, + DateTime64UTC = 0x13, + DateTime64WithTimezone = 0x14, + String = 0x15, + FixedString = 0x16, + Enum8 = 0x17, + Enum16 = 0x18, + Decimal32 = 0x19, + Decimal64 = 0x1A, + Decimal128 = 0x1B, + Decimal256 = 0x1C, + UUID = 0x1D, + Array = 0x1E, + UnnamedTuple = 0x1F, + NamedTuple = 0x20, + Set = 0x21, + Interval = 0x22, + Nullable = 0x23, + Function = 0x24, + AggregateFunction = 0x25, + LowCardinality = 0x26, + Map = 0x27, + Object = 0x28, + IPv4 = 0x29, + IPv6 = 0x2A, + Variant = 0x2B, + Dynamic = 0x2C, + Custom = 0x2D, + Bool = 0x2E, + SimpleAggregateFunction = 0x2F, + Nested = 0x30, }; BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) @@ -154,9 +156,13 @@ BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) case TypeIndex::Date32: return BinaryTypeIndex::Date32; case TypeIndex::DateTime: - return BinaryTypeIndex::DateTime; + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTimeWithTimezone; + return BinaryTypeIndex::DateTimeUTC; case TypeIndex::DateTime64: - return BinaryTypeIndex::DateTime64; + if (assert_cast(*type).hasExplicitTimeZone()) + return BinaryTypeIndex::DateTime64WithTimezone; + return BinaryTypeIndex::DateTime64UTC; case TypeIndex::String: return 
BinaryTypeIndex::String; case TypeIndex::FixedString: @@ -307,13 +313,26 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) /// Then, write additional information depending on the data type. switch (binary_type_index) { - case BinaryTypeIndex::DateTime64: + case BinaryTypeIndex::DateTimeWithTimezone: + { + const auto & datetime_type = assert_cast(*type); + writeStringBinary(datetime_type.getTimeZone().getTimeZone(), buf); + break; + } + case BinaryTypeIndex::DateTime64UTC: { const auto & datetime64_type = assert_cast(*type); /// Maximum scale for DateTime64 is 9, so we can write it as 1 byte. buf.write(UInt8(datetime64_type.getScale())); break; } + case BinaryTypeIndex::DateTime64WithTimezone: + { + const auto & datetime64_type = assert_cast(*type); + buf.write(UInt8(datetime64_type.getScale())); + writeStringBinary(datetime64_type.getTimeZone().getTimeZone(), buf); + break; + } case BinaryTypeIndex::FixedString: { const auto & fixed_string_type = assert_cast(*type); @@ -372,10 +391,10 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) case BinaryTypeIndex::UnnamedTuple: { const auto & tuple_type = assert_cast(*type); - const auto & types = tuple_type.getElements(); - writeVarUInt(types.size(), buf); - for (size_t i = 0; i != types.size(); ++i) - encodeDataType(types[i], buf); + const auto & element_types = tuple_type.getElements(); + writeVarUInt(element_types.size(), buf); + for (const auto & element_type : element_types) + encodeDataType(element_type, buf); break; } case BinaryTypeIndex::Interval: @@ -430,6 +449,13 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) encodeDataType(variant, buf); break; } + case BinaryTypeIndex::Dynamic: + { + const auto & dynamic_type = assert_cast(*type); + /// Maximum number of dynamic types is 255, we can write it as 1 byte. 
+ writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf); + break; + } case BinaryTypeIndex::AggregateFunction: { const auto & aggregate_function_type = assert_cast(*type); @@ -516,14 +542,28 @@ DataTypePtr decodeDataType(ReadBuffer & buf) return std::make_shared(); case BinaryTypeIndex::Date32: return std::make_shared(); - case BinaryTypeIndex::DateTime: + case BinaryTypeIndex::DateTimeUTC: return std::make_shared(); - case BinaryTypeIndex::DateTime64: + case BinaryTypeIndex::DateTimeWithTimezone: + { + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(time_zone); + } + case BinaryTypeIndex::DateTime64UTC: { UInt8 scale; readBinary(scale, buf); return std::make_shared(scale); } + case BinaryTypeIndex::DateTime64WithTimezone: + { + UInt8 scale; + readBinary(scale, buf); + String time_zone; + readStringBinary(time_zone, buf); + return std::make_shared(scale, time_zone); + } case BinaryTypeIndex::String: return std::make_shared(); case BinaryTypeIndex::FixedString: @@ -627,7 +667,11 @@ DataTypePtr decodeDataType(ReadBuffer & buf) return std::make_shared(variants); } case BinaryTypeIndex::Dynamic: - return std::make_shared(); + { + UInt8 max_dynamic_types; + readBinary(max_dynamic_types, buf); + return std::make_shared(max_dynamic_types); + } case BinaryTypeIndex::AggregateFunction: { size_t version; diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h index b4ed500f185..d735565f636 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.h +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -8,104 +8,106 @@ namespace DB /** Binary encoding for ClickHouse data types: -|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ClickHouse data type | Binary encoding | -|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Nothing` | `0x00` | -| `UInt8` | `0x01` | -| `UInt16` | `0x02` | -| `UInt32` | `0x03` | -| `UInt64` | `0x04` | -| `UInt128` | `0x05` | -| `UInt256` | `0x06` | -| `Int8` | `0x07` | -| `Int16` | `0x08` | -| `Int32` | `0x09` | -| `Int64` | `0x0A` | -| `Int128` | `0x0B` | -| `Int256` | `0x0C` | -| `Float32` | `0x0D` | -| `Float64` | `0x0E` | -| `Date` | `0x0F` | -| `Date32` | `0x10` | -| `DateTime` | `0x11` | -| `DateTime64(P)` | `0x12` | -| `String` | `0x13` | -| `FixedString(N)` | `0x14` | -| `Enum8` | `0x15...` | -| `Enum16` | `0x16...>` | -| `Decimal32(P, S)` | `0x17` | -| `Decimal64(P, S)` | `0x18` | -| `Decimal128(P, S)` | `0x19` | -| `Decimal256(P, S)` | `0x1A` | -| `UUID` | `0x1B` | -| `Array(T)` | `0x1C` | -| `Tuple(T1, ..., TN)` | `0x1D...` | -| `Tuple(name1 T1, ..., nameN TN)` | `0x1E...` | -| `Set` | `0x1F` | -| `Interval` | `0x20` | -| `Nullable(T)` | `0x21` | -| `Function` | `0x22...` | -| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x23......` | -| `LowCardinality(T)` | `0x24` | -| `Map(K, V)` | `0x25` | -| `Object('schema_format')` | `0x26` | -| `IPv4` | `0x27` | -| `IPv6` | `0x28` | -| `Variant(T1, ..., TN)` | `0x29...` | -| `Dynamic` | `0x2A` | -| `Custom type` (`Ring`, 
`Polygon`, etc) | `0x2B` | -| `Bool` | `0x2C` | -| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2D......` | -| `Nested(name1 T1, ..., nameN TN)` | `0x2E...` | -|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ClickHouse data type | Binary encoding | +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Nothing | 0x00 | +| UInt8 | 0x01 | +| UInt16 | 0x02 | +| UInt32 | 0x03 | +| UInt64 | 0x04 | +| UInt128 | 0x05 | +| UInt256 | 0x06 | +| Int8 | 0x07 | +| Int16 | 0x08 | +| Int32 | 0x09 | +| Int64 | 0x0A | +| Int128 | 0x0B | +| Int256 | 0x0C | +| Float32 | 0x0D | +| Float64 | 0x0E | +| Date | 0x0F | +| Date32 | 0x10 | +| DateTime | 0x11 | +| DateTime(time_zone) | 0x12 | +| DateTime64(P) | 0x13 | +| DateTime64(P, time_zone) | 0x14 | +| String | 0x15 | +| FixedString(N) | 0x16 | +| Enum8 | 0x17... | +| Enum16 | 0x18...> | +| Decimal32(P, S) | 0x19 | +| Decimal64(P, S) | 0x1A | +| Decimal128(P, S) | 0x1B | +| Decimal256(P, S) | 0x1C | +| UUID | 0x1D | +| Array(T) | 0x1E | +| Tuple(T1, ..., TN) | 0x1F... | +| Tuple(name1 T1, ..., nameN TN) | 0x20... | +| Set | 0x21 | +| Interval | 0x22 | +| Nullable(T) | 0x23 | +| Function | 0x24... | +| AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | +| LowCardinality(T) | 0x26 | +| Map(K, V) | 0x27 | +| Object('schema_format') | 0x28 | +| IPv4 | 0x29 | +| IPv6 | 0x2A | +| Variant(T1, ..., TN) | 0x2B... | +| Dynamic(max_types=N) | 0x2C | +| Custom type (Ring, Polygon, etc) | 0x2D | +| Bool | 0x2E | +| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2F...... | +| Nested(name1 T1, ..., nameN TN) | 0x30... 
| +|------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| Interval kind binary encoding: |---------------|-----------------| | Interval kind | Binary encoding | |---------------|-----------------| -| `Nanosecond` | `0x00` | -| `Microsecond` | `0x01` | -| `Millisecond` | `0x02` | -| `Second` | `0x03` | -| `Minute` | `0x04` | -| `Hour` | `0x05` | -| `Day` | `0x06` | -| `Week` | `0x07` | -| `Month` | `0x08` | -| `Quarter` | `0x09` | -| `Year` | `0x1A` | +| Nanosecond | 0x00 | +| Microsecond | 0x01 | +| Millisecond | 0x02 | +| Second | 0x03 | +| Minute | 0x04 | +| Hour | 0x05 | +| Day | 0x06 | +| Week | 0x07 | +| Month | 0x08 | +| Quarter | 0x09 | +| Year | 0x1A | |---------------|-----------------| Aggregate function parameter binary encoding (binary encoding of a Field, see src/Common/FieldBinaryEncoding.h): -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| Parameter type | Binary encoding | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `Null` | `0x00` | -| `UInt64` | `0x01` | -| `Int64` | `0x02` | -| `UInt128` | `0x03` | -| `Int128` | `0x04` | -| `UInt128` | `0x05` | -| `Int128` | `0x06` | -| `Float64` | `0x07` | -| `Decimal32` | `0x08` | -| `Decimal64` | `0x09` | -| `Decimal128` | `0x0A` | -| `Decimal256` | `0x0B` | -| `String` | `0x0C` | -| `Array` | `0x0D...` | -| `Tuple` | `0x0E...` | -| `Map` | `0x0F...` | -| `IPv4` | `0x10` | -| `IPv6` | `0x11` | -| `UUID` | `0x12` | -| `Bool` | `0x13` | -| `Object` | `0x14...` | -| `AggregateFunctionState` | `0x15` | -| `Negative infinity` | `0xFE` | -| `Positive infinity` | `0xFF` | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------| +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Parameter type | Binary encoding | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| +| Null | 0x00 | +| UInt64 | 0x01 | +| Int64 | 0x02 | +| UInt128 | 0x03 | +| Int128 | 0x04 | +| UInt128 | 0x05 | +| Int128 | 0x06 | +| Float64 | 0x07 | +| Decimal32 | 0x08 | +| Decimal64 | 0x09 | +| Decimal128 | 0x0A | +| Decimal256 | 0x0B | +| String | 0x0C | +| Array | 0x0D... | +| Tuple | 0x0E... | +| Map | 0x0F... | +| IPv4 | 0x10 | +| IPv6 | 0x11 | +| UUID | 0x12 | +| Bool | 0x13 | +| Object | 0x14... 
| +| AggregateFunctionState | 0x15 | +| Negative infinity | 0xFE | +| Positive infinity | 0xFF | +|------------------------|------------------------------------------------------------------------------------------------------------------------------| */ String encodeDataType(const DataTypePtr & type); diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp index 4459e2558b6..6dbb9451922 100644 --- a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -21,6 +21,7 @@ #include #include #include +#include using namespace DB; @@ -44,7 +45,7 @@ void check(const DataTypePtr & type) GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) { - registerAggregateFunctions(); + tryRegisterAggregateFunctions(); check(std::make_shared()); check(std::make_shared()); check(std::make_shared()); @@ -63,7 +64,11 @@ GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) check(std::make_shared()); check(std::make_shared()); check(std::make_shared()); + check(std::make_shared("EST")); + check(std::make_shared("CET")); check(std::make_shared(3)); + check(std::make_shared(3, "EST")); + check(std::make_shared(3, "CET")); check(std::make_shared()); check(std::make_shared(10)); check(DataTypeFactory::instance().get("Enum8('a' = 1, 'b' = 2, 'c' = 3, 'd' = -128)")); @@ -109,6 +114,8 @@ GTEST_TEST(DataTypesBinaryEncoding, EncodeAndDecode) check(std::make_shared()); check(DataTypeFactory::instance().get("Variant(String, UInt32, Date32)")); check(std::make_shared()); + check(std::make_shared(10)); + check(std::make_shared(255)); check(DataTypeFactory::instance().get("Bool")); check(DataTypeFactory::instance().get("SimpleAggregateFunction(sum, UInt64)")); check(DataTypeFactory::instance().get("SimpleAggregateFunction(maxMap, Tuple(Array(UInt32), Array(UInt32)))")); diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference index 26bd3326d5c..1670a7e9634 100644 --- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference @@ -15,8 +15,10 @@ 42.42 Float64 2020-01-01 Date 2020-01-01 Date32 -2020-01-01 00:00:00 DateTime -2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') Hello, World! 
String aaaaa FixedString(5) a Enum8(\'c\' = -128, \'a\' = 1, \'b\' = 2) diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh index b078211f088..9e6d78adba5 100755 --- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -24,8 +24,10 @@ $CLICKHOUSE_CLIENT -q "insert into test select 13, materialize(42.42)::Float32" $CLICKHOUSE_CLIENT -q "insert into test select 14, materialize(42.42)::Float64" $CLICKHOUSE_CLIENT -q "insert into test select 15, materialize('2020-01-01')::Date" $CLICKHOUSE_CLIENT -q "insert into test select 16, materialize('2020-01-01')::Date32" -$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime" -$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" $CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('Hello, World!')" $CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('aaaaa')::FixedString(5)" $CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference index 0bc257adf23..1ba147f9627 100644 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -36,8 +36,16 @@ 2020-01-01 Date32 2020-01-01 00:00:00 DateTime 2020-01-01 00:00:00 DateTime +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') +2020-01-01 00:00:00 DateTime(\'CET\') 2020-01-01 00:00:00.000000 DateTime64(6) 2020-01-01 00:00:00.000000 DateTime64(6) +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') +2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') Hello, World! String Hello, World! 
String aaaaa FixedString(5) @@ -100,3 +108,7 @@ true Bool 42 Variant(String, Tuple(\n a UInt32,\n b Array(Map(String, String))), UInt32) [{42:(1,[(2,{1:2})])}] Dynamic [{42:(1,[(2,{1:2})])}] Dynamic +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index a1e8aa99548..723b11ad620 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -29,7 +29,11 @@ test "materialize(42.42)::Float64" test "materialize('2020-01-01')::Date" test "materialize('2020-01-01')::Date32" test "materialize('2020-01-01 00:00:00')::DateTime" +test "materialize('2020-01-01 00:00:00')::DateTime('EST')" +test "materialize('2020-01-01 00:00:00')::DateTime('CET')" test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6)" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +test "materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" test "materialize('Hello, World!')" test "materialize('aaaaa')::FixedString(5)" test "materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" @@ -61,3 +65,5 @@ test "materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=10)" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=255)" From 1c667723a374854fb6a8249cf3f7b62458f65ba3 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 25 Jun 2024 12:12:51 +0000 Subject: [PATCH 061/299] Fix test flakiness --- ...172_dynamic_binary_serialization.reference | 2 +- .../03172_dynamic_binary_serialization.sh | 64 +++++++++---------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference index 1670a7e9634..f5668ed935b 100644 --- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.reference @@ -15,8 +15,8 @@ 42.42 Float64 2020-01-01 Date 2020-01-01 Date32 -2020-01-01 00:00:00 DateTime(\'CET\') 2020-01-01 00:00:00 DateTime(\'EST\') +2020-01-01 00:00:00 DateTime(\'CET\') 2020-01-01 00:00:00.000000 DateTime64(6, \'EST\') 2020-01-01 00:00:00.000000 DateTime64(6, \'CET\') Hello, World! 
String diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh index 9e6d78adba5..9b57e5c8718 100755 --- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh +++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh @@ -25,38 +25,38 @@ $CLICKHOUSE_CLIENT -q "insert into test select 14, materialize(42.42)::Float64" $CLICKHOUSE_CLIENT -q "insert into test select 15, materialize('2020-01-01')::Date" $CLICKHOUSE_CLIENT -q "insert into test select 16, materialize('2020-01-01')::Date32" $CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('EST')" -$CLICKHOUSE_CLIENT -q "insert into test select 17, materialize('2020-01-01 00:00:00')::DateTime('CET')" -$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" -$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" -$CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('Hello, World!')" -$CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('aaaaa')::FixedString(5)" -$CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" -$CLICKHOUSE_CLIENT -q "insert into test select 22, materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" -$CLICKHOUSE_CLIENT -q "insert into test select 23, materialize(42.42)::Decimal32(3)" -$CLICKHOUSE_CLIENT -q "insert into test select 24, materialize(42.42)::Decimal64(3)" -$CLICKHOUSE_CLIENT -q "insert into test select 25, materialize(42.42)::Decimal128(3)" -$CLICKHOUSE_CLIENT -q "insert into test select 26, materialize(42.42)::Decimal256(3)" -$CLICKHOUSE_CLIENT -q "insert into test select 27, materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" -$CLICKHOUSE_CLIENT -q "insert into test select 28, materialize([1, 2, 3])::Array(UInt64)" -$CLICKHOUSE_CLIENT -q "insert into test select 29, materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" -$CLICKHOUSE_CLIENT -q "insert into test select 30, materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" -$CLICKHOUSE_CLIENT -q "insert into test select 31, materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" -$CLICKHOUSE_CLIENT -q "insert into test select 32, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" -$CLICKHOUSE_CLIENT -q "insert into test select 33, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" -$CLICKHOUSE_CLIENT -q "insert into test select 34, quantileState(0.5)(42::UInt64)" -$CLICKHOUSE_CLIENT -q "insert into test select 35, sumSimpleState(42::UInt64)" -$CLICKHOUSE_CLIENT -q "insert into test select 36, toLowCardinality('Hello, World!')" -$CLICKHOUSE_CLIENT -q "insert into test select 37, materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" -$CLICKHOUSE_CLIENT -q "insert into test select 38, materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" -$CLICKHOUSE_CLIENT -q "insert into test select 39, materialize('127.0.0.0')::IPv4" -$CLICKHOUSE_CLIENT -q "insert into test select 40, materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" -$CLICKHOUSE_CLIENT -q "insert into test select 41, materialize(true)::Bool" -$CLICKHOUSE_CLIENT -q "insert into test select 42, materialize([tuple(1, 2), 
tuple(3, 4)])::Nested(a UInt32, b UInt32)" -$CLICKHOUSE_CLIENT -q "insert into test select 43, materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" -$CLICKHOUSE_CLIENT -q "insert into test select 44, materialize((0, 0))::Point" -$CLICKHOUSE_CLIENT -q "insert into test select 45, materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" -$CLICKHOUSE_CLIENT -q "insert into test select 46, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" -$CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" +$CLICKHOUSE_CLIENT -q "insert into test select 18, materialize('2020-01-01 00:00:00')::DateTime('CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 19, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'EST')" +$CLICKHOUSE_CLIENT -q "insert into test select 20, materialize('2020-01-01 00:00:00.000000')::DateTime64(6, 'CET')" +$CLICKHOUSE_CLIENT -q "insert into test select 21, materialize('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 22, materialize('aaaaa')::FixedString(5)" +$CLICKHOUSE_CLIENT -q "insert into test select 23, materialize('a')::Enum8('a' = 1, 'b' = 2, 'c' = -128)" +$CLICKHOUSE_CLIENT -q "insert into test select 24, materialize('a')::Enum16('a' = 1, 'b' = 2, 'c' = -1280)" +$CLICKHOUSE_CLIENT -q "insert into test select 25, materialize(42.42)::Decimal32(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 26, materialize(42.42)::Decimal64(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 27, materialize(42.42)::Decimal128(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 28, materialize(42.42)::Decimal256(3)" +$CLICKHOUSE_CLIENT -q "insert into test select 29, materialize('984ac60f-4d08-4ef1-9c62-d82f343fbc90')::UUID" +$CLICKHOUSE_CLIENT -q "insert into test select 30, materialize([1, 2, 3])::Array(UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 31, materialize([[[1], [2]], [[3, 4, 5]]])::Array(Array(Array(UInt64)))" +$CLICKHOUSE_CLIENT -q "insert into test select 32, materialize(tuple(1, 'str', 42.42))::Tuple(UInt32, String, Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 33, materialize(tuple(1, 'str', 42.42))::Tuple(a UInt32, b String, c Float32)" +$CLICKHOUSE_CLIENT -q "insert into test select 34, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(UInt32, Tuple(String, Tuple(Float32, Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 35, materialize(tuple(1, tuple('str', tuple(42.42, -30))))::Tuple(a UInt32, b Tuple(c String, d Tuple(e Float32, f Int8)))" +$CLICKHOUSE_CLIENT -q "insert into test select 36, quantileState(0.5)(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 37, sumSimpleState(42::UInt64)" +$CLICKHOUSE_CLIENT -q "insert into test select 38, toLowCardinality('Hello, World!')" +$CLICKHOUSE_CLIENT -q "insert into test select 39, materialize(map(1, 'str1', 2, 'str2'))::Map(UInt64, String)" +$CLICKHOUSE_CLIENT -q "insert into test select 40, materialize(map(1, map(1, map(1, 'str1')), 2, map(2, map(2, 'str2'))))::Map(UInt64, Map(UInt64, Map(UInt64, String)))" +$CLICKHOUSE_CLIENT -q "insert into test select 41, materialize('127.0.0.0')::IPv4" +$CLICKHOUSE_CLIENT -q "insert into test select 42, materialize('2001:db8:cafe:1:0:0:0:1')::IPv6" +$CLICKHOUSE_CLIENT -q "insert into test select 43, materialize(true)::Bool" +$CLICKHOUSE_CLIENT -q "insert into test select 44, materialize([tuple(1, 2), tuple(3, 
4)])::Nested(a UInt32, b UInt32)" +$CLICKHOUSE_CLIENT -q "insert into test select 45, materialize([(0, 0), (10, 0), (10, 10), (0, 10)])::Ring" +$CLICKHOUSE_CLIENT -q "insert into test select 46, materialize((0, 0))::Point" +$CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]])::Polygon" +$CLICKHOUSE_CLIENT -q "insert into test select 48, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon" +$CLICKHOUSE_CLIENT -q "insert into test select 49, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" $CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id" $CLICKHOUSE_CLIENT -q "drop table test" From 1525dff4c3ba7c5c31d677864014ae75bdee8102 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 25 Jun 2024 18:01:42 +0200 Subject: [PATCH 062/299] Fix links in docs --- docs/en/interfaces/formats.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index dcf87c13786..d3eb37b536f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1536,9 +1536,9 @@ Otherwise, the first row will be skipped. If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. If setting [output_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#output_format_binary_encode_types_in_binary_format) is set to 1, -the types in header will be written using [binary encoding](../../sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes output format. +the types in header will be written using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes output format. If setting [input_format_binary_encode_types_in_binary_format](/docs/en/operations/settings/settings-formats.md/#input_format_binary_encode_types_in_binary_format) is set to 1, -the types in header will be read using [binary encoding](../../sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes input format. +the types in header will be read using [binary encoding](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) instead of strings with type names in RowBinaryWithNamesAndTypes input format. 
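For illustration, a round trip that exercises both settings could look like the sketch below (the table name `test` and the surrounding schema are assumptions for the example, not taken from this patch; only the two setting names come from the text above):

```sql
-- Emit the header with binary-encoded types instead of type-name strings.
SELECT * FROM test
SETTINGS output_format_binary_encode_types_in_binary_format = 1
FORMAT RowBinaryWithNamesAndTypes

-- Read the same stream back; the input-side setting must match so the header
-- is decoded as binary-encoded types rather than as type names.
INSERT INTO test
SETTINGS input_format_binary_encode_types_in_binary_format = 1
FORMAT RowBinaryWithNamesAndTypes
```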
::: ## RowBinaryWithDefaults {#rowbinarywithdefaults} From cbb850517f71607a9b7f5b8d645b9ac8c90b387b Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 27 Jun 2024 15:18:14 +0000 Subject: [PATCH 063/299] Remove support for depricated Object type in the encoding --- .../data-types/data-types-binary-encoding.md | 17 +++++---- src/DataTypes/DataTypesBinaryEncoding.cpp | 36 ++++++------------- src/DataTypes/DataTypesBinaryEncoding.h | 17 +++++---- .../gtest_data_types_binary_encoding.cpp | 1 - 4 files changed, 26 insertions(+), 45 deletions(-) diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index ba1a4fa44c5..d7eddf848d6 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -49,15 +49,14 @@ sidebar_label: Data types binary encoding specification. | `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | | `LowCardinality(T)` | `0x26` | | `Map(K, V)` | `0x27` | -| `Object('schema_format')` | `0x28` | -| `IPv4` | `0x29` | -| `IPv6` | `0x2A` | -| `Variant(T1, ..., TN)` | `0x2B...` | -| `Dynamic(max_types=N)` | `0x2C` | -| `Custom type` (`Ring`, `Polygon`, etc) | `0x2D` | -| `Bool` | `0x2E` | -| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2F......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | -| `Nested(name1 T1, ..., nameN TN)` | `0x30...` | +| `IPv4` | `0x28` | +| `IPv6` | `0x29` | +| `Variant(T1, ..., TN)` | `0x2A...` | +| `Dynamic(max_types=N)` | `0x2B` | +| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C` | +| `Bool` | `0x2D` | +| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E......` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) | +| `Nested(name1 T1, ..., nameN TN)` | `0x2F...` | ### Interval kind binary encoding diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp index 02a58897529..bd994e313ba 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.cpp +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -87,15 +86,14 @@ enum class BinaryTypeIndex : uint8_t AggregateFunction = 0x25, LowCardinality = 0x26, Map = 0x27, - Object = 0x28, - IPv4 = 0x29, - IPv6 = 0x2A, - Variant = 0x2B, - Dynamic = 0x2C, - Custom = 0x2D, - Bool = 0x2E, - SimpleAggregateFunction = 0x2F, - Nested = 0x30, + IPv4 = 0x28, + IPv6 = 0x29, + Variant = 0x2A, + Dynamic = 0x2B, + Custom = 0x2C, + Bool = 0x2D, + SimpleAggregateFunction = 0x2E, + Nested = 0x2F, }; BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) @@ -205,7 +203,8 @@ BinaryTypeIndex getBinaryTypeIndex(const DataTypePtr & type) case TypeIndex::Map: return BinaryTypeIndex::Map; case TypeIndex::Object: - return BinaryTypeIndex::Object; + /// Object type will be deprecated and replaced by new implementation. No need to support it here. 
+ throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Binary encoding of type Object is not supported"); case TypeIndex::IPv4: return BinaryTypeIndex::IPv4; case TypeIndex::IPv6: @@ -433,13 +432,6 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) encodeDataType(map_type.getValueType(), buf); break; } - case BinaryTypeIndex::Object: - { - const auto & object_deprecated_type = assert_cast(*type); - writeBinary(object_deprecated_type.hasNullableSubcolumns(), buf); - writeStringBinary(object_deprecated_type.getSchemaFormat(), buf); - break; - } case BinaryTypeIndex::Variant: { const auto & variant_type = assert_cast(*type); @@ -644,14 +636,6 @@ DataTypePtr decodeDataType(ReadBuffer & buf) auto value_type = decodeDataType(buf); return std::make_shared(key_type, value_type); } - case BinaryTypeIndex::Object: - { - bool has_nullable_subcolumns; - readBinary(has_nullable_subcolumns, buf); - String schema_format; - readStringBinary(schema_format, buf); - return std::make_shared(schema_format, has_nullable_subcolumns); - } case BinaryTypeIndex::IPv4: return std::make_shared(); case BinaryTypeIndex::IPv6: diff --git a/src/DataTypes/DataTypesBinaryEncoding.h b/src/DataTypes/DataTypesBinaryEncoding.h index d735565f636..d02e7f85942 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.h +++ b/src/DataTypes/DataTypesBinaryEncoding.h @@ -51,15 +51,14 @@ Binary encoding for ClickHouse data types: | AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x25...... | | LowCardinality(T) | 0x26 | | Map(K, V) | 0x27 | -| Object('schema_format') | 0x28 | -| IPv4 | 0x29 | -| IPv6 | 0x2A | -| Variant(T1, ..., TN) | 0x2B... | -| Dynamic(max_types=N) | 0x2C | -| Custom type (Ring, Polygon, etc) | 0x2D | -| Bool | 0x2E | -| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2F...... | -| Nested(name1 T1, ..., nameN TN) | 0x30... | +| IPv4 | 0x28 | +| IPv6 | 0x29 | +| Variant(T1, ..., TN) | 0x2A... | +| Dynamic(max_types=N) | 0x2B | +| Custom type (Ring, Polygon, etc) | 0x2C | +| Bool | 0x2D | +| SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN) | 0x2E...... | +| Nested(name1 T1, ..., nameN TN) | 0x2F... 
| |------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| Interval kind binary encoding: diff --git a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp index 6dbb9451922..4d0bfc67183 100644 --- a/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp +++ b/src/DataTypes/tests/gtest_data_types_binary_encoding.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include From f95434f4c001ce2fdce6066de241181f771a48f0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 28 Jun 2024 14:31:11 +0000 Subject: [PATCH 064/299] Better handling join on nulls --- .../Passes/LogicalExpressionOptimizerPass.cpp | 191 +++++++++++++----- src/Analyzer/QueryTreeBuilder.cpp | 1 + src/Parsers/ASTFunction.cpp | 39 ++-- src/Parsers/ASTTablesInSelectQuery.cpp | 2 +- src/Planner/PlannerJoins.cpp | 2 +- ...11_join_on_nullsafe_optimization.reference | 6 +- .../02911_join_on_nullsafe_optimization.sql | 6 +- 7 files changed, 171 insertions(+), 76 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index ac221bd66e7..3cbc44315c9 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -26,12 +27,100 @@ static constexpr std::array boolean_functions{ "like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv, "or"sv}; -static bool isBooleanFunction(const String & func_name) + +bool isBooleanFunction(const String & func_name) { return std::any_of( boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; }); } +bool isNodeFunction(const QueryTreeNodePtr & node, const String & func_name) +{ + if (const auto * function_node = node->as()) + return function_node->getFunctionName() == func_name; + return false; +} + +QueryTreeNodePtr getFunctionArgument(const QueryTreeNodePtr & node, size_t idx) +{ + if (const auto * function_node = node->as()) + { + const auto & args = function_node->getArguments().getNodes(); + if (idx < args.size()) + return args[idx]; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected '{}' to be a function with at least {} arguments", node->formatASTForErrorMessage(), idx + 1); +} + +QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) +{ + for (const auto & node : nodes) + { + const auto * function_node = node->as(); + if (function_node && function_node->getFunctionName() == "equals" && + function_node->getArguments().getNodes().size() == 2) + { + return node; + } + } + return nullptr; +} + +bool isNodeBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) +{ + const auto * constant_node = node->as(); + if (!constant_node || !constant_node->getResultType()->equals(DataTypeUInt8())) + return false; + + UInt64 constant_value; + return (constant_node->getValue().tryGet(constant_value) && constant_value == expected_value); +} + +/// Returns true if expression consists of only conjunctions of functions with the specified name or true constants +bool isOnlyConjunctionOfFunctions( + const QueryTreeNodePtr & node, + const String & func_name, + const 
QueryTreeNodePtrWithHashSet & allowed_arguments) +{ + if (isNodeBooleanConstant(node, true)) + return true; + + const auto * node_function = node->as(); + if (node_function + && node_function->getFunctionName() == "isNotNull" + && allowed_arguments.contains(node_function->getArgumentsNode())) + return true; + + if (node_function && node_function->getFunctionName() == "and") + { + for (const auto & and_argument : node_function->getArguments().getNodes()) + { + if (!isOnlyConjunctionOfFunctions(and_argument, func_name, allowed_arguments)) + return false; + } + } + return false; +} + +/// We can rewrite to a <=> b only if we are joining on a and b, +/// because the function is not yet implemented for other cases. +bool isTwoArgumentsFromDifferentSides(const FunctionNode & node_function, const JoinNode & join_node) +{ + const auto & argument_nodes = node_function.getArguments().getNodes(); + if (argument_nodes.size() != 2) + return false; + + auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + return false; + + const auto & lhs_join = *join_node.getLeftTableExpression(); + const auto & rhs_join = *join_node.getRightTableExpression(); + return (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); +} + /// Visitor that optimizes logical expressions _only_ in JOIN ON section class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext { @@ -78,7 +167,6 @@ private: auto & function_node = node->as(); chassert(function_node.getFunctionName() == "or"); - QueryTreeNodes or_operands; or_operands.reserve(function_node.getArguments().getNodes().size()); @@ -93,14 +181,22 @@ private: * b => [(a IS NULL AND b IS NULL)] * c => [(a IS NULL AND c IS NULL)] * } - * Then for each a <=> b we can find all operands that contains both a IS NULL and b IS NULL + * Then for each equality a = b we can check if we have operand (a IS NULL AND b IS NULL) */ QueryTreeNodePtrWithHashMap> is_null_argument_to_indices; + bool is_anything_changed = false; + for (const auto & argument : function_node.getArguments()) { - or_operands.push_back(argument); + if (isNodeBooleanConstant(argument, false)) + { + /// Remove false constants from OR + is_anything_changed = true; + continue; + } + or_operands.push_back(argument); auto * argument_function = argument->as(); if (!argument_function) continue; @@ -108,32 +204,48 @@ private: const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { - const auto & argument_nodes = argument_function->getArguments().getNodes(); - if (argument_nodes.size() != 2) - continue; - /// We can rewrite to a <=> b only if we are joining on a and b, - /// because the function is not yet implemented for other cases. 
- auto first_src = getExpressionSource(argument_nodes[0]); - auto second_src = getExpressionSource(argument_nodes[1]); - if (!first_src || !second_src) - continue; - const auto & lhs_join = *join_node->getLeftTableExpression(); - const auto & rhs_join = *join_node->getRightTableExpression(); - bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || - (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); - if (!arguments_from_both_sides) - continue; - equals_functions_indices.push_back(or_operands.size() - 1); + if (isTwoArgumentsFromDifferentSides(*argument_function, *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") { - for (const auto & and_argument : argument_function->getArguments().getNodes()) + const auto & and_arguments = argument_function->getArguments().getNodes(); + bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull"); + if (all_are_is_null) { - auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") + is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1); + is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1); + } + + /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` + if (const auto & equals_function = findEqualsFunction(and_arguments)) + { + const auto & equals_arguments = equals_function->as()->getArguments().getNodes(); + /// Expected isNotNull arguments + QueryTreeNodePtrWithHashSet allowed_arguments; + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[0]}))); + allowed_arguments.insert(QueryTreeNodePtrWithHash(std::make_shared(QueryTreeNodes{equals_arguments[1]}))); + + bool can_be_optimized = true; + for (const auto & and_argument : and_arguments) { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - is_null_argument_to_indices[is_null_argument].push_back(or_operands.size() - 1); + if (and_argument.get() == equals_function.get()) + continue; + + if (isOnlyConjunctionOfFunctions(and_argument, "isNotNull", allowed_arguments)) + continue; + + can_be_optimized = false; + break; + } + + if (can_be_optimized) + { + is_anything_changed = true; + or_operands.pop_back(); + or_operands.push_back(equals_function); + if (isTwoArgumentsFromDifferentSides(equals_function->as(), *join_node)) + equals_functions_indices.push_back(or_operands.size() - 1); } } } @@ -146,7 +258,7 @@ private: { auto * equals_function = or_operands[equals_function_idx]->as(); - /// For a <=> b we are looking for expressions containing both `a IS NULL` and `b IS NULL` combined with AND + /// For a = b we are looking for all expressions `a IS NULL AND b IS NULL` const auto & argument_nodes = equals_function->getArguments().getNodes(); const auto & lhs_is_null_parents = is_null_argument_to_indices[argument_nodes[0]]; const auto & rhs_is_null_parents = is_null_argument_to_indices[argument_nodes[1]]; @@ -161,33 +273,14 @@ private: for (size_t to_optimize_idx : operands_to_optimize) { - /// We are looking for operand `a IS NULL AND b IS NULL AND ...` + /// Remove `a IS NULL AND b IS NULL` auto * operand_to_optimize = or_operands[to_optimize_idx]->as(); - - /// Remove `a IS NULL` and `b IS NULL` 
arguments from AND - QueryTreeNodes new_arguments; - for (const auto & and_argument : operand_to_optimize->getArguments().getNodes()) - { - bool to_eliminate = false; - - const auto * and_argument_function = and_argument->as(); - if (and_argument_function && and_argument_function->getFunctionName() == "isNull") - { - const auto & is_null_argument = and_argument_function->getArguments().getNodes()[0]; - to_eliminate = (is_null_argument->isEqual(*argument_nodes[0]) || is_null_argument->isEqual(*argument_nodes[1])); - } - - if (to_eliminate) - arguments_to_reresolve.insert(to_optimize_idx); - else - new_arguments.emplace_back(and_argument); - } - /// If less than two arguments left, we will remove or replace the whole AND below - operand_to_optimize->getArguments().getNodes() = std::move(new_arguments); + operand_to_optimize->getArguments().getNodes() = {}; + arguments_to_reresolve.insert(to_optimize_idx); } } - if (arguments_to_reresolve.empty()) + if (arguments_to_reresolve.empty() && !is_anything_changed) /// Nothing have been changed return false; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 6a5db4bc1de..dd083dd5df6 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -940,6 +940,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select table_join.locality, result_join_strictness, result_join_kind); + join_node->setOriginalAST(table_element.table_join); /** Original AST is not set because it will contain only join part and does * not include left table expression. diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 602ef8c232b..f39229d7566 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -408,25 +408,26 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format { const char * operators[] = { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "ilike", " ILIKE ", - "notLike", " NOT LIKE ", - "notILike", " NOT ILIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "isNotDistinctFrom", " <=> ", + "like", " LIKE ", + "ilike", " ILIKE ", + "notLike", " NOT LIKE ", + "notILike", " NOT ILIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", nullptr }; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e782bad797e..d22a4eca0fc 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -243,7 +243,7 @@ void ASTTableJoin::formatImplAfterTable(const FormatSettings & settings, FormatS void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { formatImplBeforeTable(settings, state, frame); - settings.ostr << " ... 
"; + settings.ostr << " ..."; formatImplAfterTable(settings, state, frame); } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 84efdd21336..58332df62a0 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -528,7 +528,7 @@ JoinClausesAndActions buildJoinClausesAndActions( size_t join_clause_key_nodes_size = join_clause.getLeftKeyNodes().size(); if (join_clause_key_nodes_size == 0) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "JOIN {} cannot get JOIN keys", + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage()); for (size_t i = 0; i < join_clause_key_nodes_size; ++i) diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 5b6c14ca24f..1ab16084cb4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -3,7 +3,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 @@ -12,14 +12,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER 2 2 2 2 3 3 3 33 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; 2 2 2 2 3 3 3 33 \N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; 1 42 4 42 2 2 2 2 3 3 3 33 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 5458370db8c..6a163482d68 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -12,14 +12,14 @@ SET allow_experimental_analyzer = 1; -- { echoOn } SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t2.x IS NULL) OR t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; -SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND ((t1.x = t1.y) OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; -SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR 
t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t2.x IS NULL) OR (t1.y IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST; From 8e770c7046679ac36d72c5b1142f14a94851711e Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 28 Jun 2024 15:19:33 +0000 Subject: [PATCH 065/299] fix --- .../Passes/LogicalExpressionOptimizerPass.cpp | 11 ++++++++--- ...02911_join_on_nullsafe_optimization.reference | 7 +++++++ .../02911_join_on_nullsafe_optimization.sql | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 3cbc44315c9..c74148a7252 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -83,21 +83,26 @@ bool isOnlyConjunctionOfFunctions( const QueryTreeNodePtrWithHashSet & allowed_arguments) { if (isNodeBooleanConstant(node, true)) + { return true; + } const auto * node_function = node->as(); - if (node_function - && node_function->getFunctionName() == "isNotNull" + if (!node_function) + return false; + + if (node_function->getFunctionName() == func_name && allowed_arguments.contains(node_function->getArgumentsNode())) return true; - if (node_function && node_function->getFunctionName() == "and") + if (node_function->getFunctionName() == "and") { for (const auto & and_argument : node_function->getArguments().getNodes()) { if (!isOnlyConjunctionOfFunctions(and_argument, func_name, allowed_arguments)) return false; } + return true; } return false; } diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 1ab16084cb4..1df3606592c 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -31,3 +31,10 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; 1 1 1 +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 +\N \N \N \N +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +2 2 2 2 +3 3 3 33 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 6a163482d68..3b150c2f284 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t1n; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t2n; CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; @@ -7,6 +9,13 @@ CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL); INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42); + 
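These Nullable tables are exactly the case the optimizer pass above targets. As a rough sketch of the intended rewrite (illustrative queries, not part of the test file itself): an OR of a plain equality with the matching pair of IS NULL checks is folded into the null-safe comparison.

```sql
-- With Nullable join keys, this ON expression ...
SELECT * FROM t1 JOIN t2 ON t1.x = t2.x OR (t1.x IS NULL AND t2.x IS NULL);

-- ... is treated as the null-safe comparison (isNotDistinctFrom),
-- which lets the join use x as an ordinary equality key.
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x;
```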
+CREATE TABLE t1n (x Int64, y UInt64) ENGINE = TinyLog; +CREATE TABLE t2n (x Int64, y UInt64) ENGINE = TinyLog; + +INSERT INTO t1n VALUES (1,42), (2,2), (3,3); +INSERT INTO t2n VALUES (2,2), (3,33), (4,42); + SET allow_experimental_analyzer = 1; -- { echoOn } @@ -24,7 +33,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) AND (t1.y == t2.y OR (t1.y IS NULL AND t2.y IS NULL)) AND COALESCE(t1.x, 0) != 2 ORDER BY t1.x NULLS LAST; SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; + +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + +SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; + -- { echoOff } DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; +DROP TABLE IF EXISTS t2n; From aa7017a7fb1dcc09f6d7f948d3adb2d65a7b5201 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jul 2024 00:32:39 +0200 Subject: [PATCH 066/299] Add a test for #43003 --- .../03199_join_with_materialized_column.reference | 0 .../0_stateless/03199_join_with_materialized_column.sql | 6 ++++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03199_join_with_materialized_column.reference create mode 100644 tests/queries/0_stateless/03199_join_with_materialized_column.sql diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.reference b/tests/queries/0_stateless/03199_join_with_materialized_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_join_with_materialized_column.sql b/tests/queries/0_stateless/03199_join_with_materialized_column.sql new file mode 100644 index 00000000000..8c53c5b3e66 --- /dev/null +++ b/tests/queries/0_stateless/03199_join_with_materialized_column.sql @@ -0,0 +1,6 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS table_with_materialized; +CREATE TABLE table_with_materialized (col String MATERIALIZED 'A') ENGINE = Memory; +SELECT number FROM numbers(1) AS n, table_with_materialized; +DROP TABLE table_with_materialized; From d627709179db7b3af9b6e2f1b1c30b3e87da0cb1 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 1 Jul 2024 09:17:28 +0000 Subject: [PATCH 067/299] upd test --- tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 | 2 +- .../queries/0_stateless/01881_join_on_conditions_merge.sql.j2 | 2 +- .../0_stateless/02911_join_on_nullsafe_optimization.sql | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index bd20d34b684..c2d85cefb18 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -72,7 +72,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR 
isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 index e4b704247b2..13703771ac8 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_merge.sql.j2 @@ -70,7 +70,7 @@ SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id + 2; -- { serverE SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.id + 2; -- { serverError 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.key; -- { serverError 43, 403 } SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key; -- { serverError 43, 403 } -SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } +SELECT * FROM t1 JOIN t2_nullable as t2 ON t2.key == t2.key2 AND (t1.id == t2.id OR isNull(t2.key2)); -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 OR t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key != 'XXX' OR t1.id == t2.id)) AND t1.id == t2.id; -- { serverError 403 } SELECT * FROM t1 JOIN t2 ON t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key != 'XXX' AND t1.id == t2.id OR t2.key == t2.key2 AND t1.id == t2.id AND t1.id == t2.id; -- { serverError 48 } diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 3b150c2f284..0a642a716a4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t1n; DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t1n; DROP TABLE IF EXISTS t2n; CREATE TABLE t1 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; @@ -9,7 +9,6 @@ CREATE TABLE t2 (x Nullable(Int64), y Nullable(UInt64)) ENGINE = TinyLog; INSERT INTO t1 VALUES (1,42), (2,2), (3,3), (NULL,NULL); INSERT INTO t2 VALUES (NULL,NULL), (2,2), (3,33), (4,42); - CREATE TABLE t1n (x Int64, y UInt64) ENGINE = TinyLog; CREATE TABLE t2n (x Int64, y UInt64) ENGINE = TinyLog; From c43ce89a6254bf790467129bd911e9a6e32631d9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 1 Jul 2024 16:27:10 +0300 Subject: [PATCH 068/299] PostgreSQL source cancel query comments --- src/Processors/Sources/PostgreSQLSource.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 37a84d9fe96..a3d6fd691d8 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -193,7 +193,15 @@ PostgreSQLSource::~PostgreSQLSource() { if (stream) { + /** Internally libpqxx::stream_from runs PostgreSQL copy query `COPY query TO STDOUT`. 
+ * During transaction abort we try to execute PostgreSQL `ROLLBACK` command and if + * copy query is not cancelled, we wait until it finishes. + */ tx->conn().cancel_query(); + + /** If stream is not closed, libpqxx::stream_from closes stream in destructor, but that way + * exception is added into transaction pending error and we can potentially ignore exception message. + */ stream->close(); } From 6573b5436edc23bc94ee7207a5479bfd0c32064c Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 1 Jul 2024 10:29:23 +0000 Subject: [PATCH 069/299] Add docs about dynamic binary format --- docs/en/sql-reference/data-types/dynamic.md | 11 + src/Formats/JSONExtractTree.cpp | 666 ++++++++++++++++++++ src/Formats/JSONExtractTree.h | 37 ++ 3 files changed, 714 insertions(+) create mode 100644 src/Formats/JSONExtractTree.cpp create mode 100644 src/Formats/JSONExtractTree.h diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 955fd54e641..f45feb9fe5e 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -493,3 +493,14 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O ``` As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. + + +### Binary output format + +In [RowBinary](../../interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format: + +```text + +``` + +See the [data types binary encoding specification](../../sql-reference/data-types/data-types-binary-encoding.md) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp new file mode 100644 index 00000000000..6a4241003f1 --- /dev/null +++ b/src/Formats/JSONExtractTree.cpp @@ -0,0 +1,666 @@ +//#include +// +//#include +//#include +//#include +//#include +// +//#include +//#include +//#include +// +//#include +//#include +//#include +// +//namespace DB +//{ +// +//namespace +//{ +// +//const FormatSettings & getFormatSettings() +//{ +// static const FormatSettings instance = [] +// { +// FormatSettings settings; +// settings.json.escape_forward_slashes = false; +// return settings; +// }(); +// return instance; +//} +// +//template +//void elementToString(const Element & element, WriteBuffer & buf) +//{ +// if (element.isInt64()) +// { +// writeIntText(element.getInt64(), buf); +// return; +// } +// if (element.isUInt64()) +// { +// writeIntText(element.getUInt64(), buf); +// return; +// } +// if (element.isDouble()) +// { +// writeFloatText(element.getDouble(), buf); +// return; +// } +// if (element.isBool()) +// { +// if (element.getBool()) +// writeCString("true", buf); +// else +// writeCString("false", buf); +// return; +// } +// if (element.isString()) +// { +// writeJSONString(element.getString(), buf, getFormatSettings()); +// return; +// } +// if (element.isArray()) +// { +// writeChar('[', buf); +// bool need_comma = false; +// for (auto value : element.getArray()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// elementToString(value, buf); +// } +// writeChar(']', buf); +// return; +// } +// if (element.isObject()) +// { +// writeChar('{', buf); +// bool need_comma = false; +// for (auto [key, value] : element.getObject()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// writeJSONString(key, buf, getFormatSettings()); +// writeChar(':', buf); +// elementToString(value, buf); +// } +// writeChar('}', 
buf); +// return; +// } +// if (element.isNull()) +// { +// writeCString("null", buf); +// return; +// } +//} +// +//template +//class NumericNode : public JSONExtractTree::Node +//{ +//public: +// NumericNode(bool convert_bool_to_integer_) : convert_bool_to_integer(convert_bool_to_integer_) {} +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// NumberType value; +// if (!tryGetValue(element, value)) +// return false; +// +// auto & col_vec = assert_cast &>(dest); +// col_vec.insertValue(value); +// return true; +// } +// +// bool tryGetValue(const Element & element, NumberType & value) +// { +// switch (element.type()) +// { +// case ElementType::DOUBLE: +// if constexpr (std::is_floating_point_v) +// { +// /// We permit inaccurate conversion of double to float. +// /// Example: double 0.1 from JSON is not representable in float. +// /// But it will be more convenient for user to perform conversion. +// value = static_cast(element.getDouble()); +// } +// else if (!accurate::convertNumeric(element.getDouble(), value)) +// return false; +// break; +// case ElementType::UINT64: +// if (!accurate::convertNumeric(element.getUInt64(), value)) +// return false; +// break; +// case ElementType::INT64: +// if (!accurate::convertNumeric(element.getInt64(), value)) +// return false; +// break; +// case ElementType::BOOL: +// if constexpr (is_integer) +// { +// if (convert_bool_to_integer) +// { +// value = static_cast(element.getBool()); +// break; +// } +// } +// return false; +// case ElementType::STRING: +// { +// auto rb = ReadBufferFromMemory{element.getString()}; +// if constexpr (std::is_floating_point_v) +// { +// if (!tryReadFloatText(value, rb) || !rb.eof()) +// return false; +// } +// else +// { +// if (tryReadIntText(value, rb) && rb.eof()) +// break; +// +// /// Try to parse float and convert it to integer. 
+// Float64 tmp_float; +// rb.position() = rb.buffer().begin(); +// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) +// return false; +// +// if (!accurate::convertNumeric(tmp_float, value)) +// return false; +// } +// break; +// } +// case ElementType::NULL_VALUE: +// { +// if () +// } +// default: +// return false; +// } +// +// return true; +// } +// +//private: +// bool convert_bool_to_integer; +//}; +// +//template +//class LowCardinalityNumericNode : public NumericNode +//{ +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// NumberType value; +// if (!tryGetValue(element, value)) +// return false; +// +// auto & col_lc = assert_cast(dest); +// col_lc.insertData(reinterpret_cast(&value), sizeof(value)); +// return true; +// } +//}; +// +//template +//class StringNode : public JSONExtractTree::Node +//{ +//public: +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (element.isNull()) +// return false; +// +// if (!element.isString()) +// { +// ColumnString & col_str = assert_cast(dest); +// auto & chars = col_str.getChars(); +// WriteBufferFromVector buf(chars, AppendModeTag()); +// elementToString(element, buf); +// buf.finalize(); +// chars.push_back(0); +// col_str.getOffsets().push_back(chars.size()); +// return true; +// } +// else +// { +// auto str = element.getString(); +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(str.data(), str.size()); +// } +// return true; +// } +//}; +// +//template +//class LowCardinalityStringNode : public JSONExtractTree::Node +//{ +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (element.isNull()) +// return false; +// +// if (!element.isString()) +// { +// ColumnString & col_str = assert_cast(dest); +// auto & chars = col_str.getChars(); +// WriteBufferFromVector buf(chars, AppendModeTag()); +// elementToString(element, buf); +// buf.finalize(); +// chars.push_back(0); +// col_str.getOffsets().push_back(chars.size()); +// return true; +// } +// else +// { +// auto str = element.getString(); +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(str.data(), str.size()); +// } +// return true; +// } +//}; +// +// +// +// +// +// +//class LowCardinalityFixedStringNode : public Node +//{ +//public: +// explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// // If element is an object we delegate the insertion to JSONExtractRawImpl +// if (element.isObject()) +// return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); +// else if (!element.isString()) +// return false; +// +// auto str = element.getString(); +// if (str.size() > fixed_length) +// return false; +// +// // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. 
+// // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) +// // the data is padded here and written directly to the Low Cardinality Column +// if (str.size() == fixed_length) +// { +// assert_cast(dest).insertData(str.data(), str.size()); +// } +// else +// { +// String padded_str(str); +// padded_str.resize(fixed_length, '\0'); +// +// assert_cast(dest).insertData(padded_str.data(), padded_str.size()); +// } +// return true; +// } +// +//private: +// const size_t fixed_length; +//}; +// +//class UUIDNode : public Node +//{ +//public: +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (!element.isString()) +// return false; +// +// auto uuid = parseFromString(element.getString()); +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnLowCardinality & col_low = assert_cast(dest); +// col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); +// } +// else +// { +// assert_cast(dest).insert(uuid); +// } +// return true; +// } +//}; +// +//template +//class DecimalNode : public Node +//{ +//public: +// explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// const auto * type = assert_cast *>(data_type.get()); +// +// DecimalType value{}; +// +// switch (element.type()) +// { +// case ElementType::DOUBLE: +// value = convertToDecimal, DataTypeDecimal>( +// element.getDouble(), type->getScale()); +// break; +// case ElementType::UINT64: +// value = convertToDecimal, DataTypeDecimal>( +// element.getUInt64(), type->getScale()); +// break; +// case ElementType::INT64: +// value = convertToDecimal, DataTypeDecimal>( +// element.getInt64(), type->getScale()); +// break; +// case ElementType::STRING: { +// auto rb = ReadBufferFromMemory{element.getString()}; +// if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) +// return false; +// break; +// } +// default: +// return false; +// } +// +// assert_cast &>(dest).insertValue(value); +// return true; +// } +// +//private: +// DataTypePtr data_type; +//}; +// +//class FixedStringNode : public Node +//{ +//public: +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (element.isNull()) +// return false; +// +// if (!element.isString()) +// return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); +// +// auto str = element.getString(); +// auto & col_str = assert_cast(dest); +// if (str.size() > col_str.getN()) +// return false; +// col_str.insertData(str.data(), str.size()); +// +// return true; +// } +//}; +// +//template +//class EnumNode : public Node +//{ +//public: +// explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) +// { +// for (const auto & name_value_pair : name_value_pairs) +// { +// name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); +// only_values.emplace(name_value_pair.second); +// } +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// auto & col_vec = assert_cast &>(dest); +// +// if (element.isInt64()) +// { +// Type value; +// if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) +// return false; +// col_vec.insertValue(value); +// return true; +// } +// +// if (element.isUInt64()) +// { +// Type value; +// if 
(!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) +// return false; +// col_vec.insertValue(value); +// return true; +// } +// +// if (element.isString()) +// { +// auto value = name_to_value_map.find(element.getString()); +// if (value == name_to_value_map.end()) +// return false; +// col_vec.insertValue(value->second); +// return true; +// } +// +// return false; +// } +// +//private: +// std::vector> name_value_pairs; +// std::unordered_map name_to_value_map; +// std::unordered_set only_values; +//}; +// +//class NullableNode : public Node +//{ +//public: +// explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// /// We do not need to handle nullability in that case +// /// because nested node handles LowCardinality columns and will call proper overload of `insertData` +// return nested->insertResultToColumn(dest, element); +// } +// +// ColumnNullable & col_null = assert_cast(dest); +// if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) +// return false; +// col_null.getNullMapColumn().insertValue(0); +// return true; +// } +// +//private: +// std::unique_ptr nested; +//}; +// +//class ArrayNode : public Node +//{ +//public: +// explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (!element.isArray()) +// return false; +// +// auto array = element.getArray(); +// +// ColumnArray & col_arr = assert_cast(dest); +// auto & data = col_arr.getData(); +// size_t old_size = data.size(); +// bool were_valid_elements = false; +// +// for (auto value : array) +// { +// if (nested->insertResultToColumn(data, value)) +// were_valid_elements = true; +// else +// data.insertDefault(); +// } +// +// if (!were_valid_elements) +// { +// data.popBack(data.size() - old_size); +// return false; +// } +// +// col_arr.getOffsets().push_back(data.size()); +// return true; +// } +// +//private: +// std::unique_ptr nested; +//}; +// +//class TupleNode : public Node +//{ +//public: +// TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) +// { +// for (size_t i = 0; i != explicit_names.size(); ++i) +// name_to_index_map.emplace(explicit_names[i], i); +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// ColumnTuple & tuple = assert_cast(dest); +// size_t old_size = dest.size(); +// bool were_valid_elements = false; +// +// auto set_size = [&](size_t size) +// { +// for (size_t i = 0; i != tuple.tupleSize(); ++i) +// { +// auto & col = tuple.getColumn(i); +// if (col.size() != size) +// { +// if (col.size() > size) +// col.popBack(col.size() - size); +// else +// while (col.size() < size) +// col.insertDefault(); +// } +// } +// }; +// +// if (element.isArray()) +// { +// auto array = element.getArray(); +// auto it = array.begin(); +// +// for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) +// { +// if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) +// were_valid_elements = true; +// else +// tuple.getColumn(index).insertDefault(); +// } +// +// set_size(old_size + static_cast(were_valid_elements)); +// return were_valid_elements; +// } +// +// if (element.isObject()) +// { +// auto 
object = element.getObject(); +// if (name_to_index_map.empty()) +// { +// auto it = object.begin(); +// for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) +// { +// if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) +// were_valid_elements = true; +// else +// tuple.getColumn(index).insertDefault(); +// } +// } +// else +// { +// for (const auto & [key, value] : object) +// { +// auto index = name_to_index_map.find(key); +// if (index != name_to_index_map.end()) +// { +// if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) +// were_valid_elements = true; +// } +// } +// } +// +// set_size(old_size + static_cast(were_valid_elements)); +// return were_valid_elements; +// } +// +// return false; +// } +// +//private: +// std::vector> nested; +// std::vector explicit_names; +// std::unordered_map name_to_index_map; +//}; +// +//class MapNode : public Node +//{ +//public: +// MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// if (!element.isObject()) +// return false; +// +// ColumnMap & map_col = assert_cast(dest); +// auto & offsets = map_col.getNestedColumn().getOffsets(); +// auto & tuple_col = map_col.getNestedData(); +// auto & key_col = tuple_col.getColumn(0); +// auto & value_col = tuple_col.getColumn(1); +// size_t old_size = tuple_col.size(); +// +// auto object = element.getObject(); +// auto it = object.begin(); +// for (; it != object.end(); ++it) +// { +// auto pair = *it; +// +// /// Insert key +// key_col.insertData(pair.first.data(), pair.first.size()); +// +// /// Insert value +// if (!value->insertResultToColumn(value_col, pair.second)) +// value_col.insertDefault(); +// } +// +// offsets.push_back(old_size + object.size()); +// return true; +// } +// +//private: +// std::unique_ptr key; +// std::unique_ptr value; +//}; +// +//class VariantNode : public Node +//{ +//public: +// VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } +// +// bool insertResultToColumn(IColumn & dest, const Element & element) override +// { +// auto & column_variant = assert_cast(dest); +// for (size_t i : order) +// { +// auto & variant = column_variant.getVariantByGlobalDiscriminator(i); +// if (variant_nodes[i]->insertResultToColumn(variant, element)) +// { +// column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); +// column_variant.getOffsets().push_back(variant.size() - 1); +// return true; +// } +// } +// +// return false; +// } +// +//private: +// std::vector> variant_nodes; +// /// Order in which we should try variants nodes. +// /// For example, String should be always the last one. 
+// std::vector order; +//}; +// +//} +// +//} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h new file mode 100644 index 00000000000..f07c974f595 --- /dev/null +++ b/src/Formats/JSONExtractTree.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include + +namespace DB +{ + +template +struct JSONExtractTree +{ + class Node + { + public: + Node() = default; + virtual ~Node() = default; + virtual bool insertResultToColumn(IColumn &, const Element &) = 0; + }; + + struct Settings + { + bool convert_bool_to_integer = true; + bool type_json_infer_numbers_from_strings = true; + bool type_json_infer_date = true; + bool type_json_infer_datetime = true; + bool type_json_infer_ipv4 = true; + bool type_json_infer_ipv6 = true; + bool type_json_infer_uuid = true; + bool insert_null_as_default = true; + }; + + static std::unique_ptr build(const DataTypePtr & type, const Settings & settings, const char * source_for_exception_message); +}; + +template +void elementToString(const Element & element, WriteBuffer & buf); + +} From d81e8f3d2d897d4dd364610cc56e8a95e546fb4a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 1 Jul 2024 18:54:28 +0000 Subject: [PATCH 070/299] Bump CI From 4f61f530bd02fec686b273f4c2519cde77a689f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jul 2024 11:22:05 +0200 Subject: [PATCH 071/299] Named collections in clickhouse-local --- programs/local/LocalServer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b33e1595056..74906d8797c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -376,6 +376,7 @@ void LocalServer::setupUsers() " " " default" " default" + " 1 " " " " " " From e45a905904b5afd94e960bf48de015017a351527 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 Jul 2024 11:52:51 +0200 Subject: [PATCH 072/299] Update LocalServer.cpp --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 74906d8797c..46b543e49e9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -376,7 +376,7 @@ void LocalServer::setupUsers() " " " default" " default" - " 1 + " 1" " " " " " " From a2f7d8e13fc597bdfa8a8ebfbc5797dcf767fe7c Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 10:42:12 +0000 Subject: [PATCH 073/299] Add missing settings in schema inference cache --- src/Formats/EscapingRuleUtils.cpp | 6 ++-- .../Formats/Impl/CSVRowInputFormat.cpp | 2 +- .../Impl/TabSeparatedRowInputFormat.cpp | 5 +-- ...settings_in_csv_tsv_schema_cache.reference | 28 +++++++++++++++ .../03198_settings_in_csv_tsv_schema_cache.sh | 34 +++++++++++++++++++ 5 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference create mode 100755 tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 36d16d8d154..58407a810c5 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -439,13 +439,15 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo case FormatSettings::EscapingRule::CSV: result += fmt::format( ", use_best_effort_in_schema_inference={}, bool_true_representation={}, bool_false_representation={}," - " null_representation={}, 
delimiter={}, tuple_delimiter={}", + " null_representation={}, delimiter={}, tuple_delimiter={}, try_infer_numbers_from_strings={}, try_infer_strings_from_quoted_tuples={}", settings.csv.use_best_effort_in_schema_inference, settings.bool_true_representation, settings.bool_false_representation, settings.csv.null_representation, settings.csv.delimiter, - settings.csv.tuple_delimiter); + settings.csv.tuple_delimiter, + settings.csv.try_infer_numbers_from_strings, + settings.csv.try_infer_strings_from_quoted_tuples); break; case FormatSettings::EscapingRule::JSON: result += fmt::format( diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index dd7d6c6b024..b7f84748f61 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -616,7 +616,7 @@ void registerCSVSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::CSV); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header, settings.csv.skip_first_lines); return result; }); } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 6d4dcba9e60..d2e17e92924 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -440,9 +440,10 @@ void registerTSVSchemaReader(FormatFactory & factory) settings, is_raw ? 
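The reason these settings are folded into additional_format_info is that they change the inferred schema, so they have to be part of the schema inference cache key. A rough sketch of the effect (the file name data.csv is illustrative; the shell test added below performs the same check through clickhouse-local):

DESC file('data.csv') SETTINGS input_format_csv_skip_first_lines = 1;
DESC file('data.csv') SETTINGS input_format_csv_skip_first_lines = 0;
-- With skip_first_lines now in the cache key, the two DESC queries above should leave
-- two separate cache entries instead of the second one reusing a stale schema:
SELECT count() FROM system.schema_inference_cache
WHERE format = 'CSV' AND additional_format_info LIKE '%skip_first_lines%';
-- expected: 2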
FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped); if (!with_names) result += fmt::format( - ", column_names_for_schema_inference={}, try_detect_header={}", + ", column_names_for_schema_inference={}, try_detect_header={}, skip_first_lines={}", settings.column_names_for_schema_inference, - settings.tsv.try_detect_header); + settings.tsv.try_detect_header, + settings.tsv.skip_first_lines); return result; }); } diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference new file mode 100644 index 00000000000..aecacd10e00 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.reference @@ -0,0 +1,28 @@ +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +2 +a Nullable(String) +b Nullable(Int64) +c Nullable(Int64) +a Tuple(Nullable(Int64), Nullable(Int64), Nullable(Int64)) +b Nullable(Int64) +c Nullable(Int64) +2 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +a Nullable(Int64) +b Nullable(Int64) +c Nullable(Int64) +2 diff --git a/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh new file mode 100755 index 00000000000..ce53f467823 --- /dev/null +++ b/tests/queries/0_stateless/03198_settings_in_csv_tsv_schema_cache.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo -e 'a,b,c\n1,2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%skip_first_lines%';" + +echo -e 'a,b,c\n"1",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_numbers_from_strings=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_numbers_from_strings%';" + +echo -e 'a,b,c\n"(1,2,3)",2,3' > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') SETTINGS input_format_csv_try_infer_strings_from_quoted_tuples=0; +SELECT count() from system.schema_inference_cache where format = 'CSV' and additional_format_info like '%try_infer_strings_from_quoted_tuples%';" + +echo -e 'a\tb\tc\n1\t2\t3' > $CLICKHOUSE_TEST_UNIQUE_NAME.tsv +$CLICKHOUSE_LOCAL -nm -q " +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=1; +DESC file('$CLICKHOUSE_TEST_UNIQUE_NAME.tsv') SETTINGS input_format_tsv_skip_first_lines=0; +SELECT count() from system.schema_inference_cache where format = 'TSV' and additional_format_info like '%skip_first_lines%';" + + +rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv +rm $CLICKHOUSE_TEST_UNIQUE_NAME.tsv + From 
9821c042f1a046b094fd7d17fb355f572c74cc9f Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 10:22:26 -0300 Subject: [PATCH 074/299] fix tie handling issue --- src/Functions/array/arrayAUC.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 499fe4ce7b2..878a8fcae34 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -103,27 +103,39 @@ private: sorted_labels[i].label = label; } - /// Stable sort is required for for labels to apply in same order if score is equal - std::stable_sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); + /// Sorting scores in descending order to traverse the ROC curve from left to right + std::sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; }); /// We will first calculate non-normalized area. - size_t area = 0; - size_t count_positive = 0; + Float64 area = 0.0; + Float64 prev_score = sorted_labels[0].score; + size_t prev_fp = 0, prev_tp = 0; + size_t curr_fp = 0, curr_tp = 0; for (size_t i = 0; i < size; ++i) { + // Only change the current ROC point when the score changes + if (sorted_labels[i].score != prev_score) { + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) + prev_fp = curr_fp; + prev_tp = curr_tp; + prev_score = sorted_labels[i].score; + } + if (sorted_labels[i].label) - ++count_positive; /// The curve moves one step up. No area increase. + curr_tp += 1; /// The curve moves one step up. else - area += count_positive; /// The curve moves one step right. Area is increased by 1 * height = count_positive. + curr_fp += 1; /// The curve moves one step right. } + area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; + /// Then divide the area to the area of rectangle. - if (count_positive == 0 || count_positive == size) + if (curr_tp == 0 || curr_tp == size) return std::numeric_limits::quiet_NaN(); - return static_cast(area) / count_positive / (size - count_positive); + return static_cast(area) / curr_tp / (size - curr_tp); } static void vector( From 6a0d912b1c9a35aab2466c50f2e51730215f1a35 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 11:05:48 -0300 Subject: [PATCH 075/299] remove trailing spaces --- src/Functions/array/arrayAUC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 878a8fcae34..58e2a864a7f 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -121,7 +121,7 @@ private: prev_tp = curr_tp; prev_score = sorted_labels[i].score; } - + if (sorted_labels[i].label) curr_tp += 1; /// The curve moves one step up. 
else From 7d7750e29b88bcde9b6b53efbd86f89915985747 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 11:40:56 -0300 Subject: [PATCH 076/299] remove brackets --- src/Functions/array/arrayAUC.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 58e2a864a7f..940cb01a5bf 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -115,12 +115,11 @@ private: for (size_t i = 0; i < size; ++i) { // Only change the current ROC point when the score changes - if (sorted_labels[i].score != prev_score) { + if (sorted_labels[i].score != prev_score) area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) prev_fp = curr_fp; prev_tp = curr_tp; prev_score = sorted_labels[i].score; - } if (sorted_labels[i].label) curr_tp += 1; /// The curve moves one step up. From 067b1474513069840646414d99b947766c939a9d Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 12:17:12 -0300 Subject: [PATCH 077/299] add test --- .../03198_fix_auc_tie_handling.reference | 2 + .../03198_fix_auc_tie_handling.sql | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tests/queries/0_stateless/03198_fix_auc_tie_handling.reference create mode 100644 tests/queries/0_stateless/03198_fix_auc_tie_handling.sql diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference b/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference new file mode 100644 index 00000000000..56c97b6849c --- /dev/null +++ b/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference @@ -0,0 +1,2 @@ +0.58333 +0.58333 diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql new file mode 100644 index 00000000000..1064668a2b5 --- /dev/null +++ b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql @@ -0,0 +1,39 @@ +CREATE TABLE labels_unordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +INSERT INTO labels_unordered (idx,score,label) VALUES + (1,0.1,0), + (2,0.35,1), + (3,0.4,0), + (4,0.8,1), + (5,0.8,0); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + +CREATE TABLE labels_ordered +( + idx Int64, + score Float64, + label Int64 +) +ENGINE = MergeTree +PRIMARY KEY idx +ORDER BY idx; + +INSERT INTO labels_ordered (idx,score,label) VALUES + (1,0.1,0), + (2,0.35,1), + (3,0.4,0), + (4,0.8,0), + (5,0.8,1); + +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_ordered; \ No newline at end of file From dd1eccd32f09da8bd076b3adb1cc838f75338b7c Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 12:21:19 -0300 Subject: [PATCH 078/299] fix brackets --- src/Functions/array/arrayAUC.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 940cb01a5bf..0fc05fdf996 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -116,10 +116,12 @@ private: { // Only change the current ROC point when the score changes if (sorted_labels[i].score != prev_score) + { area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) prev_fp = curr_fp; prev_tp = curr_tp; prev_score = sorted_labels[i].score; 
+ } if (sorted_labels[i].label) curr_tp += 1; /// The curve moves one step up. From dc38c863d3de80849d0f0d87117d78b1638ffa76 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 12:47:46 -0300 Subject: [PATCH 079/299] fix test syntax --- .../0_stateless/03198_fix_auc_tie_handling.sql | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql index 1064668a2b5..394a384ec8d 100644 --- a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql +++ b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql @@ -8,12 +8,7 @@ ENGINE = MergeTree PRIMARY KEY idx ORDER BY idx; -INSERT INTO labels_unordered (idx,score,label) VALUES - (1,0.1,0), - (2,0.35,1), - (3,0.4,0), - (4,0.8,1), - (5,0.8,0); +INSERT INTO labels_unordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,1), (5,0.8,0); SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) FROM labels_unordered; @@ -28,12 +23,7 @@ ENGINE = MergeTree PRIMARY KEY idx ORDER BY idx; -INSERT INTO labels_ordered (idx,score,label) VALUES - (1,0.1,0), - (2,0.35,1), - (3,0.4,0), - (4,0.8,0), - (5,0.8,1); +INSERT INTO labels_ordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,0), (5,0.8,1); SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) FROM labels_ordered; \ No newline at end of file From b847ccabc8e9f6968f20a2d317e1ca5702b8e57f Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Fri, 28 Jun 2024 18:55:23 -0300 Subject: [PATCH 080/299] fix previous test and add test with empty arrays --- .../queries/0_stateless/01202_array_auc_special.reference | 8 ++++---- .../0_stateless/03198_fix_auc_tie_handling.reference | 1 + tests/queries/0_stateless/03198_fix_auc_tie_handling.sql | 3 +++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01202_array_auc_special.reference b/tests/queries/0_stateless/01202_array_auc_special.reference index 85c230fba58..8f3f0cf1efe 100644 --- a/tests/queries/0_stateless/01202_array_auc_special.reference +++ b/tests/queries/0_stateless/01202_array_auc_special.reference @@ -1,9 +1,9 @@ nan nan nan -0 -1 -0 0.5 1 -0.5 +0 +0.75 +1 +0.75 diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference b/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference index 56c97b6849c..f35b39d5972 100644 --- a/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference +++ b/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference @@ -1,2 +1,3 @@ +nan 0.58333 0.58333 diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql index 394a384ec8d..5de0844f445 100644 --- a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql +++ b/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql @@ -8,6 +8,9 @@ ENGINE = MergeTree PRIMARY KEY idx ORDER BY idx; +SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) +FROM labels_unordered; + INSERT INTO labels_unordered (idx,score,label) VALUES (1,0.1,0), (2,0.35,1), (3,0.4,0), (4,0.8,1), (5,0.8,0); SELECT floor(arrayAUC(array_concat_agg([score]), array_concat_agg([label])), 5) From df086999ff0fe471277e25ff0abc5afff618a746 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Sat, 29 Jun 2024 07:13:08 -0300 Subject: [PATCH 081/299] remove redundant type cast --- src/Functions/array/arrayAUC.cpp | 2 +- 1 file 
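To make the new tie handling concrete: the area is now accumulated with the trapezoidal rule only when the score changes, and at the end it is normalized by positives * negatives. For the data used in the test above (scores [0.1, 0.35, 0.4, 0.8, 0.8], labels [0, 1, 0, 1, 0], i.e. 2 positives and 3 negatives) the accumulated area is 0.5 + 1 + 0 + 2 = 3.5, and 3.5 / (2 * 3) ≈ 0.58333, which is the value in the .reference file. The same check can be written without the helper tables; a sketch using literal arrays:

SELECT floor(arrayAUC([0.1, 0.35, 0.4, 0.8, 0.8], [0, 1, 0, 1, 0]), 5);
-- expected: 0.58333, independent of how the two tied 0.8 scores happen to be ordered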
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 0fc05fdf996..acc1505fbb3 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -136,7 +136,7 @@ private: if (curr_tp == 0 || curr_tp == size) return std::numeric_limits::quiet_NaN(); - return static_cast(area) / curr_tp / (size - curr_tp); + return area / curr_tp / (size - curr_tp); } static void vector( From b9b030d0aad0060a0d6c1cde6ad3165f0ef8757d Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Mon, 1 Jul 2024 10:29:51 -0300 Subject: [PATCH 082/299] better comment --- src/Functions/array/arrayAUC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index acc1505fbb3..04a840e8da5 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -114,7 +114,7 @@ private: size_t curr_fp = 0, curr_tp = 0; for (size_t i = 0; i < size; ++i) { - // Only change the current ROC point when the score changes + // Only increment the area when the score changes if (sorted_labels[i].score != prev_score) { area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; // Trapezoidal area under curve (might degenerate to zero or to a rectangle) From ce00de19e92041bf5dcf10d40379732f33b8f1d3 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Tue, 2 Jul 2024 11:38:49 -0300 Subject: [PATCH 083/299] rebase and rename test --- ...ie_handling.reference => 03199_fix_auc_tie_handling.reference} | 0 ...98_fix_auc_tie_handling.sql => 03199_fix_auc_tie_handling.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03198_fix_auc_tie_handling.reference => 03199_fix_auc_tie_handling.reference} (100%) rename tests/queries/0_stateless/{03198_fix_auc_tie_handling.sql => 03199_fix_auc_tie_handling.sql} (100%) diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.reference b/tests/queries/0_stateless/03199_fix_auc_tie_handling.reference similarity index 100% rename from tests/queries/0_stateless/03198_fix_auc_tie_handling.reference rename to tests/queries/0_stateless/03199_fix_auc_tie_handling.reference diff --git a/tests/queries/0_stateless/03198_fix_auc_tie_handling.sql b/tests/queries/0_stateless/03199_fix_auc_tie_handling.sql similarity index 100% rename from tests/queries/0_stateless/03198_fix_auc_tie_handling.sql rename to tests/queries/0_stateless/03199_fix_auc_tie_handling.sql From 02de44124616de36ae2606f22a4a6a0ba212ed3c Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Jul 2024 15:55:26 +0000 Subject: [PATCH 084/299] Remove unrelated files --- src/Formats/JSONExtractTree.cpp | 666 -------------------------------- src/Formats/JSONExtractTree.h | 37 -- 2 files changed, 703 deletions(-) delete mode 100644 src/Formats/JSONExtractTree.cpp delete mode 100644 src/Formats/JSONExtractTree.h diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp deleted file mode 100644 index 6a4241003f1..00000000000 --- a/src/Formats/JSONExtractTree.cpp +++ /dev/null @@ -1,666 +0,0 @@ -//#include -// -//#include -//#include -//#include -//#include -// -//#include -//#include -//#include -// -//#include -//#include -//#include -// -//namespace DB -//{ -// -//namespace -//{ -// -//const FormatSettings & getFormatSettings() -//{ -// static const FormatSettings instance = [] -// { -// FormatSettings settings; -// settings.json.escape_forward_slashes = false; -// return settings; -// }(); -// return instance; 
-//} -// -//template -//void elementToString(const Element & element, WriteBuffer & buf) -//{ -// if (element.isInt64()) -// { -// writeIntText(element.getInt64(), buf); -// return; -// } -// if (element.isUInt64()) -// { -// writeIntText(element.getUInt64(), buf); -// return; -// } -// if (element.isDouble()) -// { -// writeFloatText(element.getDouble(), buf); -// return; -// } -// if (element.isBool()) -// { -// if (element.getBool()) -// writeCString("true", buf); -// else -// writeCString("false", buf); -// return; -// } -// if (element.isString()) -// { -// writeJSONString(element.getString(), buf, getFormatSettings()); -// return; -// } -// if (element.isArray()) -// { -// writeChar('[', buf); -// bool need_comma = false; -// for (auto value : element.getArray()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// elementToString(value, buf); -// } -// writeChar(']', buf); -// return; -// } -// if (element.isObject()) -// { -// writeChar('{', buf); -// bool need_comma = false; -// for (auto [key, value] : element.getObject()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// writeJSONString(key, buf, getFormatSettings()); -// writeChar(':', buf); -// elementToString(value, buf); -// } -// writeChar('}', buf); -// return; -// } -// if (element.isNull()) -// { -// writeCString("null", buf); -// return; -// } -//} -// -//template -//class NumericNode : public JSONExtractTree::Node -//{ -//public: -// NumericNode(bool convert_bool_to_integer_) : convert_bool_to_integer(convert_bool_to_integer_) {} -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// NumberType value; -// if (!tryGetValue(element, value)) -// return false; -// -// auto & col_vec = assert_cast &>(dest); -// col_vec.insertValue(value); -// return true; -// } -// -// bool tryGetValue(const Element & element, NumberType & value) -// { -// switch (element.type()) -// { -// case ElementType::DOUBLE: -// if constexpr (std::is_floating_point_v) -// { -// /// We permit inaccurate conversion of double to float. -// /// Example: double 0.1 from JSON is not representable in float. -// /// But it will be more convenient for user to perform conversion. -// value = static_cast(element.getDouble()); -// } -// else if (!accurate::convertNumeric(element.getDouble(), value)) -// return false; -// break; -// case ElementType::UINT64: -// if (!accurate::convertNumeric(element.getUInt64(), value)) -// return false; -// break; -// case ElementType::INT64: -// if (!accurate::convertNumeric(element.getInt64(), value)) -// return false; -// break; -// case ElementType::BOOL: -// if constexpr (is_integer) -// { -// if (convert_bool_to_integer) -// { -// value = static_cast(element.getBool()); -// break; -// } -// } -// return false; -// case ElementType::STRING: -// { -// auto rb = ReadBufferFromMemory{element.getString()}; -// if constexpr (std::is_floating_point_v) -// { -// if (!tryReadFloatText(value, rb) || !rb.eof()) -// return false; -// } -// else -// { -// if (tryReadIntText(value, rb) && rb.eof()) -// break; -// -// /// Try to parse float and convert it to integer. 
-// Float64 tmp_float; -// rb.position() = rb.buffer().begin(); -// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) -// return false; -// -// if (!accurate::convertNumeric(tmp_float, value)) -// return false; -// } -// break; -// } -// case ElementType::NULL_VALUE: -// { -// if () -// } -// default: -// return false; -// } -// -// return true; -// } -// -//private: -// bool convert_bool_to_integer; -//}; -// -//template -//class LowCardinalityNumericNode : public NumericNode -//{ -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// NumberType value; -// if (!tryGetValue(element, value)) -// return false; -// -// auto & col_lc = assert_cast(dest); -// col_lc.insertData(reinterpret_cast(&value), sizeof(value)); -// return true; -// } -//}; -// -//template -//class StringNode : public JSONExtractTree::Node -//{ -//public: -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (element.isNull()) -// return false; -// -// if (!element.isString()) -// { -// ColumnString & col_str = assert_cast(dest); -// auto & chars = col_str.getChars(); -// WriteBufferFromVector buf(chars, AppendModeTag()); -// elementToString(element, buf); -// buf.finalize(); -// chars.push_back(0); -// col_str.getOffsets().push_back(chars.size()); -// return true; -// } -// else -// { -// auto str = element.getString(); -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(str.data(), str.size()); -// } -// return true; -// } -//}; -// -//template -//class LowCardinalityStringNode : public JSONExtractTree::Node -//{ -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (element.isNull()) -// return false; -// -// if (!element.isString()) -// { -// ColumnString & col_str = assert_cast(dest); -// auto & chars = col_str.getChars(); -// WriteBufferFromVector buf(chars, AppendModeTag()); -// elementToString(element, buf); -// buf.finalize(); -// chars.push_back(0); -// col_str.getOffsets().push_back(chars.size()); -// return true; -// } -// else -// { -// auto str = element.getString(); -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(str.data(), str.size()); -// } -// return true; -// } -//}; -// -// -// -// -// -// -//class LowCardinalityFixedStringNode : public Node -//{ -//public: -// explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// // If element is an object we delegate the insertion to JSONExtractRawImpl -// if (element.isObject()) -// return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); -// else if (!element.isString()) -// return false; -// -// auto str = element.getString(); -// if (str.size() > fixed_length) -// return false; -// -// // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. 
-// // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) -// // the data is padded here and written directly to the Low Cardinality Column -// if (str.size() == fixed_length) -// { -// assert_cast(dest).insertData(str.data(), str.size()); -// } -// else -// { -// String padded_str(str); -// padded_str.resize(fixed_length, '\0'); -// -// assert_cast(dest).insertData(padded_str.data(), padded_str.size()); -// } -// return true; -// } -// -//private: -// const size_t fixed_length; -//}; -// -//class UUIDNode : public Node -//{ -//public: -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (!element.isString()) -// return false; -// -// auto uuid = parseFromString(element.getString()); -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnLowCardinality & col_low = assert_cast(dest); -// col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); -// } -// else -// { -// assert_cast(dest).insert(uuid); -// } -// return true; -// } -//}; -// -//template -//class DecimalNode : public Node -//{ -//public: -// explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// const auto * type = assert_cast *>(data_type.get()); -// -// DecimalType value{}; -// -// switch (element.type()) -// { -// case ElementType::DOUBLE: -// value = convertToDecimal, DataTypeDecimal>( -// element.getDouble(), type->getScale()); -// break; -// case ElementType::UINT64: -// value = convertToDecimal, DataTypeDecimal>( -// element.getUInt64(), type->getScale()); -// break; -// case ElementType::INT64: -// value = convertToDecimal, DataTypeDecimal>( -// element.getInt64(), type->getScale()); -// break; -// case ElementType::STRING: { -// auto rb = ReadBufferFromMemory{element.getString()}; -// if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) -// return false; -// break; -// } -// default: -// return false; -// } -// -// assert_cast &>(dest).insertValue(value); -// return true; -// } -// -//private: -// DataTypePtr data_type; -//}; -// -//class FixedStringNode : public Node -//{ -//public: -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (element.isNull()) -// return false; -// -// if (!element.isString()) -// return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); -// -// auto str = element.getString(); -// auto & col_str = assert_cast(dest); -// if (str.size() > col_str.getN()) -// return false; -// col_str.insertData(str.data(), str.size()); -// -// return true; -// } -//}; -// -//template -//class EnumNode : public Node -//{ -//public: -// explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) -// { -// for (const auto & name_value_pair : name_value_pairs) -// { -// name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); -// only_values.emplace(name_value_pair.second); -// } -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// auto & col_vec = assert_cast &>(dest); -// -// if (element.isInt64()) -// { -// Type value; -// if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) -// return false; -// col_vec.insertValue(value); -// return true; -// } -// -// if (element.isUInt64()) -// { -// Type value; -// if 
(!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) -// return false; -// col_vec.insertValue(value); -// return true; -// } -// -// if (element.isString()) -// { -// auto value = name_to_value_map.find(element.getString()); -// if (value == name_to_value_map.end()) -// return false; -// col_vec.insertValue(value->second); -// return true; -// } -// -// return false; -// } -// -//private: -// std::vector> name_value_pairs; -// std::unordered_map name_to_value_map; -// std::unordered_set only_values; -//}; -// -//class NullableNode : public Node -//{ -//public: -// explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// /// We do not need to handle nullability in that case -// /// because nested node handles LowCardinality columns and will call proper overload of `insertData` -// return nested->insertResultToColumn(dest, element); -// } -// -// ColumnNullable & col_null = assert_cast(dest); -// if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) -// return false; -// col_null.getNullMapColumn().insertValue(0); -// return true; -// } -// -//private: -// std::unique_ptr nested; -//}; -// -//class ArrayNode : public Node -//{ -//public: -// explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (!element.isArray()) -// return false; -// -// auto array = element.getArray(); -// -// ColumnArray & col_arr = assert_cast(dest); -// auto & data = col_arr.getData(); -// size_t old_size = data.size(); -// bool were_valid_elements = false; -// -// for (auto value : array) -// { -// if (nested->insertResultToColumn(data, value)) -// were_valid_elements = true; -// else -// data.insertDefault(); -// } -// -// if (!were_valid_elements) -// { -// data.popBack(data.size() - old_size); -// return false; -// } -// -// col_arr.getOffsets().push_back(data.size()); -// return true; -// } -// -//private: -// std::unique_ptr nested; -//}; -// -//class TupleNode : public Node -//{ -//public: -// TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) -// { -// for (size_t i = 0; i != explicit_names.size(); ++i) -// name_to_index_map.emplace(explicit_names[i], i); -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// ColumnTuple & tuple = assert_cast(dest); -// size_t old_size = dest.size(); -// bool were_valid_elements = false; -// -// auto set_size = [&](size_t size) -// { -// for (size_t i = 0; i != tuple.tupleSize(); ++i) -// { -// auto & col = tuple.getColumn(i); -// if (col.size() != size) -// { -// if (col.size() > size) -// col.popBack(col.size() - size); -// else -// while (col.size() < size) -// col.insertDefault(); -// } -// } -// }; -// -// if (element.isArray()) -// { -// auto array = element.getArray(); -// auto it = array.begin(); -// -// for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) -// { -// if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) -// were_valid_elements = true; -// else -// tuple.getColumn(index).insertDefault(); -// } -// -// set_size(old_size + static_cast(were_valid_elements)); -// return were_valid_elements; -// } -// -// if (element.isObject()) -// { -// auto 
object = element.getObject(); -// if (name_to_index_map.empty()) -// { -// auto it = object.begin(); -// for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) -// { -// if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) -// were_valid_elements = true; -// else -// tuple.getColumn(index).insertDefault(); -// } -// } -// else -// { -// for (const auto & [key, value] : object) -// { -// auto index = name_to_index_map.find(key); -// if (index != name_to_index_map.end()) -// { -// if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) -// were_valid_elements = true; -// } -// } -// } -// -// set_size(old_size + static_cast(were_valid_elements)); -// return were_valid_elements; -// } -// -// return false; -// } -// -//private: -// std::vector> nested; -// std::vector explicit_names; -// std::unordered_map name_to_index_map; -//}; -// -//class MapNode : public Node -//{ -//public: -// MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// if (!element.isObject()) -// return false; -// -// ColumnMap & map_col = assert_cast(dest); -// auto & offsets = map_col.getNestedColumn().getOffsets(); -// auto & tuple_col = map_col.getNestedData(); -// auto & key_col = tuple_col.getColumn(0); -// auto & value_col = tuple_col.getColumn(1); -// size_t old_size = tuple_col.size(); -// -// auto object = element.getObject(); -// auto it = object.begin(); -// for (; it != object.end(); ++it) -// { -// auto pair = *it; -// -// /// Insert key -// key_col.insertData(pair.first.data(), pair.first.size()); -// -// /// Insert value -// if (!value->insertResultToColumn(value_col, pair.second)) -// value_col.insertDefault(); -// } -// -// offsets.push_back(old_size + object.size()); -// return true; -// } -// -//private: -// std::unique_ptr key; -// std::unique_ptr value; -//}; -// -//class VariantNode : public Node -//{ -//public: -// VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } -// -// bool insertResultToColumn(IColumn & dest, const Element & element) override -// { -// auto & column_variant = assert_cast(dest); -// for (size_t i : order) -// { -// auto & variant = column_variant.getVariantByGlobalDiscriminator(i); -// if (variant_nodes[i]->insertResultToColumn(variant, element)) -// { -// column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); -// column_variant.getOffsets().push_back(variant.size() - 1); -// return true; -// } -// } -// -// return false; -// } -// -//private: -// std::vector> variant_nodes; -// /// Order in which we should try variants nodes. -// /// For example, String should be always the last one. 
-// std::vector order; -//}; -// -//} -// -//} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h deleted file mode 100644 index f07c974f595..00000000000 --- a/src/Formats/JSONExtractTree.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -template -struct JSONExtractTree -{ - class Node - { - public: - Node() = default; - virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const Element &) = 0; - }; - - struct Settings - { - bool convert_bool_to_integer = true; - bool type_json_infer_numbers_from_strings = true; - bool type_json_infer_date = true; - bool type_json_infer_datetime = true; - bool type_json_infer_ipv4 = true; - bool type_json_infer_ipv6 = true; - bool type_json_infer_uuid = true; - bool insert_null_as_default = true; - }; - - static std::unique_ptr build(const DataTypePtr & type, const Settings & settings, const char * source_for_exception_message); -}; - -template -void elementToString(const Element & element, WriteBuffer & buf); - -} From be9a17f2cca06814c68b712a6df2c18a44362220 Mon Sep 17 00:00:00 2001 From: gun9nir Date: Tue, 2 Jul 2024 21:42:59 -0700 Subject: [PATCH 085/299] feat: implicitly append wildcard if querying directory in file engine --- src/Storages/StorageFile.cpp | 16 +++++++++++----- ...03198_table_function_directory_path.reference | 3 +++ .../03198_table_function_directory_path.sql | 11 +++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03198_table_function_directory_path.reference create mode 100644 tests/queries/0_stateless/03198_table_function_directory_path.sql diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7f39ff615f0..9ff2a6667af 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -366,12 +366,18 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user } else if (path.find_first_of("*?{") == std::string::npos) { - std::error_code error; - size_t size = fs::file_size(path, error); - if (!error) - total_bytes_to_read += size; + if (!fs::is_directory(path)) { + std::error_code error; + size_t size = fs::file_size(path, error); + if (!error) + total_bytes_to_read += size; - paths.push_back(path); + paths.push_back(path); + } else { + /// We list non-directory files under that directory. 
+ paths = listFilesWithRegexpMatching(path / fs::path("*"), total_bytes_to_read); + can_be_directory = false; + } } else { diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.reference b/tests/queries/0_stateless/03198_table_function_directory_path.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql new file mode 100644 index 00000000000..671074ab45a --- /dev/null +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -0,0 +1,11 @@ +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv'; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv'; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv'; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir1/dir/4.csv', 'csv') SELECT '4.csv'; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir2/dir/5.csv', 'csv') SELECT '5.csv'; + +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/dir'); +SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/*/dir', 'csv'); -- { serverError 74, 636 } +SELECT COUNT(*) FROM file('data_03198_table_function_directory_pat'); -- { serverError 400 } From 7a993d737b55177a27436f98159f0458f03610fc Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 3 Jul 2024 11:45:31 +0000 Subject: [PATCH 086/299] better --- .../Passes/LogicalExpressionOptimizerPass.cpp | 82 +++++++++---------- ...11_join_on_nullsafe_optimization.reference | 20 +++++ .../02911_join_on_nullsafe_optimization.sql | 25 ++++++ 3 files changed, 85 insertions(+), 42 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index c74148a7252..698602ca5bc 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -66,7 +66,7 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } -bool isNodeBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) +bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value) { const auto * constant_node = node->as(); if (!constant_node || !constant_node->getResultType()->equals(DataTypeUInt8())) @@ -82,10 +82,8 @@ bool isOnlyConjunctionOfFunctions( const String & func_name, const QueryTreeNodePtrWithHashSet & allowed_arguments) { - if (isNodeBooleanConstant(node, true)) - { + if (isBooleanConstant(node, true)) return true; - } const auto * node_function = node->as(); if (!node_function) @@ -141,15 +139,16 @@ public: { auto * function_node = node->as(); - if (!function_node) - return; + QueryTreeNodePtr new_node = nullptr; + if (function_node && function_node->getFunctionName() == "or") + new_node = tryOptimizeJoinOnNulls(function_node->getArguments().getNodes(), getContext()); + else + new_node = tryOptimizeJoinOnNulls({node}, getContext()); - if (function_node->getFunctionName() == "or") + if (new_node) { - bool is_argument_type_changed = 
tryOptimizeIsNotDistinctOrIsNull(node, getContext()); - if (is_argument_type_changed) - need_rerun_resolve = true; - return; + need_rerun_resolve |= !new_node->getResultType()->equals(*node->getResultType()); + node = new_node; } } @@ -166,14 +165,11 @@ private: const JoinNode * join_node; bool need_rerun_resolve = false; - /// Returns true if type of some operand is changed and parent function needs to be re-resolved - bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + /// Returns optimized node or nullptr if nothing have been changed + QueryTreeNodePtr tryOptimizeJoinOnNulls(const QueryTreeNodes & nodes, const ContextPtr & context) { - auto & function_node = node->as(); - chassert(function_node.getFunctionName() == "or"); - QueryTreeNodes or_operands; - or_operands.reserve(function_node.getArguments().getNodes().size()); + or_operands.reserve(nodes.size()); /// Indices of `equals` or `isNotDistinctFrom` functions in the vector above std::vector equals_functions_indices; @@ -192,17 +188,17 @@ private: bool is_anything_changed = false; - for (const auto & argument : function_node.getArguments()) + for (const auto & node : nodes) { - if (isNodeBooleanConstant(argument, false)) + if (isBooleanConstant(node, false)) { /// Remove false constants from OR is_anything_changed = true; continue; } - or_operands.push_back(argument); - auto * argument_function = argument->as(); + or_operands.push_back(node); + auto * argument_function = node->as(); if (!argument_function) continue; @@ -223,6 +219,8 @@ private: } /// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b` + /// Even though this expression are not equivalent (first is NULL on NULLs, while second is FALSE), + /// it is still correct since for JOIN ON condition NULL is treated as FALSE if (const auto & equals_function = findEqualsFunction(and_arguments)) { const auto & equals_arguments = equals_function->as()->getArguments().getNodes(); @@ -261,7 +259,7 @@ private: for (size_t equals_function_idx : equals_functions_indices) { - auto * equals_function = or_operands[equals_function_idx]->as(); + const auto * equals_function = or_operands[equals_function_idx]->as(); /// For a = b we are looking for all expressions `a IS NULL AND b IS NULL` const auto & argument_nodes = equals_function->getArguments().getNodes(); @@ -279,40 +277,39 @@ private: for (size_t to_optimize_idx : operands_to_optimize) { /// Remove `a IS NULL AND b IS NULL` - auto * operand_to_optimize = or_operands[to_optimize_idx]->as(); - operand_to_optimize->getArguments().getNodes() = {}; - arguments_to_reresolve.insert(to_optimize_idx); + or_operands[to_optimize_idx] = nullptr; + is_anything_changed = true; } } if (arguments_to_reresolve.empty() && !is_anything_changed) /// Nothing have been changed - return false; + return nullptr; auto and_function_resolver = FunctionFactory::instance().get("and", context); auto strict_equals_function_resolver = FunctionFactory::instance().get("isNotDistinctFrom", context); - bool need_reresolve = false; QueryTreeNodes new_or_operands; for (size_t i = 0; i < or_operands.size(); ++i) { if (arguments_to_reresolve.contains(i)) { - auto * function = or_operands[i]->as(); + const auto * function = or_operands[i]->as(); if (function->getFunctionName() == "equals") { /// We should replace `a = b` with `a <=> b` because we removed checks for IS NULL - need_reresolve |= function->getResultType()->isNullable(); - 
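What the pass rewrites can be shown at the SQL level. Assuming two tables t1 and t2 with a Nullable key column x (as in the 02911 test updated below), an ON clause spelled out as a null-safe disjunction is reduced to the isNotDistinctFrom form, so the two queries below are expected to behave identically; this is a sketch based on the pass's description, not verified output:

-- As written by the user:
SELECT * FROM t1 JOIN t2
ON (t1.x = t2.x AND t1.x IS NOT NULL AND t2.x IS NOT NULL) OR (t1.x IS NULL AND t2.x IS NULL);
-- What the optimizer effectively executes:
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x;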
function->resolveAsFunction(strict_equals_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(strict_equals_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (function->getFunctionName() == "and") { const auto & and_arguments = function->getArguments().getNodes(); if (and_arguments.size() > 1) { - function->resolveAsFunction(and_function_resolver); - new_or_operands.emplace_back(std::move(or_operands[i])); + auto new_function = or_operands[i]->clone(); + new_function->as()->resolveAsFunction(and_function_resolver); + new_or_operands.emplace_back(std::move(new_function)); } else if (and_arguments.size() == 1) { @@ -321,25 +318,26 @@ private: } } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function name: '{}'", function->getFunctionName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function '{}'", function->getFunctionName()); } - else + else if (or_operands[i]) { new_or_operands.emplace_back(std::move(or_operands[i])); } } + if (new_or_operands.empty()) + return nullptr; + if (new_or_operands.size() == 1) - { - node = std::move(new_or_operands[0]); - return need_reresolve; - } + return new_or_operands[0]; /// Rebuild OR function auto or_function_resolver = FunctionFactory::instance().get("or", context); - function_node.getArguments().getNodes() = std::move(new_or_operands); - function_node.resolveAsFunction(or_function_resolver); - return need_reresolve; + auto function_node = std::make_shared("or"); + function_node->getArguments().getNodes() = std::move(new_or_operands); + function_node->resolveAsFunction(or_function_resolver); + return function_node; } }; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 1df3606592c..f0463509b80 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -35,6 +35,26 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +-- aliases defined in the join condition are valid +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 1 +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +1 42 \N \N \N 0 +2 2 2 2 1 1 +3 3 3 33 1 1 +\N \N 4 42 \N 0 +\N \N \N \N \N 0 +\N \N \N \N \N 0 +-- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; 2 2 2 2 3 3 3 33 +-- +0 +0 +2 +2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 0a642a716a4..67918f4302f 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -35,10 +35,35 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x 
NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +-- aliases defined in the join condition are valid +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + +-- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { echoOff } +SELECT '--'; + +-- IS NOT NULL and constants are optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND (t1.x IS NOT NULL) AND true AND (t2.x IS NOT NULL) ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON ( (t1.x = t2.x) AND true ) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +-- this is not optimized out +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + +SELECT count() FROM ( EXPLAIN QUERY TREE + SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) +) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%'; + DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; DROP TABLE IF EXISTS t1n; From 97b5c78b8bad243f69c2e27ff056873758314d99 Mon Sep 17 00:00:00 2001 From: gabrielmcg44 Date: Wed, 3 Jul 2024 10:25:37 -0300 Subject: [PATCH 087/299] clearer comment --- src/Functions/array/arrayAUC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 04a840e8da5..3e2a3bf6863 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -131,7 +131,7 @@ private: area += (curr_fp - prev_fp) * (curr_tp + prev_tp) / 2.0; - /// Then divide the area to the area of rectangle. + /// Then normalize it dividing by the area to the area of rectangle. 
if (curr_tp == 0 || curr_tp == size) return std::numeric_limits::quiet_NaN(); From ea3b0e735de285db89cb36e2782db88c6d403ee2 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 13:40:41 +0000 Subject: [PATCH 088/299] Refactor JSONExtract functions and support more types and reuse its code in new JSON type --- docs/en/sql-reference/data-types/dynamic.md | 34 + src/Common/JSONParsers/SimdJSONParser.h | 1 + src/DataTypes/DataTypeDynamic.cpp | 1 + src/Formats/JSONExtractTree.cpp | 1561 +++++++++ src/Formats/JSONExtractTree.h | 35 + src/Formats/SchemaInferenceUtils.cpp | 94 +- src/Formats/SchemaInferenceUtils.h | 10 + src/Functions/FunctionsJSON.cpp | 1061 +++++- src/Functions/FunctionsJSON.h | 3054 +++++++---------- .../03198_json_extract_more_types.reference | 21 + .../03198_json_extract_more_types.sql | 29 + .../03199_json_extract_dynamic.reference | 30 + .../03199_json_extract_dynamic.sql | 37 + 13 files changed, 4158 insertions(+), 1810 deletions(-) create mode 100644 src/Formats/JSONExtractTree.cpp create mode 100644 src/Formats/JSONExtractTree.h create mode 100644 tests/queries/0_stateless/03198_json_extract_more_types.reference create mode 100644 tests/queries/0_stateless/03198_json_extract_more_types.sql create mode 100644 tests/queries/0_stateless/03199_json_extract_dynamic.reference create mode 100644 tests/queries/0_stateless/03199_json_extract_dynamic.sql diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 955fd54e641..e063bed2de4 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -493,3 +493,37 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O ``` As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. 
+ + +## JSONExtract functions with Dynamic + +All `JSONExtract*` functions support `Dynamic` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type; +``` + +```text +┌─dynamic─┬─dynamic_type───────────┐ +│ [1,2,3] │ Array(Nullable(Int64)) │ +└─────────┴────────────────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` + +```text +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +``` + +```text +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index 827d142266a..db679b14f52 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..6826c46a1a7 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp new file mode 100644 index 00000000000..6d019f96ba6 --- /dev/null +++ b/src/Formats/JSONExtractTree.cpp @@ -0,0 +1,1561 @@ +#include +#include + +#include +#if USE_SIMDJSON +#include +#endif +#if USE_RAPIDJSON +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include + +namespace DB +{ + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) +{ + if (element.isInt64()) + { + writeIntText(element.getInt64(), buf); + return; + } + if (element.isUInt64()) + { + writeIntText(element.getUInt64(), buf); + return; + } + if (element.isDouble()) + { + writeFloatText(element.getDouble(), buf); + return; + } + if (element.isBool()) + { + if (element.getBool()) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (element.isString()) + { + writeJSONString(element.getString(), buf, format_settings); + return; + } + if (element.isArray()) + { + writeChar('[', buf); + bool need_comma = false; + for (auto value : element.getArray()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + jsonElementToString(value, buf, format_settings); 
+ } + writeChar(']', buf); + return; + } + if (element.isObject()) + { + writeChar('{', buf); + bool need_comma = false; + for (auto [key, value] : element.getObject()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + writeJSONString(key, buf, format_settings); + writeChar(':', buf); + jsonElementToString(value, buf, format_settings); + } + writeChar('}', buf); + return; + } + if (element.isNull()) + { + writeCString("null", buf); + return; + } +} + +template +bool tryGetNumericValueFromJSONElement( + NumberType & value, + const typename JSONParser::Element & element, + bool convert_bool_to_integer, + String & error) +{ + switch (element.type()) + { + case ElementType::DOUBLE: + if constexpr (std::is_floating_point_v) + { + /// We permit inaccurate conversion of double to float. + /// Example: double 0.1 from JSON is not representable in float. + /// But it will be more convenient for user to perform conversion. + value = static_cast(element.getDouble()); + } + else if (!accurate::convertNumeric(element.getDouble(), value)) + { + error = fmt::format("cannot convert double value {} to {}", element.getDouble(), TypeName); + return false; + } + break; + case ElementType::UINT64: + if (!accurate::convertNumeric(element.getUInt64(), value)) + { + error = fmt::format("cannot convert UInt64 value {} to {}", element.getUInt64(), TypeName); + return false; + } + break; + case ElementType::INT64: + if (!accurate::convertNumeric(element.getInt64(), value)) + { + error = fmt::format("cannot convert Int64 value {} to {}", element.getInt64(), TypeName); + return false; + } + break; + case ElementType::BOOL: + if constexpr (is_integer) + { + if (convert_bool_to_integer) + { + value = static_cast(element.getBool()); + break; + } + } + error = fmt::format("cannot convert bool value to {}", TypeName); + return false; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if constexpr (std::is_floating_point_v) + { + if (!tryReadFloatText(value, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + else + { + if (tryReadIntText(value, rb) && rb.eof()) + break; + + /// Try to parse float and convert it to integer. 
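tryGetNumericValueFromJSONElement is what makes JSONExtract lenient about how a number is represented: numeric strings, doubles that fit the target integer type, and (when bool-to-integer conversion is enabled) booleans are all accepted. A few illustrative calls; the expected results follow from the conversion rules above rather than from this patch's tests:

SELECT JSONExtract('{"a" : "42"}', 'a', 'UInt32');  -- number written as a string, expected: 42
SELECT JSONExtract('{"a" : 3.0}', 'a', 'Int64');    -- double convertible to an integer, expected: 3
SELECT JSONExtract('{"a" : true}', 'a', 'UInt8');   -- bool, expected: 1 with bool-to-integer conversion on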
+ Float64 tmp_float; + rb.position() = rb.buffer().begin(); + if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + + if (!accurate::convertNumeric(tmp_float, value)) + { + error = fmt::format("cannot parse {} value here: {}", TypeName, element.getString()); + return false; + } + } + break; + } + default: + return false; + } + + return true; +} + +namespace +{ + +template +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +{ + WriteBufferFromOwnString buf; + jsonElementToString(element, buf, format_settings); + return buf.str(); +} + +template +class NumericNode : public JSONExtractTreeNode +{ +public: + explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (is_bool_type) + value = static_cast(value); + + auto & col_vec = assert_cast &>(column); + col_vec.insertValue(value); + return true; + } + +protected: + bool is_bool_type; +}; + +template +class LowCardinalityNumericNode : public NumericNode +{ +public: + explicit LowCardinalityNumericNode(bool is_nullable_, bool is_bool_type_ = false) + : NumericNode(is_bool_type_), is_nullable(is_nullable_) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = fmt::format("cannot parse {} value from null", TypeName); + return false; + } + + NumberType value; + if (!tryGetNumericValueFromJSONElement(value, element, insert_settings.convert_bool_to_integer || this->is_bool_type, error)) + { + if (error.empty()) + error = fmt::format("cannot read {} value from JSON element: {}", TypeName, jsonElementToString(element, format_settings)); + return false; + } + + if (this->is_bool_type) + value = static_cast(value); + + auto & col_lc = assert_cast(column); + col_lc.insertData(reinterpret_cast(&value), sizeof(value)); + return true; + } + +private: + bool is_nullable; +}; + +template +class StringNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto & col_str = assert_cast(column); + auto & 
chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } + else + { + auto value = element.getString(); + auto & col_str = assert_cast(column); + col_str.insertData(value.data(), value.size()); + } + return true; + } +}; + +template +class LowCardinalityStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse String value from null"; + return false; + } + + if (!element.isString()) + { + auto value = jsonElementToString(element, format_settings); + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + auto value = element.getString(); + assert_cast(column).insertData(value.data(), value.size()); + } + + return true; + } + +private: + bool is_nullable; +}; + +template +class FixedStringNode : public JSONExtractTreeNode +{ +public: + explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + assert_cast(column).insertData(value.data(), value.size()); + return true; + } + + size_t fixed_length; +}; + +template +class LowCardinalityFixedStringNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityFixedStringNode(bool is_nullable_, size_t fixed_length_) : is_nullable(is_nullable_), fixed_length(fixed_length_) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (is_nullable || format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + error = "cannot parse FixedString value from null"; + return false; + } + + if (!element.isString()) + return checkValueSizeAndInsert(column, jsonElementToString(element, format_settings), error); + return checkValueSizeAndInsert(column, element.getString(), error); + } + +private: + template + bool checkValueSizeAndInsert(IColumn & column, const T & value, String & error) const + { + if (value.size() > fixed_length) + { + error = fmt::format("too large string for FixedString({}): {}", fixed_length, value); + return false; + } + + // For the 
non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. + // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + if (value.size() == fixed_length) + { + assert_cast(column).insertData(value.data(), value.size()); + } + else + { + String padded_value(value); + padded_value.resize(fixed_length, '\0'); + assert_cast(column).insertData(padded_value.data(), padded_value.size()); + } + return true; + } + + bool is_nullable; + size_t fixed_length; +}; + +template +class UUIDNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + UUID uuid; + if (!tryParse(uuid, data)) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + + assert_cast(column).insert(uuid); + return true; + } + + + static bool tryParse(UUID & uuid, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadUUIDText(uuid, buf) && buf.eof(); + } +}; + +template +class LowCardinalityUUIDNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read UUID value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + UUID uuid; + if (!tryReadUUIDText(uuid, buf) || !buf.eof()) + { + error = fmt::format("cannot parse UUID value here: {}", data); + return false; + } + assert_cast(column).insertData(reinterpret_cast(&uuid), sizeof(uuid)); + return true; + } + +private: + bool is_nullable; +}; + +template +class DateNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read Date value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + ReadBufferFromMemory buf(data.data(), data.size()); + DateType date; + if (!tryReadDateText(date, buf) || !buf.eof()) + { + error = fmt::format("cannot parse Date value here: {}", data); + return false; + } + + assert_cast &>(column).insertValue(date); + return true; + } +}; + +template +class DateTimeNode : public 
JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + time_t value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime value here: {}", element.getString()); + return false; + } + } + else if (element.isUInt64()) + { + value = element.getUInt64(); + } + else + { + error = fmt::format("cannot read DateTime value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(time_t & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(value, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffort(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUS(value, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } +}; + +template +class DecimalNode : public JSONExtractTreeNode +{ +public: + explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + DecimalType value{}; + + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, scale)) + { + error = fmt::format("cannot parse Decimal value here: {}", element.getString()); + return false; + } + break; + } + case ElementType::NULL_VALUE: { + if (!format_settings.null_as_default) + { + error = "cannot convert null to Decimal value"; + return false; + } + break; + } + default: { + error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast &>(column).insertValue(value); + return true; + } + +private: + UInt32 scale; +}; + + +template +class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin +{ +public: + explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const 
JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + DateTime64 value; + if (element.isString()) + { + if (!tryParse(value, element.getString(), format_settings.date_time_input_format)) + { + error = fmt::format("cannot parse DateTime64 value here: {}", element.getString()); + return false; + } + } + else + { + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>(element.getDouble(), scale); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>(element.getUInt64(), scale); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>(element.getInt64(), scale); + break; + default: + error = fmt::format("cannot read DateTime64 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + } + + assert_cast(column).insert(value); + return true; + } + + bool tryParse(DateTime64 & value, std::string_view data, FormatSettings::DateTimeInputFormat date_time_input_format) const + { + ReadBufferFromMemory buf(data.data(), data.size()); + switch (date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(value, scale, buf, time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(value, scale, buf, time_zone, utc_time_zone) && buf.eof()) + return true; + break; + } + + return false; + } + +private: + UInt32 scale; +}; + +template +class EnumNode : public JSONExtractTreeNode +{ +public: + explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) + { + for (const auto & name_value_pair : name_value_pairs) + { + name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); + only_values.emplace(name_value_pair.second); + } + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + if (format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + error = "cannot convert null to Enum value"; + return false; + } + + auto & col_vec = assert_cast &>(column); + + if (element.isInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isUInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getUInt64()); + return false; + } + col_vec.insertValue(value); + return true; + } + + if (element.isString()) + { + auto value = name_to_value_map.find(element.getString()); + if (value == name_to_value_map.end()) + { + error = fmt::format("cannot convert value {} to enum: there is no such value in enum", element.getString()); + return false; + } + 
col_vec.insertValue(value->second); + return true; + } + + error = fmt::format("cannot read Enum value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector> name_value_pairs; + std::unordered_map name_to_value_map; + std::unordered_set only_values; +}; + +template +class IPv4Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv4 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv4 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv4 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + static bool tryParse(IPv4 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv4Text(value, buf) && buf.eof(); + } +}; + +template +class IPv6Node : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isString()) + { + error = fmt::format("cannot read IPv6 value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto data = element.getString(); + IPv6 value; + if (!tryParse(value, data)) + { + error = fmt::format("cannot parse IPv6 value here: {}", data); + return false; + } + + assert_cast(column).insert(value); + return true; + } + + + static bool tryParse(IPv6 & value, std::string_view data) + { + ReadBufferFromMemory buf(data.data(), data.size()); + return tryReadIPv6Text(value, buf) && buf.eof(); + } +}; + +template +class NullableNode : public JSONExtractTreeNode +{ +public: + explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull()) + { + column.insertDefault(); + return true; + } + + auto & col_null = assert_cast(column); + if (!nested-> insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + return false; + col_null.getNullMapColumn().insertValue(0); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class LowCardinalityNode : public JSONExtractTreeNode +{ +public: + explicit LowCardinalityNode(bool is_nullable_, std::unique_ptr> nested_) + : is_nullable(is_nullable_), nested(std::move(nested_)) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && (is_nullable || format_settings.null_as_default)) + { + column.insertDefault(); + return true; + } + + auto & 
col_lc = assert_cast(column); + auto tmp_nested = col_lc.getDictionary().getNestedColumn()->cloneEmpty(); + if (!nested-> insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + return false; + + col_lc.insertFromFullColumn(*tmp_nested, 0); + return true; + } + +private: + bool is_nullable; + std::unique_ptr> nested; +}; + +template +class ArrayNode : public JSONExtractTreeNode +{ +public: + explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (element.isNull() && format_settings.null_as_default) + { + column.insertDefault(); + return true; + } + + if (!element.isArray()) + { + error = fmt::format("cannot read Array value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto array = element.getArray(); + + auto & col_arr = assert_cast(column); + auto & data = col_arr.getData(); + size_t old_size = data.size(); + bool were_valid_elements = false; + + for (auto value : array) + { + if (nested-> insertResultToColumn(data, value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + data.insertDefault(); + } + else + { + data.popBack(data.size() - old_size); + return false; + } + } + + if (!were_valid_elements) + { + data.popBack(data.size() - old_size); + return false; + } + + col_arr.getOffsets().push_back(data.size()); + return true; + } + +private: + std::unique_ptr> nested; +}; + +template +class TupleNode : public JSONExtractTreeNode +{ +public: + TupleNode(std::vector>> nested_, const std::vector & explicit_names_) + : nested(std::move(nested_)), explicit_names(explicit_names_) + { + for (size_t i = 0; i != explicit_names.size(); ++i) + name_to_index_map.emplace(explicit_names[i], i); + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & tuple = assert_cast(column); + size_t old_size = column.size(); + bool were_valid_elements = false; + + auto set_size = [&](size_t size) + { + for (size_t i = 0; i != tuple.tupleSize(); ++i) + { + auto & col = tuple.getColumn(i); + if (col.size() != size) + { + if (col.size() > size) + col.popBack(col.size() - size); + else + while (col.size() < size) + col.insertDefault(); + } + } + }; + + if (element.isArray()) + { + auto array = element.getArray(); + auto it = array.begin(); + + for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) + { + if (nested[index]-> insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + if (element.isObject()) + { + auto object = element.getObject(); + if (name_to_index_map.empty()) + { + auto it = object.begin(); + for (size_t index = 0; (index != nested.size()) && 
(it != object.end()); ++index) + { + if (nested[index]-> insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + tuple.getColumn(index).insertDefault(); + } + else + { + set_size(old_size); + error += fmt::format("(during reading tuple {} element)", index); + return false; + } + } + } + else + { + for (const auto & [key, value] : object) + { + auto index = name_to_index_map.find(key); + if (index != name_to_index_map.end()) + { + if (nested[index->second]-> insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + { + were_valid_elements = true; + } + else if (!insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + set_size(old_size); + error += fmt::format("(during reading tuple element \"{}\")", key); + return false; + } + } + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + error = fmt::format("cannot read Tuple value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> nested; + std::vector explicit_names; + std::unordered_map name_to_index_map; +}; + +template +class MapNode : public JSONExtractTreeNode +{ +public: + explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + if (!element.isObject()) + { + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + + auto & map_col = assert_cast(column); + auto & offsets = map_col.getNestedColumn().getOffsets(); + auto & tuple_col = map_col.getNestedData(); + auto & key_col = tuple_col.getColumn(0); + auto & value_col = tuple_col.getColumn(1); + size_t old_size = tuple_col.size(); + + auto object = element.getObject(); + auto it = object.begin(); + for (; it != object.end(); ++it) + { + auto pair = *it; + + /// Insert key + key_col.insertData(pair.first.data(), pair.first.size()); + + /// Insert value + if (!value-> insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + { + if (insert_settings.insert_default_on_invalid_elements_in_complex_types) + { + value_col.insertDefault(); + } + else + { + key_col.popBack(key_col.size() - offsets.back()); + value_col.popBack(value_col.size() - offsets.back()); + error += fmt::format("(during reading value of key \"{}\")", pair.first); + return false; + } + } + } + + offsets.push_back(old_size + object.size()); + return true; + } + +private: + std::unique_ptr> value; +}; + +template +class VariantNode : public JSONExtractTreeNode +{ +public: + VariantNode(std::vector>> variant_nodes_, std::vector order_) + : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) + { + } + + bool insertResultToColumn( + IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & column_variant = assert_cast(column); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]-> insertResultToColumn(variant, 
element, insert_settings, format_settings, error)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + error = fmt::format("cannot read Map value from JSON element: {}", jsonElementToString(element, format_settings)); + return false; + } + +private: + std::vector>> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; +}; + + +template +class DynamicNode : public JSONExtractTreeNode +{ +public: + bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + { + auto & column_dynamic = assert_cast(column); + auto & variant_column = column_dynamic.getVariantColumn(); + auto variant_info = column_dynamic.getVariantInfo(); + /// First, infer ClickHouse type for this element and add it as a new variant. + auto element_type = elementToDataType(element, format_settings); + if (column_dynamic.addNewVariant(element_type)) + { + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); + if (!node-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); + variant_column.getOffsets().push_back(variant.size() - 1); + return true; + } + + /// We couldn't add new variant. Try to insert element into current variants. + auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); + if (variant_node-> insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + return true; + + /// We couldn't insert element into any existing variant, add String variant and read value as String. 
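+        /// A String variant can hold the textual form of any JSON element, so for non-null elements this fallback is expected to succeed.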
+ column_dynamic.addStringVariant(); + auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; + auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); + if (!getStringNode()-> insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + return false; + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); + variant_column.getOffsets().push_back(string_column.size() - 1); + return true; + } + + static const std::unique_ptr> & getStringNode() + { + static const std::unique_ptr> string_node + = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); + return string_node; + } + + static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) + { + JSONInferenceInfo json_inference_info; + auto type = elementToDataTypeImpl(element, format_settings, json_inference_info); + transformFinalInferredJSONTypeIfNeeded(type, format_settings, &json_inference_info); + return type; + } + +private: + static DataTypePtr elementToDataTypeImpl(const typename JSONParser::Element & element, const FormatSettings & format_settings, JSONInferenceInfo & json_inference_info) + { + switch (element.type()) + { + case ElementType::NULL_VALUE: + return makeNullable(std::make_shared()); + case ElementType::BOOL: + return DataTypeFactory::instance().get("Bool"); + case ElementType::INT64: + { + auto type = std::make_shared(); + if (element.getInt64() < 0) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + case ElementType::UINT64: + return std::make_shared(); + case ElementType::DOUBLE: + return std::make_shared(); + case ElementType::STRING: + { + auto data = element.getString(); + + if (auto type = tryInferDateOrDateTimeFromString(data, format_settings)) + return type; + + if (format_settings.json.try_infer_numbers_from_strings) + { + bool is_negative = false; + if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) + { + json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); + if (is_negative) + json_inference_info.negative_integers.insert(type.get()); + return type; + } + } + + return std::make_shared(); + } + case ElementType::ARRAY: + { + auto array = element.getArray(); + DataTypes types; + types.reserve(array.size()); + for (auto value : array) + types.push_back(makeNullableSafe(elementToDataTypeImpl(value, format_settings, json_inference_info))); + + if (types.empty()) + return std::make_shared(makeNullable(std::make_shared())); + + if (checkIfTypesAreEqual(types)) + return std::make_shared(types.back()); + + /// For JSON if we have not complete types, we should not try to transform them + /// and return it as a Tuple. + /// For example, if we have types [Nullable(Float64), Nullable(Nothing), Nullable(Float64)] + /// it can be Array(Nullable(Float64)) or Tuple(Nullable(Float64), , Nullable(Float64)) and + /// we can't determine which one it is right now. But we will be able to do it later + /// when we will have the final top level type. + /// For example, we can have JSON element [[42.42, null, 43.43], [44.44, "Some string", 45.45]] and we should + /// determine the type for this element as Tuple(Nullable(Float64), Nullable(String), Nullable(Float64)). 
+ for (const auto & type : types) + { + if (!checkIfTypeIsComplete(type)) + return std::make_shared(types); + } + + auto types_copy = types; + transformInferredJSONTypesIfNeeded(types_copy, format_settings, &json_inference_info); + + if (checkIfTypesAreEqual(types_copy)) + return std::make_shared(types_copy.back()); + + return std::make_shared(types); + } + case ElementType::OBJECT: { + /// TODO: Use new JSON type here when it's ready. + return std::make_shared(std::make_shared(), makeNullable(std::make_shared())); + } + } + } +}; + +} + +template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(); + case TypeIndex::UInt32: + return std::make_unique>(); + case TypeIndex::UInt64: + return std::make_unique>(); + case TypeIndex::UInt128: + return std::make_unique>(); + case TypeIndex::UInt256: + return std::make_unique>(); + case TypeIndex::Int8: + return std::make_unique>(); + case TypeIndex::Int16: + return std::make_unique>(); + case TypeIndex::Int32: + return std::make_unique>(); + case TypeIndex::Int64: + return std::make_unique>(); + case TypeIndex::Int128: + return std::make_unique>(); + case TypeIndex::Int256: + return std::make_unique>(); + case TypeIndex::Float32: + return std::make_unique>(); + case TypeIndex::Float64: + return std::make_unique>(); + case TypeIndex::String: + return std::make_unique>(); + case TypeIndex::FixedString: + return std::make_unique>(assert_cast(*type).getN()); + case TypeIndex::UUID: + return std::make_unique>(); + case TypeIndex::IPv4: + return std::make_unique>(); + case TypeIndex::IPv6: + return std::make_unique>(); + case TypeIndex::Date:; + return std::make_unique>(); + case TypeIndex::Date32: + return std::make_unique>(); + case TypeIndex::DateTime: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::DateTime64: + return std::make_unique>(assert_cast(*type)); + case TypeIndex::Decimal32: + return std::make_unique>(type); + case TypeIndex::Decimal64: + return std::make_unique>(type); + case TypeIndex::Decimal128: + return std::make_unique>(type); + case TypeIndex::Decimal256: + return std::make_unique>(type); + case TypeIndex::Enum8: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::Enum16: + return std::make_unique>(assert_cast(*type).getValues()); + case TypeIndex::LowCardinality: + { + /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. 
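+            /// Other dictionary types fall back to the generic LowCardinalityNode (the default case below), which inserts through a temporary full column.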
+ auto lc_type = typeid_cast(type.get()); + auto dictionary_type = removeNullable(lc_type->getDictionaryType()); + bool is_nullable = lc_type->isLowCardinalityNullable(); + + switch (dictionary_type->getTypeId()) + { + case TypeIndex::UInt8: + return std::make_unique>(is_nullable, isBool(type)); + case TypeIndex::UInt16: + return std::make_unique>(is_nullable); + case TypeIndex::UInt32: + return std::make_unique>(is_nullable); + case TypeIndex::UInt64: + return std::make_unique>(is_nullable); + case TypeIndex::Int8: + return std::make_unique>(is_nullable); + case TypeIndex::Int16: + return std::make_unique>(is_nullable); + case TypeIndex::Int32: + return std::make_unique>(is_nullable); + case TypeIndex::Int64: + return std::make_unique>(is_nullable); + case TypeIndex::Float32: + return std::make_unique>(is_nullable); + case TypeIndex::Float64: + return std::make_unique>(is_nullable); + case TypeIndex::String: + return std::make_unique>(is_nullable); + case TypeIndex::FixedString: + return std::make_unique>(is_nullable, assert_cast(*dictionary_type).getN()); + case TypeIndex::UUID: + return std::make_unique>(is_nullable); + default: + return std::make_unique>(is_nullable, buildJSONExtractTree(dictionary_type, source_for_exception_message)); + } + } + case TypeIndex::Nullable: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Array: + return std::make_unique>(buildJSONExtractTree(assert_cast(*type).getNestedType(), source_for_exception_message)); + case TypeIndex::Tuple: + { + const auto & tuple = assert_cast(*type); + const auto & tuple_elements = tuple.getElements(); + std::vector>> elements; + elements.reserve(tuple_elements.size()); + for (const auto & tuple_element : tuple_elements) + elements.emplace_back(buildJSONExtractTree(tuple_element, source_for_exception_message)); + return std::make_unique>(std::move(elements), tuple.haveExplicitNames() ? 
tuple.getElementNames() : Strings{}); + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*type); + const auto & key_type = map_type.getKeyType(); + if (!isString(removeLowCardinality(key_type))) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {} with key type not String", + source_for_exception_message, + type->getName()); + + const auto & value_type = map_type.getValueType(); + return std::make_unique>(buildJSONExtractTree(value_type, source_for_exception_message)); + } + case TypeIndex::Variant: + { + const auto & variant_type = assert_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector>> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(buildJSONExtractTree(variant, source_for_exception_message)); + return std::make_unique>(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } + case TypeIndex::Dynamic: + return std::make_unique>(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "{} doesn't support the return type schema: {}", + source_for_exception_message, + type->getName()); + } +} + +#if USE_SIMDJSON +template void jsonElementToString(const SimdJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +#if USE_RAPIDJSON +template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#else +template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); +template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +#endif + +} diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h new file mode 100644 index 00000000000..4735f568b1c --- /dev/null +++ b/src/Formats/JSONExtractTree.h @@ -0,0 +1,35 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +struct JSONExtractInsertSettings +{ + bool convert_bool_to_integer = true; + bool insert_default_on_invalid_elements_in_complex_types = false; +}; + +template +class JSONExtractTreeNode +{ +public: + JSONExtractTreeNode() = default; + virtual ~JSONExtractTreeNode() = default; + virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0; +}; + +/// Build a tree for insertion JSON element into a column with provided data type. 
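+/// An illustrative usage sketch (the local names here are hypothetical, not part of the API):
+///     auto tree = buildJSONExtractTree<SimdJSONParser>(result_type, "JSONExtract");
+///     String error;
+///     if (!tree->insertResultToColumn(column, element, insert_settings, format_settings, error))
+///         column.insertDefault();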
+template +std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); + +template +void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); + +template +bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error); + +} diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 31faea2e13e..6519d54a8c5 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -225,19 +225,6 @@ namespace Paths paths; }; - bool checkIfTypesAreEqual(const DataTypes & types) - { - if (types.empty()) - return true; - - for (size_t i = 1; i < types.size(); ++i) - { - if (!types[0]->equals(*types[i])) - return false; - } - return true; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -272,24 +259,31 @@ namespace type_indexes.erase(TypeIndex::Nothing); } - /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64, /// because UInt64 is inferred only in case of Int64 overflow. - void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) return; + bool have_negative_integers = false; for (auto & type : data_types) { if (WhichDataType(type).isInt64()) - type = std::make_shared(); + { + bool is_negative = json_info->negative_integers.contains(type.get()); + have_negative_integers |= is_negative; + if (!is_negative) + type = std::make_shared(); + } } - type_indexes.erase(TypeIndex::Int64); + if (!have_negative_integers) + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_floats = type_indexes.contains(TypeIndex::Float64); bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); @@ -300,7 +294,12 @@ namespace { WhichDataType which(type); if (which.isInt64() || which.isUInt64()) - type = std::make_shared(); + { + auto new_type = std::make_shared(); + if (json_info->numbers_parsed_from_json_strings.erase(type.get())) + json_info->numbers_parsed_from_json_strings.insert(new_type.get()); + type = new_type; + } } type_indexes.erase(TypeIndex::Int64); @@ -635,9 +634,9 @@ namespace if (settings.try_infer_integers) { /// Transform Int64 to UInt64 if needed. - transformIntegers(data_types, type_indexes); + transformIntegers(data_types, type_indexes, json_info); /// Transform integers to floats if needed. - transformIntegersAndFloatsToFloats(data_types, type_indexes); + transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info); } /// Transform Date to DateTime or both to String if needed. 
@@ -887,7 +886,7 @@ namespace } template - DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info) { if (buf.eof()) return nullptr; @@ -911,7 +910,12 @@ namespace Int64 tmp_int; buf.position() = number_start; if (tryReadIntText(tmp_int, buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } /// In case of Int64 overflow we can try to infer UInt64. UInt64 tmp_uint; @@ -934,7 +938,12 @@ namespace Int64 tmp_int; if (tryReadIntText(tmp_int, peekable_buf)) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_info && tmp_int < 0) + json_info->negative_integers.insert(type.get()); + return type; + } peekable_buf.rollbackToCheckpoint(/* drop= */ true); /// In case of Int64 overflow we can try to infer UInt64. @@ -952,7 +961,7 @@ namespace } template - DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings) + DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr) { ReadBufferFromString buf(field); @@ -960,7 +969,12 @@ namespace { Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) - return std::make_shared(); + { + auto type = std::make_shared(); + if (json_inference_info && tmp_int < 0) + json_inference_info->negative_integers.insert(type.get()); + return type; + } /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof. buf.position() = buf.buffer().begin(); @@ -1011,7 +1025,7 @@ namespace { if (settings.json.try_infer_numbers_from_strings) { - if (auto number_type = tryInferNumberFromStringImpl(field, settings)) + if (auto number_type = tryInferNumberFromStringImpl(field, settings, json_info)) { json_info->numbers_parsed_from_json_strings.insert(number_type.get()); return number_type; @@ -1254,10 +1268,23 @@ namespace } /// Number - return tryInferNumber(buf, settings); + return tryInferNumber(buf, settings, json_info); } } +bool checkIfTypesAreEqual(const DataTypes & types) +{ + if (types.empty()) + return true; + + for (size_t i = 1; i < types.size(); ++i) + { + if (!types[0]->equals(*types[i])) + return false; + } + return true; +} + void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { DataTypes types = {first, second}; @@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded( second = std::move(types[1]); } +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + transformInferredTypesIfNeededImpl(types, settings, json_info); +} + void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings) { JSONInferenceInfo json_info; @@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting return tryInferNumberFromStringImpl(field, settings); } +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info) +{ + return tryInferNumberFromStringImpl(field, settings, json_info); + +} + DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings) { if (settings.try_infer_dates && 
tryInferDate(field)) diff --git a/src/Formats/SchemaInferenceUtils.h b/src/Formats/SchemaInferenceUtils.h index bcf3d194825..06c14c0797a 100644 --- a/src/Formats/SchemaInferenceUtils.h +++ b/src/Formats/SchemaInferenceUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include @@ -18,6 +19,11 @@ struct JSONInferenceInfo /// We store numbers that were parsed from strings. /// It's used in types transformation to change such numbers back to string if needed. std::unordered_set numbers_parsed_from_json_strings; + /// Store integer types that were inferred from negative numbers. + /// It's used to determine common type for Int64 and UInt64 + /// TODO: check it not only in JSON formats. + std::unordered_set negative_integers; + /// Indicates if currently we are inferring type for Map/Object key. bool is_object_key = false; /// When we transform types for the same column from different files @@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma /// Try to parse a number value from a string. By default, it tries to parse Float64, /// but if setting try_infer_integers is enabled, it also tries to parse Int64. DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings); +DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info); /// It takes two types inferred for the same column and tries to transform them to a common type if possible. /// It's also used when we try to infer some not ordinary types from another types. @@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c /// Example 2: /// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths. void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info); +void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info); /// Make final transform for types inferred in JSON format. It does 3 types of transformation: /// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types @@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); /// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String)) bool checkIfTypeIsComplete(const DataTypePtr & type); +bool checkIfTypesAreEqual(const DataTypes & types); + } diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index fbd987577e9..c6af0674db7 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,10 +1,1069 @@ -#include +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include +#include +#include +#include +#include + +#include + +#include "config.h" namespace DB { +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; + +/// Functions to parse JSONs and extract values from it. 
+/// The first argument of all these functions gets a JSON, +/// after that there are any number of arguments specifying path to a desired part from the JSON's root. +/// For example, +/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 + +class FunctionJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FormatSettings & format_settings) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); + + const auto & first_column = arguments[0]; + if (!isString(first_column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON, illegal type: " + "{}", String(Name::name), first_column.type->getName()); + + const ColumnPtr & arg_json = first_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); + + const ColumnString::Chars & chars = col_json_string->getChars(); + const ColumnString::Offsets & offsets = col_json_string->getOffsets(); + + size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); + std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); + + /// Preallocate memory in parser if necessary. + JSONParser parser; + if constexpr (has_member_function_reserve::value) + { + size_t max_size = calculateMaxSize(offsets); + if (max_size) + parser.reserve(max_size); + } + + Impl impl; + + /// prepare() does Impl-specific preparation before handling each row. + if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) + impl.prepare(Name::name, arguments, result_type); + + using Element = typename JSONParser::Element; + + Element document; + bool document_ok = false; + if (col_json_const) + { + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; + document_ok = parser.parse(json, document); + } + + String error; + for (const auto i : collections::range(0, input_rows_count)) + { + if (!col_json_const) + { + std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; + document_ok = parser.parse(json, document); + } + + bool added_to_column = false; + if (document_ok) + { + /// Perform moves. + Element element; + std::string_view last_key; + bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); + + if (moves_ok) + added_to_column = impl.insertResultToColumn(*to, element, last_key, format_settings, error); + } + + /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. + if (!added_to_column) + to->insertDefault(); + } + return to; + } + }; + +private: + BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) + BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. 
+ /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) + /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. + /// Keys and indices can be nonconst, in this case they are calculated for each row. + enum class MoveType : uint8_t + { + Key, + Index, + ConstKey, + ConstIndex, + }; + + struct Move + { + explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} + Move(MoveType type_, const String & key_) : type(type_), key(key_) {} + MoveType type; + size_t index = 0; + String key; + }; + + static std::vector prepareMoves( + const char * function_name, + const ColumnsWithTypeAndName & columns, + size_t first_index_argument, + size_t num_index_arguments) + { + std::vector moves; + moves.reserve(num_index_arguments); + for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) + { + const auto & column = columns[i]; + if (!isString(column.type) && !isNativeInteger(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument {} of function {} should be a string specifying key " + "or an integer specifying index, illegal type: {}", + std::to_string(i + 1), String(function_name), column.type->getName()); + + if (column.column && isColumnConst(*column.column)) + { + const auto & column_const = assert_cast(*column.column); + if (isString(column.type)) + moves.emplace_back(MoveType::ConstKey, column_const.getValue()); + else + moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); + } + else + { + if (isString(column.type)) + moves.emplace_back(MoveType::Key, ""); + else + moves.emplace_back(MoveType::Index, 0); + } + } + return moves; + } + + + /// Performs moves of types MoveType::Index and MoveType::ConstIndex. 
+ template + static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, + const typename JSONParser::Element & document, const std::vector & moves, + typename JSONParser::Element & element, std::string_view & last_key) + { + typename JSONParser::Element res_element = document; + std::string_view key; + + for (size_t j = 0; j != moves.size(); ++j) + { + switch (moves[j].type) + { + case MoveType::ConstIndex: + { + if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) + return false; + break; + } + case MoveType::ConstKey: + { + key = moves[j].key; + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + case MoveType::Index: + { + Int64 index = (*arguments[j + 1].column)[row].get(); + if (!moveToElementByIndex(res_element, static_cast(index), key)) + return false; + break; + } + case MoveType::Key: + { + key = arguments[j + 1].column->getDataAt(row).toView(); + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + } + } + + element = res_element; + last_key = key; + return true; + } + + template + static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) + { + if (element.isArray()) + { + auto array = element.getArray(); + if (index >= 0) + --index; + else + index += array.size(); + + if (static_cast(index) >= array.size()) + return false; + element = array[index]; + out_key = {}; + return true; + } + + if constexpr (HasIndexOperator) + { + if (element.isObject()) + { + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + + if (static_cast(index) >= object.size()) + return false; + std::tie(out_key, element) = object[index]; + return true; + } + } + + return {}; + } + + /// Performs moves of types MoveType::Key and MoveType::ConstKey. + template + static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) + { + if (!element.isObject()) + return false; + auto object = element.getObject(); + return object.find(key, element); + } + + static size_t calculateMaxSize(const ColumnString::Offsets & offsets) + { + size_t max_size = 0; + for (const auto i : collections::range(0, offsets.size())) + { + size_t size = offsets[i] - offsets[i - 1]; + max_size = std::max(max_size, size); + } + if (max_size) + --max_size; + return max_size; + } + +}; + +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. 
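+* As a sketch of that flow for the example above: the LowCardinality input columns are converted to full columns first,
+* the JSON parser then produces a plain FixedString(4) result, and only at the end is it wrapped back into
+* LowCardinality(FixedString(4)) (see functionForcesTheReturnType() and ExecutableFunctionJSON::executeImpl() below).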
+*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} + +template typename Impl> +class ExecutableFunctionJSON : public IExecutableFunction +{ + +public: + explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) + : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) + { + format_settings.json.escape_forward_slashes = false; + format_settings.null_as_default = false; + } + + String getName() const override { return Name::name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + } + +private: + + ColumnPtr + chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { +#if USE_SIMDJSON + if (allow_simdjson) + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + +#if USE_RAPIDJSON + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#else + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count, format_settings); +#endif + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + + +template typename Impl> +class FunctionBaseFunctionJSON : public IFunctionBase +{ +public: + explicit FunctionBaseFunctionJSON( + const NullPresence & null_presence_, + bool allow_simdjson_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_, + const FormatSettings & format_settings_) + : null_presence(null_presence_) + , allow_simdjson(allow_simdjson_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + , format_settings(format_settings_) + { + } + + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique>(null_presence, allow_simdjson, json_return_type, format_settings); + } + +private: + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; + FormatSettings format_settings; +}; + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
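+/// For example, assuming that behaviour, SELECT JSONExtractInt('{"a": 1}', NULL) returns NULL rather than
+/// the default value 0, because the NULL argument makes the whole result NULL.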
+template typename Impl> +class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + return std::make_unique>( + null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type, getFormatSettings(getContext())); + } +}; + +struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +struct NameJSONType { static constexpr auto name{"JSONType"}; }; +struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; + + +template +class JSONHasImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } 
+}; + + +template +class IsValidJSONImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", + String(function_name)); + } + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view, const FormatSettings &, String &) + { + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class JSONLengthImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + size_t size; + if (element.isArray()) + size = element.getArray().size(); + else if (element.isObject()) + size = element.getObject().size(); + else + return false; + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(size); + return true; + } +}; + + +template +class JSONKeyImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key, const FormatSettings &, String &) + { + if (last_key.empty()) + return false; + ColumnString & col_str = assert_cast(dest); + col_str.insertData(last_key.data(), last_key.size()); + return true; + } +}; + + +template +class JSONTypeImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + static const std::vector> values = { + {"Array", '['}, + {"Object", '{'}, + {"String", '"'}, + {"Int64", 'i'}, + {"UInt64", 'u'}, + {"Double", 'd'}, + {"Bool", 'b'}, + {"Null", 0}, /// the default value for the column. 
+ }; + return std::make_shared>(values); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + UInt8 type; + switch (element.type()) + { + case ElementType::INT64: + type = 'i'; + break; + case ElementType::UINT64: + type = 'u'; + break; + case ElementType::DOUBLE: + type = 'd'; + break; + case ElementType::STRING: + type = '"'; + break; + case ElementType::ARRAY: + type = '['; + break; + case ElementType::OBJECT: + type = '{'; + break; + case ElementType::BOOL: + type = 'b'; + break; + case ElementType::NULL_VALUE: + type = 0; + break; + } + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(type); + return true; + } +}; + + +template +class JSONExtractNumericImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared>(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static const std::unique_ptr> & getInsertNode() + { + static const std::unique_ptr> node = buildJSONExtractTree(std::make_shared>()); + } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String & error) + { + NumberType value; + + tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&value), sizeof(value)); + } + else + { + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + } + return true; + } +}; + + +template +using JSONExtractInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractUInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractFloat64Impl = JSONExtractNumericImpl; + + +template +class JSONExtractBoolImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + bool value; + switch (element.type()) + { + case ElementType::BOOL: + value = element.getBool(); + break; + case ElementType::INT64: + value = element.getInt64() != 0; + break; + case ElementType::UINT64: + value = element.getUInt64() != 0; + break; + default: + return false; + } + + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(static_cast(value)); + return true; + } +}; + +template +class JSONExtractRawImpl; + +template +class JSONExtractStringImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (element.isNull()) + return false; + + if (!element.isString()) + 
return JSONExtractRawImpl::insertResultToColumn(dest, element, {}, format_settings, error); + + auto str = element.getString(); + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + return true; + } +}; + +template +class JSONExtractImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the return data type, illegal value: {}", + String(function_name), col.name); + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + extract_tree = buildJSONExtractTree(result_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + return extract_tree->insertResultToColumn(dest, element, insert_settings, format_settings, error); + } + +protected: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the values' data type, illegal value: {}", + String(function_name), col.name); + + DataTypePtr key_type = std::make_unique(); + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = buildJSONExtractTree(value_type, function_name); + insert_settings.insert_default_on_invalid_elements_in_complex_types = true; + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = 
assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + for (const auto & [key, value] : object) + { + if (extract_tree->insertResultToColumn(col_value, value, insert_settings, format_settings, error)) + col_key.insertData(key.data(), key.size()); + } + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr> extract_tree; + JSONExtractInsertSettings insert_settings; +}; + + +template +class JSONExtractRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String &) + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + jsonElementToString(element, buf, format_settings); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + return true; + } +}; + + +template +class JSONExtractArrayRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + ColumnArray & col_res = assert_cast(dest); + + for (auto value : array) + JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}, format_settings, error); + + col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); + return true; + } +}; + + +template +class JSONExtractKeysAndValuesRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + DataTypePtr string_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings & format_settings, String & error) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = assert_cast(col_tuple.getColumn(1)); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + JSONExtractRawImpl::insertResultToColumn(col_value, value, {}, format_settings, error); + } + + col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); + return true; + } +}; + +template +class 
JSONExtractKeysImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view, const FormatSettings &, String &) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + ColumnArray & col_res = assert_cast(dest); + auto & col_key = assert_cast(col_res.getData()); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + } + + col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); + return true; + } +}; + REGISTER_FUNCTION(JSON) { factory.registerFunction>(); diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 8a2ad457d34..5d44e22300d 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -1,1781 +1,1273 @@ -#pragma once - -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -#include "config.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -template -concept HasIndexOperator = requires (T t) -{ - t[0]; -}; - -/// Functions to parse JSONs and extract values from it. -/// The first argument of all these functions gets a JSON, -/// after that there are any number of arguments specifying path to a desired part from the JSON's root. -/// For example, -/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 - -class FunctionJSONHelpers -{ -public: - template typename Impl, class JSONParser> - class Executor - { - public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) - { - MutableColumnPtr to{result_type->createColumn()}; - to->reserve(input_rows_count); - - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); - - const auto & first_column = arguments[0]; - if (!isString(first_column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The first argument of function {} should be a string containing JSON, illegal type: " - "{}", String(Name::name), first_column.type->getName()); - - const ColumnPtr & arg_json = first_column.column; - const auto * col_json_const = typeid_cast(arg_json.get()); - const auto * col_json_string - = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); - - if (!col_json_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); - - const ColumnString::Chars & chars = col_json_string->getChars(); - const ColumnString::Offsets & offsets = col_json_string->getOffsets(); - - size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); - std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); - - /// Preallocate memory in parser if necessary. - JSONParser parser; - if constexpr (has_member_function_reserve::value) - { - size_t max_size = calculateMaxSize(offsets); - if (max_size) - parser.reserve(max_size); - } - - Impl impl; - - /// prepare() does Impl-specific preparation before handling each row. - if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) - impl.prepare(Name::name, arguments, result_type); - - using Element = typename JSONParser::Element; - - Element document; - bool document_ok = false; - if (col_json_const) - { - std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; - document_ok = parser.parse(json, document); - } - - for (const auto i : collections::range(0, input_rows_count)) - { - if (!col_json_const) - { - std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; - document_ok = parser.parse(json, document); - } - - bool added_to_column = false; - if (document_ok) - { - /// Perform moves. - Element element; - std::string_view last_key; - bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); - - if (moves_ok) - added_to_column = impl.insertResultToColumn(*to, element, last_key); - } - - /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. - if (!added_to_column) - to->insertDefault(); - } - return to; - } - }; - -private: - BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) - BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) - - /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. - /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) - /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. - /// Keys and indices can be nonconst, in this case they are calculated for each row. 
- enum class MoveType : uint8_t - { - Key, - Index, - ConstKey, - ConstIndex, - }; - - struct Move - { - explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} - Move(MoveType type_, const String & key_) : type(type_), key(key_) {} - MoveType type; - size_t index = 0; - String key; - }; - - static std::vector prepareMoves( - const char * function_name, - const ColumnsWithTypeAndName & columns, - size_t first_index_argument, - size_t num_index_arguments) - { - std::vector moves; - moves.reserve(num_index_arguments); - for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) - { - const auto & column = columns[i]; - if (!isString(column.type) && !isNativeInteger(column.type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The argument {} of function {} should be a string specifying key " - "or an integer specifying index, illegal type: {}", - std::to_string(i + 1), String(function_name), column.type->getName()); - - if (column.column && isColumnConst(*column.column)) - { - const auto & column_const = assert_cast(*column.column); - if (isString(column.type)) - moves.emplace_back(MoveType::ConstKey, column_const.getValue()); - else - moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); - } - else - { - if (isString(column.type)) - moves.emplace_back(MoveType::Key, ""); - else - moves.emplace_back(MoveType::Index, 0); - } - } - return moves; - } - - - /// Performs moves of types MoveType::Index and MoveType::ConstIndex. - template - static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, - const typename JSONParser::Element & document, const std::vector & moves, - typename JSONParser::Element & element, std::string_view & last_key) - { - typename JSONParser::Element res_element = document; - std::string_view key; - - for (size_t j = 0; j != moves.size(); ++j) - { - switch (moves[j].type) - { - case MoveType::ConstIndex: - { - if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) - return false; - break; - } - case MoveType::ConstKey: - { - key = moves[j].key; - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - case MoveType::Index: - { - Int64 index = (*arguments[j + 1].column)[row].get(); - if (!moveToElementByIndex(res_element, static_cast(index), key)) - return false; - break; - } - case MoveType::Key: - { - key = arguments[j + 1].column->getDataAt(row).toView(); - if (!moveToElementByKey(res_element, key)) - return false; - break; - } - } - } - - element = res_element; - last_key = key; - return true; - } - - template - static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) - { - if (element.isArray()) - { - auto array = element.getArray(); - if (index >= 0) - --index; - else - index += array.size(); - - if (static_cast(index) >= array.size()) - return false; - element = array[index]; - out_key = {}; - return true; - } - - if constexpr (HasIndexOperator) - { - if (element.isObject()) - { - auto object = element.getObject(); - if (index >= 0) - --index; - else - index += object.size(); - - if (static_cast(index) >= object.size()) - return false; - std::tie(out_key, element) = object[index]; - return true; - } - } - - return {}; - } - - /// Performs moves of types MoveType::Key and MoveType::ConstKey. 
- template - static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) - { - if (!element.isObject()) - return false; - auto object = element.getObject(); - return object.find(key, element); - } - - static size_t calculateMaxSize(const ColumnString::Offsets & offsets) - { - size_t max_size = 0; - for (const auto i : collections::range(0, offsets.size())) - { - size_t size = offsets[i] - offsets[i - 1]; - max_size = std::max(max_size, size); - } - if (max_size) - --max_size; - return max_size; - } - -}; - -template -class JSONExtractImpl; - -template -class JSONExtractKeysAndValuesImpl; - -/** -* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -* input arguments. -* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -* if needed. -*/ -template typename Impl> -constexpr bool functionForcesTheReturnType() -{ - return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -} - -template typename Impl> -class ExecutableFunctionJSON : public IExecutableFunction -{ - -public: - explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) - : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) - { - } - - String getName() const override { return Name::name; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - if (null_presence.has_null_constant) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - if constexpr (functionForcesTheReturnType()) - { - ColumnsWithTypeAndName columns_without_low_cardinality = arguments; - - for (auto & column : columns_without_low_cardinality) - { - column.column = recursiveRemoveLowCardinality(column.column); - column.type = recursiveRemoveLowCardinality(column.type); - } - - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - else - { - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - - if (null_presence.has_nullable) - temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - - if (result_type->lowCardinality()) - temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); - - return temporary_result; - } - } - -private: - - ColumnPtr - chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { -#if USE_SIMDJSON - if (allow_simdjson) - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - -#if USE_RAPIDJSON - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#else - return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -#endif - } - - NullPresence null_presence; - bool allow_simdjson; - DataTypePtr json_return_type; -}; - - -template typename Impl> -class FunctionBaseFunctionJSON : public IFunctionBase -{ -public: - explicit FunctionBaseFunctionJSON( - const NullPresence & null_presence_, - bool allow_simdjson_, - DataTypes argument_types_, - DataTypePtr return_type_, - DataTypePtr json_return_type_) - : null_presence(null_presence_) - , allow_simdjson(allow_simdjson_) - , argument_types(std::move(argument_types_)) - , return_type(std::move(return_type_)) - , json_return_type(std::move(json_return_type_)) - { - } - - String getName() const override { return Name::name; } - - const DataTypes & getArgumentTypes() const override - { - return argument_types; - } - - const DataTypePtr & getResultType() const override - { - return return_type; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override - { - return std::make_unique>(null_presence, allow_simdjson, json_return_type); - } - -private: - NullPresence null_presence; - bool allow_simdjson; - DataTypes argument_types; - DataTypePtr return_type; - DataTypePtr json_return_type; -}; - -/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
-template typename Impl> -class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -{ -public: - static constexpr auto name = Name::name; - - String getName() const override { return name; } - - static FunctionOverloadResolverPtr create(ContextPtr context_) - { - return std::make_unique(context_); - } - - explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override - { - return !functionForcesTheReturnType(); - } - - FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override - { - bool has_nothing_argument = false; - for (const auto & arg : arguments) - has_nothing_argument |= isNothing(arg.type); - - DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); - NullPresence null_presence = getNullPresense(arguments); - DataTypePtr return_type; - if (has_nothing_argument) - return_type = std::make_shared(); - else if (null_presence.has_null_constant) - return_type = makeNullable(std::make_shared()); - else if (null_presence.has_nullable) - return_type = makeNullable(json_return_type); - else - return_type = json_return_type; - - /// Top-level LowCardinality columns are processed outside JSON parser. - json_return_type = removeLowCardinality(json_return_type); - - DataTypes argument_types; - argument_types.reserve(arguments.size()); - for (const auto & argument : arguments) - argument_types.emplace_back(argument.type); - return std::make_unique>( - null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); - } -}; - -struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -struct NameJSONType { static constexpr auto name{"JSONType"}; }; -struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; - - -template -class JSONHasImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - ColumnVector & 
col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class IsValidJSONImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() != 1) - { - /// IsValidJSON() shouldn't get parameters other than JSON. - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", - String(function_name)); - } - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) - { - /// This function is called only if JSON is valid. - /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(1); - return true; - } -}; - - -template -class JSONLengthImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - size_t size; - if (element.isArray()) - size = element.getArray().size(); - else if (element.isObject()) - size = element.getObject().size(); - else - return false; - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(size); - return true; - } -}; - - -template -class JSONKeyImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) - { - if (last_key.empty()) - return false; - ColumnString & col_str = assert_cast(dest); - col_str.insertData(last_key.data(), last_key.size()); - return true; - } -}; - - -template -class JSONTypeImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - static const std::vector> values = { - {"Array", '['}, - {"Object", '{'}, - {"String", '"'}, - {"Int64", 'i'}, - {"UInt64", 'u'}, - {"Double", 'd'}, - {"Bool", 'b'}, - {"Null", 0}, /// the default value for the column. 
- }; - return std::make_shared>(values); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - UInt8 type; - switch (element.type()) - { - case ElementType::INT64: - type = 'i'; - break; - case ElementType::UINT64: - type = 'u'; - break; - case ElementType::DOUBLE: - type = 'd'; - break; - case ElementType::STRING: - type = '"'; - break; - case ElementType::ARRAY: - type = '['; - break; - case ElementType::OBJECT: - type = '{'; - break; - case ElementType::BOOL: - type = 'b'; - break; - case ElementType::NULL_VALUE: - type = 0; - break; - } - - ColumnVector & col_vec = assert_cast &>(dest); - col_vec.insertValue(type); - return true; - } -}; - - -template -class JSONExtractNumericImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared>(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - NumberType value; - - switch (element.type()) - { - case ElementType::DOUBLE: - if constexpr (std::is_floating_point_v) - { - /// We permit inaccurate conversion of double to float. - /// Example: double 0.1 from JSON is not representable in float. - /// But it will be more convenient for user to perform conversion. - value = static_cast(element.getDouble()); - } - else if (!accurate::convertNumeric(element.getDouble(), value)) - return false; - break; - case ElementType::UINT64: - if (!accurate::convertNumeric(element.getUInt64(), value)) - return false; - break; - case ElementType::INT64: - if (!accurate::convertNumeric(element.getInt64(), value)) - return false; - break; - case ElementType::BOOL: - if constexpr (is_integer && convert_bool_to_integer) - { - value = static_cast(element.getBool()); - break; - } - return false; - case ElementType::STRING: - { - auto rb = ReadBufferFromMemory{element.getString()}; - if constexpr (std::is_floating_point_v) - { - if (!tryReadFloatText(value, rb) || !rb.eof()) - return false; - } - else - { - if (tryReadIntText(value, rb) && rb.eof()) - break; - - /// Try to parse float and convert it to integer. 
- Float64 tmp_float; - rb.position() = rb.buffer().begin(); - if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) - return false; - - if (!accurate::convertNumeric(tmp_float, value)) - return false; - } - break; - } - default: - return false; - } - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } - return true; - } -}; - - -template -using JSONExtractInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractUInt64Impl = JSONExtractNumericImpl; -template -using JSONExtractFloat64Impl = JSONExtractNumericImpl; - - -template -class JSONExtractBoolImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - bool value; - switch (element.type()) - { - case ElementType::BOOL: - value = element.getBool(); - break; - case ElementType::INT64: - value = element.getInt64() != 0; - break; - case ElementType::UINT64: - value = element.getUInt64() != 0; - break; - default: - return false; - } - - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(static_cast(value)); - return true; - } -}; - -template -class JSONExtractRawImpl; - -template -class JSONExtractStringImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); - - auto str = element.getString(); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(str.data(), str.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - col_str.insertData(str.data(), str.size()); - } - return true; - } -}; - -/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. 
-template -struct JSONExtractTree -{ - using Element = typename JSONParser::Element; - - class Node - { - public: - Node() = default; - virtual ~Node() = default; - virtual bool insertResultToColumn(IColumn &, const Element &) = 0; - }; - - template - class NumericNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractNumericImpl::insertResultToColumn(dest, element, {}); - } - }; - - class LowCardinalityFixedStringNode : public Node - { - public: - explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - // If element is an object we delegate the insertion to JSONExtractRawImpl - if (element.isObject()) - return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); - else if (!element.isString()) - return false; - - auto str = element.getString(); - if (str.size() > fixed_length) - return false; - - // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. - // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) - // the data is padded here and written directly to the Low Cardinality Column - if (str.size() == fixed_length) - { - assert_cast(dest).insertData(str.data(), str.size()); - } - else - { - String padded_str(str); - padded_str.resize(fixed_length, '\0'); - - assert_cast(dest).insertData(padded_str.data(), padded_str.size()); - } - return true; - } - - private: - const size_t fixed_length; - }; - - class UUIDNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isString()) - return false; - - auto uuid = parseFromString(element.getString()); - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); - } - else - { - assert_cast(dest).insert(uuid); - } - return true; - } - }; - - template - class DecimalNode : public Node - { - public: - explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - const auto * type = assert_cast *>(data_type.get()); - - DecimalType value{}; - - switch (element.type()) - { - case ElementType::DOUBLE: - value = convertToDecimal, DataTypeDecimal>( - element.getDouble(), type->getScale()); - break; - case ElementType::UINT64: - value = convertToDecimal, DataTypeDecimal>( - element.getUInt64(), type->getScale()); - break; - case ElementType::INT64: - value = convertToDecimal, DataTypeDecimal>( - element.getInt64(), type->getScale()); - break; - case ElementType::STRING: { - auto rb = ReadBufferFromMemory{element.getString()}; - if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) - return false; - break; - } - default: - return false; - } - - assert_cast &>(dest).insertValue(value); - return true; - } - - private: - DataTypePtr data_type; - }; - - class StringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - } - }; - - class FixedStringNode : public Node - { - public: - bool insertResultToColumn(IColumn & dest, const Element 
& element) override - { - if (element.isNull()) - return false; - - if (!element.isString()) - return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); - - auto str = element.getString(); - auto & col_str = assert_cast(dest); - if (str.size() > col_str.getN()) - return false; - col_str.insertData(str.data(), str.size()); - - return true; - } - }; - - template - class EnumNode : public Node - { - public: - explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) - { - for (const auto & name_value_pair : name_value_pairs) - { - name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); - only_values.emplace(name_value_pair.second); - } - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & col_vec = assert_cast &>(dest); - - if (element.isInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isUInt64()) - { - Type value; - if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) - return false; - col_vec.insertValue(value); - return true; - } - - if (element.isString()) - { - auto value = name_to_value_map.find(element.getString()); - if (value == name_to_value_map.end()) - return false; - col_vec.insertValue(value->second); - return true; - } - - return false; - } - - private: - std::vector> name_value_pairs; - std::unordered_map name_to_value_map; - std::unordered_set only_values; - }; - - class NullableNode : public Node - { - public: - explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - /// We do not need to handle nullability in that case - /// because nested node handles LowCardinality columns and will call proper overload of `insertData` - return nested->insertResultToColumn(dest, element); - } - - ColumnNullable & col_null = assert_cast(dest); - if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) - return false; - col_null.getNullMapColumn().insertValue(0); - return true; - } - - private: - std::unique_ptr nested; - }; - - class ArrayNode : public Node - { - public: - explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - - ColumnArray & col_arr = assert_cast(dest); - auto & data = col_arr.getData(); - size_t old_size = data.size(); - bool were_valid_elements = false; - - for (auto value : array) - { - if (nested->insertResultToColumn(data, value)) - were_valid_elements = true; - else - data.insertDefault(); - } - - if (!were_valid_elements) - { - data.popBack(data.size() - old_size); - return false; - } - - col_arr.getOffsets().push_back(data.size()); - return true; - } - - private: - std::unique_ptr nested; - }; - - class TupleNode : public Node - { - public: - TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) - { - for (size_t i = 0; i != explicit_names.size(); ++i) - name_to_index_map.emplace(explicit_names[i], i); - } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - ColumnTuple & tuple = 
assert_cast(dest); - size_t old_size = dest.size(); - bool were_valid_elements = false; - - auto set_size = [&](size_t size) - { - for (size_t i = 0; i != tuple.tupleSize(); ++i) - { - auto & col = tuple.getColumn(i); - if (col.size() != size) - { - if (col.size() > size) - col.popBack(col.size() - size); - else - while (col.size() < size) - col.insertDefault(); - } - } - }; - - if (element.isArray()) - { - auto array = element.getArray(); - auto it = array.begin(); - - for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - if (element.isObject()) - { - auto object = element.getObject(); - if (name_to_index_map.empty()) - { - auto it = object.begin(); - for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) - { - if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) - were_valid_elements = true; - else - tuple.getColumn(index).insertDefault(); - } - } - else - { - for (const auto & [key, value] : object) - { - auto index = name_to_index_map.find(key); - if (index != name_to_index_map.end()) - { - if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) - were_valid_elements = true; - } - } - } - - set_size(old_size + static_cast(were_valid_elements)); - return were_valid_elements; - } - - return false; - } - - private: - std::vector> nested; - std::vector explicit_names; - std::unordered_map name_to_index_map; - }; - - class MapNode : public Node - { - public: - MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - if (!element.isObject()) - return false; - - ColumnMap & map_col = assert_cast(dest); - auto & offsets = map_col.getNestedColumn().getOffsets(); - auto & tuple_col = map_col.getNestedData(); - auto & key_col = tuple_col.getColumn(0); - auto & value_col = tuple_col.getColumn(1); - size_t old_size = tuple_col.size(); - - auto object = element.getObject(); - auto it = object.begin(); - for (; it != object.end(); ++it) - { - auto pair = *it; - - /// Insert key - key_col.insertData(pair.first.data(), pair.first.size()); - - /// Insert value - if (!value->insertResultToColumn(value_col, pair.second)) - value_col.insertDefault(); - } - - offsets.push_back(old_size + object.size()); - return true; - } - - private: - std::unique_ptr key; - std::unique_ptr value; - }; - - class VariantNode : public Node - { - public: - VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } - - bool insertResultToColumn(IColumn & dest, const Element & element) override - { - auto & column_variant = assert_cast(dest); - for (size_t i : order) - { - auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]->insertResultToColumn(variant, element)) - { - column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); - column_variant.getOffsets().push_back(variant.size() - 1); - return true; - } - } - - return false; - } - - private: - std::vector> variant_nodes; - /// Order in which we should try variants nodes. - /// For example, String should be always the last one. 
- std::vector order; - }; - - static std::unique_ptr build(const char * function_name, const DataTypePtr & type) - { - switch (type->getTypeId()) - { - case TypeIndex::UInt8: return std::make_unique>(); - case TypeIndex::UInt16: return std::make_unique>(); - case TypeIndex::UInt32: return std::make_unique>(); - case TypeIndex::UInt64: return std::make_unique>(); - case TypeIndex::UInt128: return std::make_unique>(); - case TypeIndex::UInt256: return std::make_unique>(); - case TypeIndex::Int8: return std::make_unique>(); - case TypeIndex::Int16: return std::make_unique>(); - case TypeIndex::Int32: return std::make_unique>(); - case TypeIndex::Int64: return std::make_unique>(); - case TypeIndex::Int128: return std::make_unique>(); - case TypeIndex::Int256: return std::make_unique>(); - case TypeIndex::Float32: return std::make_unique>(); - case TypeIndex::Float64: return std::make_unique>(); - case TypeIndex::String: return std::make_unique(); - case TypeIndex::FixedString: return std::make_unique(); - case TypeIndex::UUID: return std::make_unique(); - case TypeIndex::LowCardinality: - { - // The low cardinality case is treated in two different ways: - // For FixedString type, an especial class is implemented for inserting the data in the destination column, - // as the string length must be passed in order to check and pad the incoming data. - // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data - // as needed for the insertData function of the ColumnLowCardinality. - auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); - if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) - { - auto fixed_length = typeid_cast(dictionary_type.get())->getN(); - return std::make_unique(fixed_length); - } - return build(function_name, dictionary_type); - } - case TypeIndex::Decimal256: return std::make_unique>(type); - case TypeIndex::Decimal128: return std::make_unique>(type); - case TypeIndex::Decimal64: return std::make_unique>(type); - case TypeIndex::Decimal32: return std::make_unique>(type); - case TypeIndex::Enum8: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Enum16: - return std::make_unique>(static_cast(*type).getValues()); - case TypeIndex::Nullable: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Array: - { - return std::make_unique(build(function_name, static_cast(*type).getNestedType())); - } - case TypeIndex::Tuple: - { - const auto & tuple = static_cast(*type); - const auto & tuple_elements = tuple.getElements(); - std::vector> elements; - elements.reserve(tuple_elements.size()); - for (const auto & tuple_element : tuple_elements) - elements.emplace_back(build(function_name, tuple_element)); - return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? 
tuple.getElementNames() : Strings{}); - } - case TypeIndex::Map: - { - const auto & map_type = static_cast(*type); - const auto & key_type = map_type.getKeyType(); - if (!isString(removeLowCardinality(key_type))) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {} with key type not String", - String(function_name), - type->getName()); - - const auto & value_type = map_type.getValueType(); - return std::make_unique(build(function_name, key_type), build(function_name, value_type)); - } - case TypeIndex::Variant: - { - const auto & variant_type = static_cast(*type); - const auto & variants = variant_type.getVariants(); - std::vector> variant_nodes; - variant_nodes.reserve(variants.size()); - for (const auto & variant : variants) - variant_nodes.push_back(build(function_name, variant)); - return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); - } - default: - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} doesn't support the return type schema: {}", - String(function_name), type->getName()); - } - } -}; - - -template -class JSONExtractImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the return data type, illegal value: {}", - String(function_name), col.name); - - return DataTypeFactory::instance().get(col_type_const->getValue()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - extract_tree = JSONExtractTree::build(function_name, result_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - return extract_tree->insertResultToColumn(dest, element); - } - -protected: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractKeysAndValuesImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) - { - if (arguments.size() < 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); - - const auto & col = arguments.back(); - const auto * col_type_const = typeid_cast(col.column.get()); - if (!col_type_const || !isString(col.type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "The last argument of function {} should " - "be a constant string specifying the values' data type, illegal value: {}", - String(function_name), col.name); - - DataTypePtr key_type = std::make_unique(); - DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); - DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 2; } - - void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) - { - const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); - const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; - extract_tree = JSONExtractTree::build(function_name, value_type); - } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - size_t old_size = col_tuple.size(); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = col_tuple.getColumn(1); - - for (const auto & [key, value] : object) - { - if (extract_tree->insertResultToColumn(col_value, value)) - col_key.insertData(key.data(), key.size()); - } - - if (col_tuple.size() == old_size) - return false; - - col_arr.getOffsets().push_back(col_tuple.size()); - return true; - } - -private: - std::unique_ptr::Node> extract_tree; -}; - - -template -class JSONExtractRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - } - else - { - ColumnString & col_str = assert_cast(dest); - auto & chars = col_str.getChars(); - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - chars.push_back(0); - col_str.getOffsets().push_back(chars.size()); - } - return true; - } - - // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column - static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) - { - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - auto & col_str = assert_cast(dest); - - if (chars.size() > col_str.getN()) - return false; - - chars.resize_fill(col_str.getN()); - col_str.insertData(reinterpret_cast(chars.data()), chars.size()); - - - return true; - } - - // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column - static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) - { - if (element.getObject().size() > fixed_length) - return false; - - ColumnFixedString::Chars chars; - WriteBufferFromVector buf(chars, AppendModeTag()); - traverse(element, buf); - buf.finalize(); - - if (chars.size() > fixed_length) - return false; - chars.resize_fill(fixed_length); - assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); - - return true; - } - -private: - static void traverse(const Element & element, WriteBuffer & buf) - { - if (element.isInt64()) - { - writeIntText(element.getInt64(), buf); - return; - } - if (element.isUInt64()) - { - 
writeIntText(element.getUInt64(), buf); - return; - } - if (element.isDouble()) - { - writeFloatText(element.getDouble(), buf); - return; - } - if (element.isBool()) - { - if (element.getBool()) - writeCString("true", buf); - else - writeCString("false", buf); - return; - } - if (element.isString()) - { - writeJSONString(element.getString(), buf, formatSettings()); - return; - } - if (element.isArray()) - { - writeChar('[', buf); - bool need_comma = false; - for (auto value : element.getArray()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - traverse(value, buf); - } - writeChar(']', buf); - return; - } - if (element.isObject()) - { - writeChar('{', buf); - bool need_comma = false; - for (auto [key, value] : element.getObject()) - { - if (std::exchange(need_comma, true)) - writeChar(',', buf); - writeJSONString(key, buf, formatSettings()); - writeChar(':', buf); - traverse(value, buf); - } - writeChar('}', buf); - return; - } - if (element.isNull()) - { - writeCString("null", buf); - return; - } - } - - static const FormatSettings & formatSettings() - { - static const FormatSettings the_instance = [] - { - FormatSettings settings; - settings.json.escape_forward_slashes = false; - return settings; - }(); - return the_instance; - } -}; - - -template -class JSONExtractArrayRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_shared(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isArray()) - return false; - - auto array = element.getArray(); - ColumnArray & col_res = assert_cast(dest); - - for (auto value : array) - JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); - - col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); - return true; - } -}; - - -template -class JSONExtractKeysAndValuesRawImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - DataTypePtr string_type = std::make_unique(); - DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); - return std::make_unique(tuple_type); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - auto & col_arr = assert_cast(dest); - auto & col_tuple = assert_cast(col_arr.getData()); - auto & col_key = assert_cast(col_tuple.getColumn(0)); - auto & col_value = assert_cast(col_tuple.getColumn(1)); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); - } - - col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); - return true; - } -}; - -template -class JSONExtractKeysImpl -{ -public: - using Element = typename JSONParser::Element; - - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) - { - return std::make_unique(std::make_shared()); - } - - static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return 
arguments.size() - 1; } - - bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) - { - if (!element.isObject()) - return false; - - auto object = element.getObject(); - - ColumnArray & col_res = assert_cast(dest); - auto & col_key = assert_cast(col_res.getData()); - - for (const auto & [key, value] : object) - { - col_key.insertData(key.data(), key.size()); - } - - col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); - return true; - } -}; - -} +//#pragma once +// +//#include +//#include +// +//#include +// +//#include +// +//#include +//#include +//#include +// +//#include +////#include +// +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +//#include +//#include +//#include +// +//#include +//#include +// +// +//#include "config.h" +// +// +//namespace DB +//{ +// +//namespace ErrorCodes +//{ +// extern const int ILLEGAL_TYPE_OF_ARGUMENT; +// extern const int ILLEGAL_COLUMN; +// extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +//} +// +//template +//concept HasIndexOperator = requires (T t) +//{ +// t[0]; +//}; +// +///// Functions to parse JSONs and extract values from it. +///// The first argument of all these functions gets a JSON, +///// after that there are any number of arguments specifying path to a desired part from the JSON's root. +///// For example, +///// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 +// +//class FunctionJSONHelpers +//{ +//public: +// template typename Impl, class JSONParser> +// class Executor +// { +// public: +// static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) +// { +// MutableColumnPtr to{result_type->createColumn()}; +// to->reserve(input_rows_count); +// +// if (arguments.empty()) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); +// +// const auto & first_column = arguments[0]; +// if (!isString(first_column.type)) +// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, +// "The first argument of function {} should be a string containing JSON, illegal type: " +// "{}", String(Name::name), first_column.type->getName()); +// +// const ColumnPtr & arg_json = first_column.column; +// const auto * col_json_const = typeid_cast(arg_json.get()); +// const auto * col_json_string +// = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); +// +// if (!col_json_string) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); +// +// const ColumnString::Chars & chars = col_json_string->getChars(); +// const ColumnString::Offsets & offsets = col_json_string->getOffsets(); +// +// size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); +// std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); +// +// /// Preallocate memory in parser if necessary. 
+// JSONParser parser; +// if constexpr (has_member_function_reserve::value) +// { +// size_t max_size = calculateMaxSize(offsets); +// if (max_size) +// parser.reserve(max_size); +// } +// +// Impl impl; +// +// /// prepare() does Impl-specific preparation before handling each row. +// if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) +// impl.prepare(Name::name, arguments, result_type); +// +// using Element = typename JSONParser::Element; +// +// Element document; +// bool document_ok = false; +// if (col_json_const) +// { +// std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; +// document_ok = parser.parse(json, document); +// } +// +// for (const auto i : collections::range(0, input_rows_count)) +// { +// if (!col_json_const) +// { +// std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; +// document_ok = parser.parse(json, document); +// } +// +// bool added_to_column = false; +// if (document_ok) +// { +// /// Perform moves. +// Element element; +// std::string_view last_key; +// bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); +// +// if (moves_ok) +// added_to_column = impl.insertResultToColumn(*to, element, last_key); +// } +// +// /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. +// if (!added_to_column) +// to->insertDefault(); +// } +// return to; +// } +// }; +// +//private: +// BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) +// BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) +// +// /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. +// /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) +// /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. +// /// Keys and indices can be nonconst, in this case they are calculated for each row. 
+// enum class MoveType : uint8_t +// { +// Key, +// Index, +// ConstKey, +// ConstIndex, +// }; +// +// struct Move +// { +// explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} +// Move(MoveType type_, const String & key_) : type(type_), key(key_) {} +// MoveType type; +// size_t index = 0; +// String key; +// }; +// +// static std::vector prepareMoves( +// const char * function_name, +// const ColumnsWithTypeAndName & columns, +// size_t first_index_argument, +// size_t num_index_arguments) +// { +// std::vector moves; +// moves.reserve(num_index_arguments); +// for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) +// { +// const auto & column = columns[i]; +// if (!isString(column.type) && !isNativeInteger(column.type)) +// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, +// "The argument {} of function {} should be a string specifying key " +// "or an integer specifying index, illegal type: {}", +// std::to_string(i + 1), String(function_name), column.type->getName()); +// +// if (column.column && isColumnConst(*column.column)) +// { +// const auto & column_const = assert_cast(*column.column); +// if (isString(column.type)) +// moves.emplace_back(MoveType::ConstKey, column_const.getValue()); +// else +// moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); +// } +// else +// { +// if (isString(column.type)) +// moves.emplace_back(MoveType::Key, ""); +// else +// moves.emplace_back(MoveType::Index, 0); +// } +// } +// return moves; +// } +// +// +// /// Performs moves of types MoveType::Index and MoveType::ConstIndex. +// template +// static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, +// const typename JSONParser::Element & document, const std::vector & moves, +// typename JSONParser::Element & element, std::string_view & last_key) +// { +// typename JSONParser::Element res_element = document; +// std::string_view key; +// +// for (size_t j = 0; j != moves.size(); ++j) +// { +// switch (moves[j].type) +// { +// case MoveType::ConstIndex: +// { +// if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) +// return false; +// break; +// } +// case MoveType::ConstKey: +// { +// key = moves[j].key; +// if (!moveToElementByKey(res_element, key)) +// return false; +// break; +// } +// case MoveType::Index: +// { +// Int64 index = (*arguments[j + 1].column)[row].get(); +// if (!moveToElementByIndex(res_element, static_cast(index), key)) +// return false; +// break; +// } +// case MoveType::Key: +// { +// key = arguments[j + 1].column->getDataAt(row).toView(); +// if (!moveToElementByKey(res_element, key)) +// return false; +// break; +// } +// } +// } +// +// element = res_element; +// last_key = key; +// return true; +// } +// +// template +// static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) +// { +// if (element.isArray()) +// { +// auto array = element.getArray(); +// if (index >= 0) +// --index; +// else +// index += array.size(); +// +// if (static_cast(index) >= array.size()) +// return false; +// element = array[index]; +// out_key = {}; +// return true; +// } +// +// if constexpr (HasIndexOperator) +// { +// if (element.isObject()) +// { +// auto object = element.getObject(); +// if (index >= 0) +// --index; +// else +// index += object.size(); +// +// if (static_cast(index) >= object.size()) +// return false; +// std::tie(out_key, element) = object[index]; +// return true; +// 
} +// } +// +// return {}; +// } +// +// /// Performs moves of types MoveType::Key and MoveType::ConstKey. +// template +// static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) +// { +// if (!element.isObject()) +// return false; +// auto object = element.getObject(); +// return object.find(key, element); +// } +// +// static size_t calculateMaxSize(const ColumnString::Offsets & offsets) +// { +// size_t max_size = 0; +// for (const auto i : collections::range(0, offsets.size())) +// { +// size_t size = offsets[i] - offsets[i - 1]; +// max_size = std::max(max_size, size); +// } +// if (max_size) +// --max_size; +// return max_size; +// } +// +//}; +// +//template +//class JSONExtractImpl; +// +//template +//class JSONExtractKeysAndValuesImpl; +// +///** +//* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +//* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +//* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +//* input arguments. +//* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +//* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +//* if needed. +//*/ +//template typename Impl> +//constexpr bool functionForcesTheReturnType() +//{ +// return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +//} +// +//template typename Impl> +//class ExecutableFunctionJSON : public IExecutableFunction +//{ +// +//public: +// explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) +// : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) +// { +// } +// +// String getName() const override { return Name::name; } +// bool useDefaultImplementationForNulls() const override { return false; } +// bool useDefaultImplementationForConstants() const override { return true; } +// bool useDefaultImplementationForLowCardinalityColumns() const override +// { +// return !functionForcesTheReturnType(); +// } +// +// ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override +// { +// if (null_presence.has_null_constant) +// return result_type->createColumnConstWithDefaultValue(input_rows_count); +// +// if constexpr (functionForcesTheReturnType()) +// { +// ColumnsWithTypeAndName columns_without_low_cardinality = arguments; +// +// for (auto & column : columns_without_low_cardinality) +// { +// column.column = recursiveRemoveLowCardinality(column.column); +// column.type = recursiveRemoveLowCardinality(column.type); +// } +// +// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; +// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); +// +// if (null_presence.has_nullable) +// temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); +// +// if (result_type->lowCardinality()) +// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); +// +// return temporary_result; +// } +// else +// { +// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; +// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); +// +// if (null_presence.has_nullable) +// temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); +// +// if (result_type->lowCardinality()) +// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); +// +// return temporary_result; +// } +// } +// +//private: +// +// ColumnPtr +// chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const +// { +//#if USE_SIMDJSON +// if (allow_simdjson) +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#endif +// +//#if USE_RAPIDJSON +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#else +// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +//#endif +// } +// +// NullPresence null_presence; +// bool allow_simdjson; +// DataTypePtr json_return_type; +//}; +// +// +//template typename Impl> +//class FunctionBaseFunctionJSON : public IFunctionBase +//{ +//public: +// explicit FunctionBaseFunctionJSON( +// const NullPresence & null_presence_, +// bool allow_simdjson_, +// DataTypes argument_types_, +// DataTypePtr return_type_, +// DataTypePtr json_return_type_) +// : null_presence(null_presence_) +// , allow_simdjson(allow_simdjson_) +// , argument_types(std::move(argument_types_)) +// , return_type(std::move(return_type_)) +// , json_return_type(std::move(json_return_type_)) +// { +// } +// +// String getName() const override { return Name::name; } +// +// const DataTypes & getArgumentTypes() const override +// { +// return argument_types; +// } +// +// const DataTypePtr & getResultType() const override +// { +// return return_type; +// } +// +// bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } +// +// ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override +// { +// return std::make_unique>(null_presence, allow_simdjson, json_return_type); +// } +// +//private: +// NullPresence null_presence; +// bool allow_simdjson; +// DataTypes argument_types; +// DataTypePtr return_type; +// DataTypePtr json_return_type; +//}; +// +///// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +///// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
+//template typename Impl> +//class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +//{ +//public: +// static constexpr auto name = Name::name; +// +// String getName() const override { return name; } +// +// static FunctionOverloadResolverPtr create(ContextPtr context_) +// { +// return std::make_unique(context_); +// } +// +// explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} +// +// bool isVariadic() const override { return true; } +// size_t getNumberOfArguments() const override { return 0; } +// bool useDefaultImplementationForNulls() const override { return false; } +// bool useDefaultImplementationForLowCardinalityColumns() const override +// { +// return !functionForcesTheReturnType(); +// } +// +// FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override +// { +// bool has_nothing_argument = false; +// for (const auto & arg : arguments) +// has_nothing_argument |= isNothing(arg.type); +// +// DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); +// NullPresence null_presence = getNullPresense(arguments); +// DataTypePtr return_type; +// if (has_nothing_argument) +// return_type = std::make_shared(); +// else if (null_presence.has_null_constant) +// return_type = makeNullable(std::make_shared()); +// else if (null_presence.has_nullable) +// return_type = makeNullable(json_return_type); +// else +// return_type = json_return_type; +// +// /// Top-level LowCardinality columns are processed outside JSON parser. +// json_return_type = removeLowCardinality(json_return_type); +// +// DataTypes argument_types; +// argument_types.reserve(arguments.size()); +// for (const auto & argument : arguments) +// argument_types.emplace_back(argument.type); +// return std::make_unique>( +// null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); +// } +//}; +// +//struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +//struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +//struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +//struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +//struct NameJSONType { static constexpr auto name{"JSONType"}; }; +//struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +//struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +//struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +//struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +//struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +//struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +//struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +//struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +//struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +//struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +//struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; +// +// +//template +//class JSONHasImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) +// { +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(1); +// return true; +// } +//}; +// +// +//template +//class IsValidJSONImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() != 1) +// { +// /// IsValidJSON() shouldn't get parameters other than JSON. +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", +// String(function_name)); +// } +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) +// { +// /// This function is called only if JSON is valid. +// /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(1); +// return true; +// } +//}; +// +// +//template +//class JSONLengthImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// size_t size; +// if (element.isArray()) +// size = element.getArray().size(); +// else if (element.isObject()) +// size = element.getObject().size(); +// else +// return false; +// +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(size); +// return true; +// } +//}; +// +// +//template +//class JSONKeyImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) +// { +// if (last_key.empty()) +// return false; +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(last_key.data(), last_key.size()); +// return true; +// } +//}; +// +// +//template +//class JSONTypeImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// static const std::vector> values = { +// {"Array", '['}, +// {"Object", '{'}, +// {"String", '"'}, +// {"Int64", 'i'}, +// {"UInt64", 'u'}, +// {"Double", 'd'}, +// {"Bool", 'b'}, +// {"Null", 0}, /// the default value for the column. 
+// }; +// return std::make_shared>(values); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// UInt8 type; +// switch (element.type()) +// { +// case ElementType::INT64: +// type = 'i'; +// break; +// case ElementType::UINT64: +// type = 'u'; +// break; +// case ElementType::DOUBLE: +// type = 'd'; +// break; +// case ElementType::STRING: +// type = '"'; +// break; +// case ElementType::ARRAY: +// type = '['; +// break; +// case ElementType::OBJECT: +// type = '{'; +// break; +// case ElementType::BOOL: +// type = 'b'; +// break; +// case ElementType::NULL_VALUE: +// type = 0; +// break; +// } +// +// ColumnVector & col_vec = assert_cast &>(dest); +// col_vec.insertValue(type); +// return true; +// } +//}; +// +// +//template +//class JSONExtractNumericImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared>(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// NumberType value; +// +// switch (element.type()) +// { +// case ElementType::DOUBLE: +// if constexpr (std::is_floating_point_v) +// { +// /// We permit inaccurate conversion of double to float. +// /// Example: double 0.1 from JSON is not representable in float. +// /// But it will be more convenient for user to perform conversion. +// value = static_cast(element.getDouble()); +// } +// else if (!accurate::convertNumeric(element.getDouble(), value)) +// return false; +// break; +// case ElementType::UINT64: +// if (!accurate::convertNumeric(element.getUInt64(), value)) +// return false; +// break; +// case ElementType::INT64: +// if (!accurate::convertNumeric(element.getInt64(), value)) +// return false; +// break; +// case ElementType::BOOL: +// if constexpr (is_integer && convert_bool_to_integer) +// { +// value = static_cast(element.getBool()); +// break; +// } +// return false; +// case ElementType::STRING: +// { +// auto rb = ReadBufferFromMemory{element.getString()}; +// if constexpr (std::is_floating_point_v) +// { +// if (!tryReadFloatText(value, rb) || !rb.eof()) +// return false; +// } +// else +// { +// if (tryReadIntText(value, rb) && rb.eof()) +// break; +// +// /// Try to parse float and convert it to integer. 
+// Float64 tmp_float; +// rb.position() = rb.buffer().begin(); +// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) +// return false; +// +// if (!accurate::convertNumeric(tmp_float, value)) +// return false; +// } +// break; +// } +// default: +// return false; +// } +// +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnLowCardinality & col_low = assert_cast(dest); +// col_low.insertData(reinterpret_cast(&value), sizeof(value)); +// } +// else +// { +// auto & col_vec = assert_cast &>(dest); +// col_vec.insertValue(value); +// } +// return true; +// } +//}; +// +// +//template +//using JSONExtractInt64Impl = JSONExtractNumericImpl; +//template +//using JSONExtractUInt64Impl = JSONExtractNumericImpl; +//template +//using JSONExtractFloat64Impl = JSONExtractNumericImpl; +// +// +//template +//class JSONExtractBoolImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// bool value; +// switch (element.type()) +// { +// case ElementType::BOOL: +// value = element.getBool(); +// break; +// case ElementType::INT64: +// value = element.getInt64() != 0; +// break; +// case ElementType::UINT64: +// value = element.getUInt64() != 0; +// break; +// default: +// return false; +// } +// +// auto & col_vec = assert_cast &>(dest); +// col_vec.insertValue(static_cast(value)); +// return true; +// } +//}; +// +//template +//class JSONExtractRawImpl; +// +//template +//class JSONExtractStringImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (element.isNull()) +// return false; +// +// if (!element.isString()) +// return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); +// +// auto str = element.getString(); +// +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnLowCardinality & col_low = assert_cast(dest); +// col_low.insertData(str.data(), str.size()); +// } +// else +// { +// ColumnString & col_str = assert_cast(dest); +// col_str.insertData(str.data(), str.size()); +// } +// return true; +// } +//}; +// +// +//static const JSONExtractInsertSettings & getJSONExtractInsertSettings() +//{ +// static const JSONExtractInsertSettings instance = [] +// { +// JSONExtractInsertSettings settings; +// settings.insert_null_as_default = false; +// settings.insert_default_on_invalid_elements_in_complex_types = true; +// return settings; +// }(); +// return instance; +//} +// +//template +//class JSONExtractImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() < 2) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); +// +// const auto & col = arguments.back(); +// const 
auto * col_type_const = typeid_cast(col.column.get()); +// if (!col_type_const || !isString(col.type)) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, +// "The last argument of function {} should " +// "be a constant string specifying the return data type, illegal value: {}", +// String(function_name), col.name); +// +// return DataTypeFactory::instance().get(col_type_const->getValue()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } +// +// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) +// { +// extract_tree = buildJSONExtractTree(result_type, function_name); +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// String error; +// return extract_tree->insertResultToColumn(dest, element, getJSONExtractInsertSettings(), error); +// } +// +//protected: +// std::unique_ptr> extract_tree; +//}; +// +// +//template +//class JSONExtractKeysAndValuesImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) +// { +// if (arguments.size() < 2) +// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); +// +// const auto & col = arguments.back(); +// const auto * col_type_const = typeid_cast(col.column.get()); +// if (!col_type_const || !isString(col.type)) +// throw Exception(ErrorCodes::ILLEGAL_COLUMN, +// "The last argument of function {} should " +// "be a constant string specifying the values' data type, illegal value: {}", +// String(function_name), col.name); +// +// DataTypePtr key_type = std::make_unique(); +// DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); +// DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); +// return std::make_unique(tuple_type); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } +// +// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) +// { +// const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); +// const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; +// extract_tree = buildJSONExtractTree(value_type, function_name); +// } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// auto & col_arr = assert_cast(dest); +// auto & col_tuple = assert_cast(col_arr.getData()); +// size_t old_size = col_tuple.size(); +// auto & col_key = assert_cast(col_tuple.getColumn(0)); +// auto & col_value = col_tuple.getColumn(1); +// +// String error; +// for (const auto & [key, value] : object) +// { +// if (extract_tree->insertResultToColumn(col_value, value, getJSONExtractInsertSettings(), error)) +// col_key.insertData(key.data(), key.size()); +// } +// +// if (col_tuple.size() == old_size) +// return false; +// +// col_arr.getOffsets().push_back(col_tuple.size()); +// return true; +// } +// +//private: +// std::unique_ptr> extract_tree; +//}; +// +// +//template +//class JSONExtractRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const 
ColumnsWithTypeAndName &) +// { +// return std::make_shared(); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (dest.getDataType() == TypeIndex::LowCardinality) +// { +// ColumnString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); +// } +// else +// { +// ColumnString & col_str = assert_cast(dest); +// auto & chars = col_str.getChars(); +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// chars.push_back(0); +// col_str.getOffsets().push_back(chars.size()); +// } +// return true; +// } +// +// // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column +// static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) +// { +// ColumnFixedString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// +// auto & col_str = assert_cast(dest); +// +// if (chars.size() > col_str.getN()) +// return false; +// +// chars.resize_fill(col_str.getN()); +// col_str.insertData(reinterpret_cast(chars.data()), chars.size()); +// +// +// return true; +// } +// +// // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column +// static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) +// { +// if (element.getObject().size() > fixed_length) +// return false; +// +// ColumnFixedString::Chars chars; +// WriteBufferFromVector buf(chars, AppendModeTag()); +// traverse(element, buf); +// buf.finalize(); +// +// if (chars.size() > fixed_length) +// return false; +// chars.resize_fill(fixed_length); +// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); +// +// return true; +// } +// +//private: +// static void traverse(const Element & element, WriteBuffer & buf) +// { +// if (element.isInt64()) +// { +// writeIntText(element.getInt64(), buf); +// return; +// } +// if (element.isUInt64()) +// { +// writeIntText(element.getUInt64(), buf); +// return; +// } +// if (element.isDouble()) +// { +// writeFloatText(element.getDouble(), buf); +// return; +// } +// if (element.isBool()) +// { +// if (element.getBool()) +// writeCString("true", buf); +// else +// writeCString("false", buf); +// return; +// } +// if (element.isString()) +// { +// writeJSONString(element.getString(), buf, formatSettings()); +// return; +// } +// if (element.isArray()) +// { +// writeChar('[', buf); +// bool need_comma = false; +// for (auto value : element.getArray()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// traverse(value, buf); +// } +// writeChar(']', buf); +// return; +// } +// if (element.isObject()) +// { +// writeChar('{', buf); +// bool need_comma = false; +// for (auto [key, value] : element.getObject()) +// { +// if (std::exchange(need_comma, true)) +// writeChar(',', buf); +// writeJSONString(key, buf, formatSettings()); +// writeChar(':', buf); +// traverse(value, buf); +// } +// writeChar('}', buf); +// return; +// } +// if (element.isNull()) +// { +// writeCString("null", buf); +// return; +// } +// } +// 
+// static const FormatSettings & formatSettings() +// { +// static const FormatSettings the_instance = [] +// { +// FormatSettings settings; +// settings.json.escape_forward_slashes = false; +// return settings; +// }(); +// return the_instance; +// } +//}; +// +// +//template +//class JSONExtractArrayRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_shared(std::make_shared()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isArray()) +// return false; +// +// auto array = element.getArray(); +// ColumnArray & col_res = assert_cast(dest); +// +// for (auto value : array) +// JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); +// +// col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); +// return true; +// } +//}; +// +// +//template +//class JSONExtractKeysAndValuesRawImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// DataTypePtr string_type = std::make_unique(); +// DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); +// return std::make_unique(tuple_type); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// auto & col_arr = assert_cast(dest); +// auto & col_tuple = assert_cast(col_arr.getData()); +// auto & col_key = assert_cast(col_tuple.getColumn(0)); +// auto & col_value = assert_cast(col_tuple.getColumn(1)); +// +// for (const auto & [key, value] : object) +// { +// col_key.insertData(key.data(), key.size()); +// JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); +// } +// +// col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); +// return true; +// } +//}; +// +//template +//class JSONExtractKeysImpl +//{ +//public: +// using Element = typename JSONParser::Element; +// +// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) +// { +// return std::make_unique(std::make_shared()); +// } +// +// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } +// +// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) +// { +// if (!element.isObject()) +// return false; +// +// auto object = element.getObject(); +// +// ColumnArray & col_res = assert_cast(dest); +// auto & col_key = assert_cast(col_res.getData()); +// +// for (const auto & [key, value] : object) +// { +// col_key.insertData(key.data(), key.size()); +// } +// +// col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); +// return true; +// } +//}; +// +//} diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.reference b/tests/queries/0_stateless/03198_json_extract_more_types.reference new file mode 100644 index 00000000000..9a6580ff81b --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.reference @@ -0,0 +1,21 @@ +2020-01-01 
+2020-01-01 +2020-01-01 00:00:00 +2020-01-01 00:00:00.000000 +127.0.0.1 +2001:db8:85a3::8a2e:370:7334 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +Hello +Hello +\0\0\0 +Hello\0\0\0\0\0 +5801c962-1182-458a-89f8-d077da5074f9 diff --git a/tests/queries/0_stateless/03198_json_extract_more_types.sql b/tests/queries/0_stateless/03198_json_extract_more_types.sql new file mode 100644 index 00000000000..28d24bbb271 --- /dev/null +++ b/tests/queries/0_stateless/03198_json_extract_more_types.sql @@ -0,0 +1,29 @@ +set allow_suspicious_low_cardinality_types=1; + +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date'); +select JSONExtract('{"a" : "2020-01-01"}', 'a', 'Date32'); +select JSONExtract('{"a" : "2020-01-01 00:00:00"}', 'a', 'DateTime'); +select JSONExtract('{"a" : "2020-01-01 00:00:00.000000"}', 'a', 'DateTime64(6)'); +select JSONExtract('{"a" : "127.0.0.1"}', 'a', 'IPv4'); +select JSONExtract('{"a" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}', 'a', 'IPv6'); + + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int8)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int16)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(UInt64)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Int64)'); + +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); +select JSONExtract('{"a" : 42}', 'a', 'LowCardinality(Float32)'); + +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(String)'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(5))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(3))'); +select JSONExtract('{"a" : "Hello"}', 'a', 'LowCardinality(FixedString(10))'); + +select JSONExtract('{"a" : "5801c962-1182-458a-89f8-d077da5074f9"}', 'a', 'LowCardinality(UUID)'); + diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference new file mode 100644 index 00000000000..759b7763cd1 --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -0,0 +1,30 @@ +true Bool +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +42.42 Float64 +42 Int64 +-42 Int64 +18446744073709551615 UInt64 +Hello String +2020-01-01 Date +2020-01-01 00:00:00.000000000 DateTime64(9) +[1,2,3] Array(Nullable(Int64)) +['str1','str2','str3'] Array(Nullable(String)) +[[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) +['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01','2020-01-01 date'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) +['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) +['str','42'] Array(Nullable(String)) +[42,42.42] Array(Nullable(Float64)) +[42,18446744073709552000,42.42] Array(Nullable(Float64)) +[42,42.42] Array(Nullable(Float64)) +[NULL,NULL] Array(Nullable(String)) +[NULL,42] Array(Nullable(Int64)) +[[NULL],[],[42]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],[1,NULL,3],[NULL,2,NULL]] Array(Array(Nullable(Int64))) +[[],[NULL,NULL],['1',NULL,'3'],[NULL,'2',NULL],['2020-01-01']] Array(Array(Nullable(String))) +('str',42,[42]) Tuple(Nullable(String), Nullable(Int64), Array(Nullable(Int64))) +[42,18446744073709551615] 
Array(Nullable(UInt64)) +(-42,18446744073709551615) Tuple(Nullable(Int64), Nullable(UInt64)) diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.sql b/tests/queries/0_stateless/03199_json_extract_dynamic.sql new file mode 100644 index 00000000000..286949f4d3e --- /dev/null +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.sql @@ -0,0 +1,37 @@ +set input_format_json_try_infer_numbers_from_strings=1; + +select JSONExtract(materialize('{"d" : true}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : -42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 18446744073709551615}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : 42.42}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "-42"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "18446744073709551615"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : "Hello"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01"}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : "2020-01-01 00:00:00.000"}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [1, 2, 3]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str1", "str2", "str3"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[[1], [2, 3, 4]], [[5, 6], [7]]]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 date"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "str"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["2020-01-01", "2020-01-01 00:00:00", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : ["str", "42"]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 42.42]}'), 'd', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : [null, null]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [null, 42]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [[null], [], [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null]]}'), 'a', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"a" : [[], [null, null], ["1", null, "3"], [null, "2", null], ["2020-01-01"]]}'), 'a', 'Dynamic') as d, dynamicType(d); + +select JSONExtract(materialize('{"d" : ["str", 42, [42]]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [42, 18446744073709551615]}'), 'd', 'Dynamic') as d, dynamicType(d); +select JSONExtract(materialize('{"d" : [-42, 18446744073709551615]}'), 'd', 'Dynamic') as d, 
dynamicType(d); From 5fe594243a4fc281cf3ee878b2f79b09dfd83970 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 13:53:30 +0000 Subject: [PATCH 089/299] Remove old file --- src/Functions/FunctionsJSON.h | 1273 --------------------------------- 1 file changed, 1273 deletions(-) delete mode 100644 src/Functions/FunctionsJSON.h diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 5d44e22300d..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,1273 +0,0 @@ -//#pragma once -// -//#include -//#include -// -//#include -// -//#include -// -//#include -//#include -//#include -// -//#include -////#include -// -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -//#include -//#include -//#include -// -//#include -//#include -// -// -//#include "config.h" -// -// -//namespace DB -//{ -// -//namespace ErrorCodes -//{ -// extern const int ILLEGAL_TYPE_OF_ARGUMENT; -// extern const int ILLEGAL_COLUMN; -// extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -//} -// -//template -//concept HasIndexOperator = requires (T t) -//{ -// t[0]; -//}; -// -///// Functions to parse JSONs and extract values from it. -///// The first argument of all these functions gets a JSON, -///// after that there are any number of arguments specifying path to a desired part from the JSON's root. -///// For example, -///// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 -// -//class FunctionJSONHelpers -//{ -//public: -// template typename Impl, class JSONParser> -// class Executor -// { -// public: -// static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) -// { -// MutableColumnPtr to{result_type->createColumn()}; -// to->reserve(input_rows_count); -// -// if (arguments.empty()) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); -// -// const auto & first_column = arguments[0]; -// if (!isString(first_column.type)) -// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, -// "The first argument of function {} should be a string containing JSON, illegal type: " -// "{}", String(Name::name), first_column.type->getName()); -// -// const ColumnPtr & arg_json = first_column.column; -// const auto * col_json_const = typeid_cast(arg_json.get()); -// const auto * col_json_string -// = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); -// -// if (!col_json_string) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); -// -// const ColumnString::Chars & chars = col_json_string->getChars(); -// const ColumnString::Offsets & offsets = col_json_string->getOffsets(); -// -// size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); -// std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); -// -// /// Preallocate memory in parser if necessary. 
-// JSONParser parser; -// if constexpr (has_member_function_reserve::value) -// { -// size_t max_size = calculateMaxSize(offsets); -// if (max_size) -// parser.reserve(max_size); -// } -// -// Impl impl; -// -// /// prepare() does Impl-specific preparation before handling each row. -// if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) -// impl.prepare(Name::name, arguments, result_type); -// -// using Element = typename JSONParser::Element; -// -// Element document; -// bool document_ok = false; -// if (col_json_const) -// { -// std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; -// document_ok = parser.parse(json, document); -// } -// -// for (const auto i : collections::range(0, input_rows_count)) -// { -// if (!col_json_const) -// { -// std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; -// document_ok = parser.parse(json, document); -// } -// -// bool added_to_column = false; -// if (document_ok) -// { -// /// Perform moves. -// Element element; -// std::string_view last_key; -// bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); -// -// if (moves_ok) -// added_to_column = impl.insertResultToColumn(*to, element, last_key); -// } -// -// /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. -// if (!added_to_column) -// to->insertDefault(); -// } -// return to; -// } -// }; -// -//private: -// BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) -// BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) -// -// /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. -// /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) -// /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. -// /// Keys and indices can be nonconst, in this case they are calculated for each row. 
-// enum class MoveType : uint8_t -// { -// Key, -// Index, -// ConstKey, -// ConstIndex, -// }; -// -// struct Move -// { -// explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} -// Move(MoveType type_, const String & key_) : type(type_), key(key_) {} -// MoveType type; -// size_t index = 0; -// String key; -// }; -// -// static std::vector prepareMoves( -// const char * function_name, -// const ColumnsWithTypeAndName & columns, -// size_t first_index_argument, -// size_t num_index_arguments) -// { -// std::vector moves; -// moves.reserve(num_index_arguments); -// for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) -// { -// const auto & column = columns[i]; -// if (!isString(column.type) && !isNativeInteger(column.type)) -// throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, -// "The argument {} of function {} should be a string specifying key " -// "or an integer specifying index, illegal type: {}", -// std::to_string(i + 1), String(function_name), column.type->getName()); -// -// if (column.column && isColumnConst(*column.column)) -// { -// const auto & column_const = assert_cast(*column.column); -// if (isString(column.type)) -// moves.emplace_back(MoveType::ConstKey, column_const.getValue()); -// else -// moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); -// } -// else -// { -// if (isString(column.type)) -// moves.emplace_back(MoveType::Key, ""); -// else -// moves.emplace_back(MoveType::Index, 0); -// } -// } -// return moves; -// } -// -// -// /// Performs moves of types MoveType::Index and MoveType::ConstIndex. -// template -// static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, -// const typename JSONParser::Element & document, const std::vector & moves, -// typename JSONParser::Element & element, std::string_view & last_key) -// { -// typename JSONParser::Element res_element = document; -// std::string_view key; -// -// for (size_t j = 0; j != moves.size(); ++j) -// { -// switch (moves[j].type) -// { -// case MoveType::ConstIndex: -// { -// if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) -// return false; -// break; -// } -// case MoveType::ConstKey: -// { -// key = moves[j].key; -// if (!moveToElementByKey(res_element, key)) -// return false; -// break; -// } -// case MoveType::Index: -// { -// Int64 index = (*arguments[j + 1].column)[row].get(); -// if (!moveToElementByIndex(res_element, static_cast(index), key)) -// return false; -// break; -// } -// case MoveType::Key: -// { -// key = arguments[j + 1].column->getDataAt(row).toView(); -// if (!moveToElementByKey(res_element, key)) -// return false; -// break; -// } -// } -// } -// -// element = res_element; -// last_key = key; -// return true; -// } -// -// template -// static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) -// { -// if (element.isArray()) -// { -// auto array = element.getArray(); -// if (index >= 0) -// --index; -// else -// index += array.size(); -// -// if (static_cast(index) >= array.size()) -// return false; -// element = array[index]; -// out_key = {}; -// return true; -// } -// -// if constexpr (HasIndexOperator) -// { -// if (element.isObject()) -// { -// auto object = element.getObject(); -// if (index >= 0) -// --index; -// else -// index += object.size(); -// -// if (static_cast(index) >= object.size()) -// return false; -// std::tie(out_key, element) = object[index]; -// return true; -// 
} -// } -// -// return {}; -// } -// -// /// Performs moves of types MoveType::Key and MoveType::ConstKey. -// template -// static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) -// { -// if (!element.isObject()) -// return false; -// auto object = element.getObject(); -// return object.find(key, element); -// } -// -// static size_t calculateMaxSize(const ColumnString::Offsets & offsets) -// { -// size_t max_size = 0; -// for (const auto i : collections::range(0, offsets.size())) -// { -// size_t size = offsets[i] - offsets[i - 1]; -// max_size = std::max(max_size, size); -// } -// if (max_size) -// --max_size; -// return max_size; -// } -// -//}; -// -//template -//class JSONExtractImpl; -// -//template -//class JSONExtractKeysAndValuesImpl; -// -///** -//* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. -//* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` -//* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of -//* input arguments. -//* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - -//* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality -//* if needed. -//*/ -//template typename Impl> -//constexpr bool functionForcesTheReturnType() -//{ -// return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; -//} -// -//template typename Impl> -//class ExecutableFunctionJSON : public IExecutableFunction -//{ -// -//public: -// explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) -// : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) -// { -// } -// -// String getName() const override { return Name::name; } -// bool useDefaultImplementationForNulls() const override { return false; } -// bool useDefaultImplementationForConstants() const override { return true; } -// bool useDefaultImplementationForLowCardinalityColumns() const override -// { -// return !functionForcesTheReturnType(); -// } -// -// ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override -// { -// if (null_presence.has_null_constant) -// return result_type->createColumnConstWithDefaultValue(input_rows_count); -// -// if constexpr (functionForcesTheReturnType()) -// { -// ColumnsWithTypeAndName columns_without_low_cardinality = arguments; -// -// for (auto & column : columns_without_low_cardinality) -// { -// column.column = recursiveRemoveLowCardinality(column.column); -// column.type = recursiveRemoveLowCardinality(column.type); -// } -// -// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? 
createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; -// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); -// -// if (null_presence.has_nullable) -// temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); -// -// if (result_type->lowCardinality()) -// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); -// -// return temporary_result; -// } -// else -// { -// ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; -// ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); -// -// if (null_presence.has_nullable) -// temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); -// -// if (result_type->lowCardinality()) -// temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); -// -// return temporary_result; -// } -// } -// -//private: -// -// ColumnPtr -// chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const -// { -//#if USE_SIMDJSON -// if (allow_simdjson) -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#endif -// -//#if USE_RAPIDJSON -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#else -// return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); -//#endif -// } -// -// NullPresence null_presence; -// bool allow_simdjson; -// DataTypePtr json_return_type; -//}; -// -// -//template typename Impl> -//class FunctionBaseFunctionJSON : public IFunctionBase -//{ -//public: -// explicit FunctionBaseFunctionJSON( -// const NullPresence & null_presence_, -// bool allow_simdjson_, -// DataTypes argument_types_, -// DataTypePtr return_type_, -// DataTypePtr json_return_type_) -// : null_presence(null_presence_) -// , allow_simdjson(allow_simdjson_) -// , argument_types(std::move(argument_types_)) -// , return_type(std::move(return_type_)) -// , json_return_type(std::move(json_return_type_)) -// { -// } -// -// String getName() const override { return Name::name; } -// -// const DataTypes & getArgumentTypes() const override -// { -// return argument_types; -// } -// -// const DataTypePtr & getResultType() const override -// { -// return return_type; -// } -// -// bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } -// -// ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override -// { -// return std::make_unique>(null_presence, allow_simdjson, json_return_type); -// } -// -//private: -// NullPresence null_presence; -// bool allow_simdjson; -// DataTypes argument_types; -// DataTypePtr return_type; -// DataTypePtr json_return_type; -//}; -// -///// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. -///// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
-//template typename Impl> -//class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext -//{ -//public: -// static constexpr auto name = Name::name; -// -// String getName() const override { return name; } -// -// static FunctionOverloadResolverPtr create(ContextPtr context_) -// { -// return std::make_unique(context_); -// } -// -// explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} -// -// bool isVariadic() const override { return true; } -// size_t getNumberOfArguments() const override { return 0; } -// bool useDefaultImplementationForNulls() const override { return false; } -// bool useDefaultImplementationForLowCardinalityColumns() const override -// { -// return !functionForcesTheReturnType(); -// } -// -// FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override -// { -// bool has_nothing_argument = false; -// for (const auto & arg : arguments) -// has_nothing_argument |= isNothing(arg.type); -// -// DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); -// NullPresence null_presence = getNullPresense(arguments); -// DataTypePtr return_type; -// if (has_nothing_argument) -// return_type = std::make_shared(); -// else if (null_presence.has_null_constant) -// return_type = makeNullable(std::make_shared()); -// else if (null_presence.has_nullable) -// return_type = makeNullable(json_return_type); -// else -// return_type = json_return_type; -// -// /// Top-level LowCardinality columns are processed outside JSON parser. -// json_return_type = removeLowCardinality(json_return_type); -// -// DataTypes argument_types; -// argument_types.reserve(arguments.size()); -// for (const auto & argument : arguments) -// argument_types.emplace_back(argument.type); -// return std::make_unique>( -// null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); -// } -//}; -// -//struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; -//struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; -//struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; -//struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; -//struct NameJSONType { static constexpr auto name{"JSONType"}; }; -//struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; -//struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; -//struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; -//struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; -//struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; -//struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; -//struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; -//struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; -//struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; -//struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; -//struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; -// -// -//template -//class JSONHasImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & 
arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) -// { -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(1); -// return true; -// } -//}; -// -// -//template -//class IsValidJSONImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() != 1) -// { -// /// IsValidJSON() shouldn't get parameters other than JSON. -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", -// String(function_name)); -// } -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) -// { -// /// This function is called only if JSON is valid. -// /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(1); -// return true; -// } -//}; -// -// -//template -//class JSONLengthImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// size_t size; -// if (element.isArray()) -// size = element.getArray().size(); -// else if (element.isObject()) -// size = element.getObject().size(); -// else -// return false; -// -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(size); -// return true; -// } -//}; -// -// -//template -//class JSONKeyImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) -// { -// if (last_key.empty()) -// return false; -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(last_key.data(), last_key.size()); -// return true; -// } -//}; -// -// -//template -//class JSONTypeImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// static const std::vector> values = { -// {"Array", '['}, -// {"Object", '{'}, -// {"String", '"'}, -// {"Int64", 'i'}, -// {"UInt64", 'u'}, -// {"Double", 'd'}, -// {"Bool", 'b'}, -// {"Null", 0}, /// the default value for the column. 
-// }; -// return std::make_shared>(values); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// UInt8 type; -// switch (element.type()) -// { -// case ElementType::INT64: -// type = 'i'; -// break; -// case ElementType::UINT64: -// type = 'u'; -// break; -// case ElementType::DOUBLE: -// type = 'd'; -// break; -// case ElementType::STRING: -// type = '"'; -// break; -// case ElementType::ARRAY: -// type = '['; -// break; -// case ElementType::OBJECT: -// type = '{'; -// break; -// case ElementType::BOOL: -// type = 'b'; -// break; -// case ElementType::NULL_VALUE: -// type = 0; -// break; -// } -// -// ColumnVector & col_vec = assert_cast &>(dest); -// col_vec.insertValue(type); -// return true; -// } -//}; -// -// -//template -//class JSONExtractNumericImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared>(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// NumberType value; -// -// switch (element.type()) -// { -// case ElementType::DOUBLE: -// if constexpr (std::is_floating_point_v) -// { -// /// We permit inaccurate conversion of double to float. -// /// Example: double 0.1 from JSON is not representable in float. -// /// But it will be more convenient for user to perform conversion. -// value = static_cast(element.getDouble()); -// } -// else if (!accurate::convertNumeric(element.getDouble(), value)) -// return false; -// break; -// case ElementType::UINT64: -// if (!accurate::convertNumeric(element.getUInt64(), value)) -// return false; -// break; -// case ElementType::INT64: -// if (!accurate::convertNumeric(element.getInt64(), value)) -// return false; -// break; -// case ElementType::BOOL: -// if constexpr (is_integer && convert_bool_to_integer) -// { -// value = static_cast(element.getBool()); -// break; -// } -// return false; -// case ElementType::STRING: -// { -// auto rb = ReadBufferFromMemory{element.getString()}; -// if constexpr (std::is_floating_point_v) -// { -// if (!tryReadFloatText(value, rb) || !rb.eof()) -// return false; -// } -// else -// { -// if (tryReadIntText(value, rb) && rb.eof()) -// break; -// -// /// Try to parse float and convert it to integer. 
-// Float64 tmp_float; -// rb.position() = rb.buffer().begin(); -// if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) -// return false; -// -// if (!accurate::convertNumeric(tmp_float, value)) -// return false; -// } -// break; -// } -// default: -// return false; -// } -// -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnLowCardinality & col_low = assert_cast(dest); -// col_low.insertData(reinterpret_cast(&value), sizeof(value)); -// } -// else -// { -// auto & col_vec = assert_cast &>(dest); -// col_vec.insertValue(value); -// } -// return true; -// } -//}; -// -// -//template -//using JSONExtractInt64Impl = JSONExtractNumericImpl; -//template -//using JSONExtractUInt64Impl = JSONExtractNumericImpl; -//template -//using JSONExtractFloat64Impl = JSONExtractNumericImpl; -// -// -//template -//class JSONExtractBoolImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// bool value; -// switch (element.type()) -// { -// case ElementType::BOOL: -// value = element.getBool(); -// break; -// case ElementType::INT64: -// value = element.getInt64() != 0; -// break; -// case ElementType::UINT64: -// value = element.getUInt64() != 0; -// break; -// default: -// return false; -// } -// -// auto & col_vec = assert_cast &>(dest); -// col_vec.insertValue(static_cast(value)); -// return true; -// } -//}; -// -//template -//class JSONExtractRawImpl; -// -//template -//class JSONExtractStringImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (element.isNull()) -// return false; -// -// if (!element.isString()) -// return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); -// -// auto str = element.getString(); -// -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnLowCardinality & col_low = assert_cast(dest); -// col_low.insertData(str.data(), str.size()); -// } -// else -// { -// ColumnString & col_str = assert_cast(dest); -// col_str.insertData(str.data(), str.size()); -// } -// return true; -// } -//}; -// -// -//static const JSONExtractInsertSettings & getJSONExtractInsertSettings() -//{ -// static const JSONExtractInsertSettings instance = [] -// { -// JSONExtractInsertSettings settings; -// settings.insert_null_as_default = false; -// settings.insert_default_on_invalid_elements_in_complex_types = true; -// return settings; -// }(); -// return instance; -//} -// -//template -//class JSONExtractImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() < 2) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); -// -// const auto & col = arguments.back(); -// const 
auto * col_type_const = typeid_cast(col.column.get()); -// if (!col_type_const || !isString(col.type)) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, -// "The last argument of function {} should " -// "be a constant string specifying the return data type, illegal value: {}", -// String(function_name), col.name); -// -// return DataTypeFactory::instance().get(col_type_const->getValue()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } -// -// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) -// { -// extract_tree = buildJSONExtractTree(result_type, function_name); -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// String error; -// return extract_tree->insertResultToColumn(dest, element, getJSONExtractInsertSettings(), error); -// } -// -//protected: -// std::unique_ptr> extract_tree; -//}; -// -// -//template -//class JSONExtractKeysAndValuesImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) -// { -// if (arguments.size() < 2) -// throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); -// -// const auto & col = arguments.back(); -// const auto * col_type_const = typeid_cast(col.column.get()); -// if (!col_type_const || !isString(col.type)) -// throw Exception(ErrorCodes::ILLEGAL_COLUMN, -// "The last argument of function {} should " -// "be a constant string specifying the values' data type, illegal value: {}", -// String(function_name), col.name); -// -// DataTypePtr key_type = std::make_unique(); -// DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); -// DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); -// return std::make_unique(tuple_type); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } -// -// void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) -// { -// const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); -// const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; -// extract_tree = buildJSONExtractTree(value_type, function_name); -// } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// auto & col_arr = assert_cast(dest); -// auto & col_tuple = assert_cast(col_arr.getData()); -// size_t old_size = col_tuple.size(); -// auto & col_key = assert_cast(col_tuple.getColumn(0)); -// auto & col_value = col_tuple.getColumn(1); -// -// String error; -// for (const auto & [key, value] : object) -// { -// if (extract_tree->insertResultToColumn(col_value, value, getJSONExtractInsertSettings(), error)) -// col_key.insertData(key.data(), key.size()); -// } -// -// if (col_tuple.size() == old_size) -// return false; -// -// col_arr.getOffsets().push_back(col_tuple.size()); -// return true; -// } -// -//private: -// std::unique_ptr> extract_tree; -//}; -// -// -//template -//class JSONExtractRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const 
ColumnsWithTypeAndName &) -// { -// return std::make_shared(); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (dest.getDataType() == TypeIndex::LowCardinality) -// { -// ColumnString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); -// } -// else -// { -// ColumnString & col_str = assert_cast(dest); -// auto & chars = col_str.getChars(); -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// chars.push_back(0); -// col_str.getOffsets().push_back(chars.size()); -// } -// return true; -// } -// -// // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column -// static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) -// { -// ColumnFixedString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// -// auto & col_str = assert_cast(dest); -// -// if (chars.size() > col_str.getN()) -// return false; -// -// chars.resize_fill(col_str.getN()); -// col_str.insertData(reinterpret_cast(chars.data()), chars.size()); -// -// -// return true; -// } -// -// // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column -// static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) -// { -// if (element.getObject().size() > fixed_length) -// return false; -// -// ColumnFixedString::Chars chars; -// WriteBufferFromVector buf(chars, AppendModeTag()); -// traverse(element, buf); -// buf.finalize(); -// -// if (chars.size() > fixed_length) -// return false; -// chars.resize_fill(fixed_length); -// assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); -// -// return true; -// } -// -//private: -// static void traverse(const Element & element, WriteBuffer & buf) -// { -// if (element.isInt64()) -// { -// writeIntText(element.getInt64(), buf); -// return; -// } -// if (element.isUInt64()) -// { -// writeIntText(element.getUInt64(), buf); -// return; -// } -// if (element.isDouble()) -// { -// writeFloatText(element.getDouble(), buf); -// return; -// } -// if (element.isBool()) -// { -// if (element.getBool()) -// writeCString("true", buf); -// else -// writeCString("false", buf); -// return; -// } -// if (element.isString()) -// { -// writeJSONString(element.getString(), buf, formatSettings()); -// return; -// } -// if (element.isArray()) -// { -// writeChar('[', buf); -// bool need_comma = false; -// for (auto value : element.getArray()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// traverse(value, buf); -// } -// writeChar(']', buf); -// return; -// } -// if (element.isObject()) -// { -// writeChar('{', buf); -// bool need_comma = false; -// for (auto [key, value] : element.getObject()) -// { -// if (std::exchange(need_comma, true)) -// writeChar(',', buf); -// writeJSONString(key, buf, formatSettings()); -// writeChar(':', buf); -// traverse(value, buf); -// } -// writeChar('}', buf); -// return; -// } -// if (element.isNull()) -// { -// writeCString("null", buf); -// return; -// } -// } -// 
-// static const FormatSettings & formatSettings() -// { -// static const FormatSettings the_instance = [] -// { -// FormatSettings settings; -// settings.json.escape_forward_slashes = false; -// return settings; -// }(); -// return the_instance; -// } -//}; -// -// -//template -//class JSONExtractArrayRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_shared(std::make_shared()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isArray()) -// return false; -// -// auto array = element.getArray(); -// ColumnArray & col_res = assert_cast(dest); -// -// for (auto value : array) -// JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); -// -// col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); -// return true; -// } -//}; -// -// -//template -//class JSONExtractKeysAndValuesRawImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// DataTypePtr string_type = std::make_unique(); -// DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); -// return std::make_unique(tuple_type); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// auto & col_arr = assert_cast(dest); -// auto & col_tuple = assert_cast(col_arr.getData()); -// auto & col_key = assert_cast(col_tuple.getColumn(0)); -// auto & col_value = assert_cast(col_tuple.getColumn(1)); -// -// for (const auto & [key, value] : object) -// { -// col_key.insertData(key.data(), key.size()); -// JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); -// } -// -// col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); -// return true; -// } -//}; -// -//template -//class JSONExtractKeysImpl -//{ -//public: -// using Element = typename JSONParser::Element; -// -// static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) -// { -// return std::make_unique(std::make_shared()); -// } -// -// static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } -// -// bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) -// { -// if (!element.isObject()) -// return false; -// -// auto object = element.getObject(); -// -// ColumnArray & col_res = assert_cast(dest); -// auto & col_key = assert_cast(col_res.getData()); -// -// for (const auto & [key, value] : object) -// { -// col_key.insertData(key.data(), key.size()); -// } -// -// col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); -// return true; -// } -//}; -// -//} From 63303dd79893ace08ce2ed4be6bfee422287d44b Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 14:03:04 +0000 Subject: [PATCH 090/299] Fix style --- src/Formats/JSONExtractTree.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp 
b/src/Formats/JSONExtractTree.cpp index 6d019f96ba6..18437c16bc9 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -50,6 +50,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + template void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings) { @@ -207,7 +212,7 @@ namespace { template -String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) +String jsonElementToString(const typename JSONParser::Element & element, const FormatSettings & format_settings) { WriteBufferFromOwnString buf; jsonElementToString(element, buf, format_settings); @@ -1440,7 +1445,7 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::Date:; return std::make_unique>(); case TypeIndex::Date32: - return std::make_unique>(); + return std::make_unique>(); case TypeIndex::DateTime: return std::make_unique>(assert_cast(*type)); case TypeIndex::DateTime64: From 0b909ed673185cf1cc410a7b5339d264e8993cee Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Wed, 3 Jul 2024 15:14:55 +0100 Subject: [PATCH 091/299] Update json.md Point to guide for json --- docs/en/sql-reference/data-types/json.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index c29be2cff58..9e9c3fdb489 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -5,6 +5,9 @@ sidebar_label: Object Data Type keywords: [object, data type] --- +:::note +**ClickHouse handles JSON documents in a variety of ways, allowing you to bring structure to this data and query it quickly and efficiently.
See [this guide](/docs/en/integrations/data-formats/json) for detailed guidance on working with JSON.**::: + # Object Data Type :::note From d03fcb5ff121203f9cd6bf729df98764593328fe Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 14:23:38 +0000 Subject: [PATCH 092/299] Fix --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 6519d54a8c5..f2ad1dc6717 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -271,7 +271,7 @@ namespace { if (WhichDataType(type).isInt64()) { - bool is_negative = json_info->negative_integers.contains(type.get()); + bool is_negative = json_info && json_info->negative_integers.contains(type.get()); have_negative_integers |= is_negative; if (!is_negative) type = std::make_shared(); From 6530ae104d16ffbda51cf849b5f89e3d4080d2af Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Jul 2024 15:23:01 +0000 Subject: [PATCH 093/299] Fix tests --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index f2ad1dc6717..3c374ada9e6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -296,7 +296,7 @@ namespace if (which.isInt64() || which.isUInt64()) { auto new_type = std::make_shared(); - if (json_info->numbers_parsed_from_json_strings.erase(type.get())) + if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get())) json_info->numbers_parsed_from_json_strings.insert(new_type.get()); type = new_type; } From a5adf31b9e4dfa041150fa263ab68f32cb47122c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Jul 2024 19:30:36 +0200 Subject: [PATCH 094/299] Fix special build --- src/Formats/JSONExtractTree.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 18437c16bc9..b94981e7cb4 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1465,9 +1465,9 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::LowCardinality: { /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. 
- auto lc_type = typeid_cast(type.get()); - auto dictionary_type = removeNullable(lc_type->getDictionaryType()); - bool is_nullable = lc_type->isLowCardinalityNullable(); + const auto & lc_type = assert_cast(*type)); + auto dictionary_type = removeNullable(lc_type.getDictionaryType()); + bool is_nullable = lc_type.isLowCardinalityNullable(); switch (dictionary_type->getTypeId()) { From bbbf6cd6f9fef235a29fcb059bdd1a6833fe146a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 19:41:05 +0100 Subject: [PATCH 095/299] remove todo --- src/Interpreters/HashTablesStatistics.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/HashTablesStatistics.h b/src/Interpreters/HashTablesStatistics.h index 73dd3c097d4..7b4c4fcbfeb 100644 --- a/src/Interpreters/HashTablesStatistics.h +++ b/src/Interpreters/HashTablesStatistics.h @@ -27,7 +27,7 @@ struct StatsCollectingParams void disable() { key = 0; } UInt64 key = 0; - const size_t max_entries_for_hash_table_stats = 0; /// TODO: move to server settings + const size_t max_entries_for_hash_table_stats = 0; const size_t max_size_to_preallocate = 0; }; From 8cb7936838357783b702fbb461c22a7178d34bf3 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 19:41:26 +0100 Subject: [PATCH 096/299] dedicated setting for joins --- src/Core/Settings.h | 3 +++ src/Core/SettingsChangesHistory.cpp | 2 ++ src/Planner/PlannerJoins.cpp | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b1bb6edfc38..898dd02c655 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -709,6 +709,9 @@ class IColumn; M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \ \ + M(Bool, collect_hash_table_stats_during_joins, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \ + M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \ + \ M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \ M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \ M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. 
Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..b3bbd9d424a 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,8 @@ static std::initializer_list tryCreateJoin(JoinAlgorithm algorithm, const auto & settings = query_context->getSettingsRef(); StatsCollectingParams params{ calculateCacheKey(table_join, right_table_expression), - settings.collect_hash_table_stats_during_aggregation, + settings.collect_hash_table_stats_during_joins, query_context->getServerSettings().max_entries_for_hash_table_stats, - settings.max_size_to_preallocate_for_aggregation}; + settings.max_size_to_preallocate_for_joins}; return std::make_shared( query_context, table_join, query_context->getSettings().max_threads, right_table_expression_header, params); } From a61907e96e3c65e85406b680a8ede75f4c4a8015 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 20:46:36 +0100 Subject: [PATCH 097/299] better test --- .../0_stateless/03173_parallel_replicas_join_bug.reference | 5 ++++- .../queries/0_stateless/03173_parallel_replicas_join_bug.sh | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference index b23d6b02bc1..93018551e1b 100644 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.reference @@ -1,7 +1,10 @@ a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever a1451105-722e-4fe7-bfaa-65ad2ae249c2 whatever +--------------------------- a1451105-722e-4fe7-bfaa-65ad2ae249c2 a1451105-722e-4fe7-bfaa-65ad2ae249c2 -a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- +a1451105-722e-4fe7-bfaa-65ad2ae249c2 +--------------------------- a1451105-722e-4fe7-bfaa-65ad2ae249c2 a1451105-722e-4fe7-bfaa-65ad2ae249c2 diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index 4638609b00c..20a29e2734e 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -30,6 +30,8 @@ INNER JOIN view( WHERE status IN ['CREATED', 'CREATING'] ORDER BY event_time DESC; +SELECT '---------------------------'; + with results1 as ( SELECT id @@ -45,6 +47,8 @@ results2 as ( ) select * from results1 union all select * from results2; +SELECT '---------------------------'; + with results1 as ( SELECT id @@ -60,6 +64,8 @@ results2 as ( ) select * from results1 t1 inner join results2 t2 using (id); +SELECT '---------------------------'; + with results1 as ( SELECT t1.id From 37851686ad20edf14a736e676f9d364818733c65 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 3 Jul 2024 20:47:07 +0100 Subject: [PATCH 098/299] better variable name --- src/Planner/PlannerJoinTree.cpp | 5 +++-- src/Storages/SelectQueryInfo.h | 2 +- src/Storages/StorageMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 0af7bfea0b0..6c4fcfa345b 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -647,7 +647,8 @@ JoinTreeQueryPlan 
buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres auto table_expression_query_info = select_query_info; table_expression_query_info.table_expression = table_expression; table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions(); - table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; + table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas + = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table; size_t max_streams = settings.max_threads; size_t max_threads_execute_query = settings.max_threads; @@ -862,7 +863,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres /// and find some other table that might be used for reading with parallel replicas. It will lead to errors. const bool other_table_already_chosen_for_reading_with_parallel_replicas = planner_context->getGlobalPlannerContext()->parallel_replicas_table - && !table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower; + && !table_expression_query_info.current_table_chosen_for_reading_with_parallel_replicas; if (other_table_already_chosen_for_reading_with_parallel_replicas) planner_context->getMutableQueryContext()->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 52b6674c93d..5662f057bd4 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -163,7 +163,7 @@ struct SelectQueryInfo /// It's guaranteed to be present in JOIN TREE of `query_tree` QueryTreeNodePtr table_expression; - bool analyzer_can_use_parallel_replicas_on_follower = false; + bool current_table_chosen_for_reading_with_parallel_replicas = false; /// Table expression modifiers for storage std::optional table_expression_modifiers; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9352f772ce1..8c24a9c191f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -225,7 +225,8 @@ void StorageMergeTree::read( { const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + && (!local_context->getSettingsRef().allow_experimental_analyzer + || query_info.current_table_chosen_for_reading_with_parallel_replicas); if (auto plan = reader.read( column_names, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db58d0081c6..480cdbb8c66 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5508,7 +5508,8 @@ void StorageReplicatedMergeTree::readLocalImpl( const size_t num_streams) { const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + && (!local_context->getSettingsRef().allow_experimental_analyzer + || query_info.current_table_chosen_for_reading_with_parallel_replicas); auto plan = reader.read( column_names, storage_snapshot, query_info, From 41b9216dd1d862b46ed72d50e899197e2fec9daa Mon Sep 17 00:00:00 2001 From: 
Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 00:22:41 +0200 Subject: [PATCH 099/299] Fix build --- src/Formats/JSONExtractTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index b94981e7cb4..827f276311a 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1465,7 +1465,7 @@ std::unique_ptr> buildJSONExtractTree(const Data case TypeIndex::LowCardinality: { /// To optimize inserting into LowCardinality we have special nodes for LowCardinality of numeric and string types. - const auto & lc_type = assert_cast(*type)); + const auto & lc_type = assert_cast(*type); auto dictionary_type = removeNullable(lc_type.getDictionaryType()); bool is_nullable = lc_type.isLowCardinalityNullable(); From 7be481decf23d39afbb1fac9ced2d67d391a859c Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Jul 2024 18:01:14 +0200 Subject: [PATCH 100/299] init --- src/Interpreters/DatabaseCatalog.cpp | 220 +++++++++++++------- src/Interpreters/DatabaseCatalog.h | 11 +- src/Interpreters/InterpreterUndropQuery.cpp | 2 +- 3 files changed, 155 insertions(+), 78 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index aaec94a4fb0..0a71a842452 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include @@ -190,6 +193,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); + drop_table_concurrency = getContext()->getConfigRef().getInt64("database_catalog_drop_table_concurrency", drop_table_concurrency); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); @@ -1141,7 +1145,7 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr (*drop_task)->schedule(); } -void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) +void DatabaseCatalog::undropTable(StorageID table_id) { String latest_metadata_dropped_path; TableMarkedAsDropped dropped_table; @@ -1216,91 +1220,155 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) LOG_INFO(log, "Table {} was successfully undropped.", dropped_table.table_id.getNameForLogs()); } +std::tuple DatabaseCatalog::getDroppedTablesCountAndInuseCount() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + size_t in_use_count = 0; + for (const auto & item : tables_marked_dropped) + { + bool in_use = item.table && !item.table.unique(); + in_use_count += in_use; + } + return {tables_marked_dropped.size(), in_use_count}; +} + +time_t DatabaseCatalog::getMinDropTime() +{ + time_t min_drop_time = std::numeric_limits::max(); + for (const auto & item : tables_marked_dropped) + { + min_drop_time = std::min(min_drop_time, item.drop_time); + } + return min_drop_time; +} + +DatabaseCatalog::TablesMarkedAsDropped 
DatabaseCatalog::getTablesToDrop() +{ + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + DatabaseCatalog::TablesMarkedAsDropped result; + + std::lock_guard lock(tables_marked_dropped_mutex); + + auto it = tables_marked_dropped.begin(); + while (it != tables_marked_dropped.end()) + { + bool in_use = it->table && !it->table.unique(); + bool old_enough = it->drop_time <= current_time; + LOG_DEBUG(log, "check {}: in_use {}, old_enough {}", it->table_id.getFullTableName(), in_use, old_enough); + + if (in_use || !old_enough) + { + ++it; + continue; + } + + if (it == first_async_drop_in_queue) + ++first_async_drop_in_queue; + + result.emplace_back(std::move(*it)); + it = tables_marked_dropped.erase(it); + } + + return result; +} + +void DatabaseCatalog::rescheduleDropTableTask() +{ + std::lock_guard lock(tables_marked_dropped_mutex); + + if (tables_marked_dropped.empty()) + return; + + if (first_async_drop_in_queue != tables_marked_dropped.begin()) + { + (*drop_task)->scheduleAfter(0); + return; + } + + time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + auto min_drop_time = getMinDropTime(); + time_t schedule_after = min_drop_time < current_time ? (min_drop_time - current_time) * 1000 : 0; + (*drop_task)->scheduleAfter(schedule_after); +} + +void DatabaseCatalog::dropTablesParallel(TablesMarkedAsDropped tables_to_drop) +{ + if (tables_to_drop.empty()) + return; + + SCOPE_EXIT({ + std::lock_guard lock(tables_marked_dropped_mutex); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + first_async_drop_in_queue = tables_to_drop.begin(); + + tables_marked_dropped.splice(tables_marked_dropped.end(), tables_to_drop); + }); + + ThreadPool pool( + CurrentMetrics::DatabaseCatalogThreads, + CurrentMetrics::DatabaseCatalogThreadsActive, + CurrentMetrics::DatabaseCatalogThreadsScheduled, + /* max_threads */drop_table_concurrency, + /* max_free_threads */0, + /* queue_size */tables_to_drop.size()); + + while (!tables_to_drop.empty()) + { + auto front_table = std::move(tables_to_drop.front()); + tables_to_drop.pop_front(); + + pool.scheduleOrThrowOnError([&, table = front_table] () + { + try + { + dropTableFinally(table); + std::lock_guard lock(tables_marked_dropped_mutex); + [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); + chassert(removed); + wait_table_finally_dropped.notify_all(); + } + catch (...) + { + tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + + ". Will retry later."); + { + std::lock_guard lock(tables_marked_dropped_mutex); + tables_marked_dropped.emplace_back(table); + tables_marked_dropped.back().drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } + } + }); + } + + pool.wait(); +} + void DatabaseCatalog::dropTableDataTask() { /// Background task that removes data of tables which were marked as dropped by Atomic databases. /// Table can be removed when it's not used by queries and drop_delay_sec elapsed since it was marked as dropped. - bool need_reschedule = true; - /// Default reschedule time for the case when we are waiting for reference count to become 1. 
- size_t schedule_after_ms = reschedule_time_ms; - TableMarkedAsDropped table; - try + auto [drop_tables_count, drop_tables_in_use_count] = getDroppedTablesCountAndInuseCount(); + + auto tables_to_drop = getTablesToDrop(); + + if (!tables_to_drop.empty()) { - std::lock_guard lock(tables_marked_dropped_mutex); - if (tables_marked_dropped.empty()) - return; - time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - time_t min_drop_time = std::numeric_limits::max(); - size_t tables_in_use_count = 0; - auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem) - { - bool not_in_use = !elem.table || elem.table.unique(); - bool old_enough = elem.drop_time <= current_time; - min_drop_time = std::min(min_drop_time, elem.drop_time); - tables_in_use_count += !not_in_use; - return not_in_use && old_enough; - }); - if (it != tables_marked_dropped.end()) - { - table = std::move(*it); - LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", - tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); - if (first_async_drop_in_queue == it) - ++first_async_drop_in_queue; - tables_marked_dropped.erase(it); - /// Schedule the task as soon as possible, while there are suitable tables to drop. - schedule_after_ms = 0; - } - else if (current_time < min_drop_time) - { - /// We are waiting for drop_delay_sec to exceed, no sense to wakeup until min_drop_time. - /// If new table is added to the queue with ignore_delay flag, schedule() is called to wakeup the task earlier. - schedule_after_ms = (min_drop_time - current_time) * 1000; - LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). " - "Will check again after {} seconds", tables_marked_dropped.size(), tables_in_use_count, min_drop_time - current_time); - } - need_reschedule = !tables_marked_dropped.empty(); + LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {} tables", + drop_tables_count, drop_tables_in_use_count, tables_to_drop.size()); + + dropTablesParallel(tables_to_drop); } - catch (...) + else { - tryLogCurrentException(log, __PRETTY_FUNCTION__); + LOG_TRACE(log, "Not found any suitable tables to drop, still have {} tables in drop queue ({} of them are in use). " + "Will check again later", drop_tables_count, drop_tables_in_use_count); } - if (table.table_id) - { - try - { - dropTableFinally(table); - std::lock_guard lock(tables_marked_dropped_mutex); - [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); - assert(removed); - } - catch (...) - { - tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + - ". Will retry later."); - { - table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; - std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.emplace_back(std::move(table)); - if (first_async_drop_in_queue == tables_marked_dropped.end()) - --first_async_drop_in_queue; - /// If list of dropped tables was empty, schedule a task to retry deletion. 
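Taken together with the new `getTablesToDrop()` / `dropTablesParallel()` above, the removed block shows what is being replaced: a single-table pick per wakeup becomes a batch dropped on a bounded worker pool, with failed drops re-queued after a cooldown. A minimal sketch of that scheduling shape, using only the standard library — `PendingDrop`, `drop_one` and the hard-coded 10 workers / 5-second cooldown are illustrative stand-ins for `ThreadPool`, `dropTableFinally()` and the `drop_table_concurrency` / `drop_error_cooldown_sec` settings:

```cpp
#include <algorithm>
#include <chrono>
#include <functional>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

struct PendingDrop
{
    std::string table;
    std::chrono::steady_clock::time_point not_before;  /// analogue of TableMarkedAsDropped::drop_time
};

/// Drop every entry of `batch` using at most `concurrency` worker threads.
/// Entries whose drop fails are appended to `retry_queue` with a cooldown,
/// mirroring how the patch re-inserts failed tables into tables_marked_dropped.
void dropInParallel(
    std::vector<PendingDrop> batch,
    const std::function<bool(const std::string &)> & drop_one,
    std::vector<PendingDrop> & retry_queue,
    size_t concurrency = 10)
{
    std::mutex mutex;  /// guards `next` and `retry_queue`
    size_t next = 0;

    std::vector<std::thread> workers;
    for (size_t w = 0; w < std::min(concurrency, batch.size()); ++w)
    {
        workers.emplace_back([&]
        {
            while (true)
            {
                size_t i;
                {
                    std::lock_guard lock(mutex);
                    if (next == batch.size())
                        return;
                    i = next++;
                }

                if (!drop_one(batch[i].table))
                {
                    /// Failed drops go back with a retry delay instead of being lost.
                    std::lock_guard lock(mutex);
                    batch[i].not_before = std::chrono::steady_clock::now() + std::chrono::seconds(5);
                    retry_queue.push_back(batch[i]);
                }
            }
        });
    }

    for (auto & worker : workers)
        worker.join();
}
```

The real task additionally notifies `wait_table_finally_dropped` and keeps `first_async_drop_in_queue` consistent, which this sketch leaves out.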
- if (tables_marked_dropped.size() == 1) - { - need_reschedule = true; - schedule_after_ms = drop_error_cooldown_sec * 1000; - } - } - } - - wait_table_finally_dropped.notify_all(); - } - - /// Do not schedule a task if there is no tables to drop - if (need_reschedule) - (*drop_task)->scheduleAfter(schedule_after_ms); + rescheduleDropTableTask(); } void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 17d34e96245..f9b8233e85a 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -225,7 +225,7 @@ public: String getPathForDroppedMetadata(const StorageID & table_id) const; String getPathForMetadata(const StorageID & table_id) const; void enqueueDroppedTableCleanup(StorageID table_id, StoragePtr table, String dropped_metadata_path, bool ignore_delay = false); - void dequeueDroppedTableCleanup(StorageID table_id); + void undropTable(StorageID table_id); void waitTableFinallyDropped(const UUID & uuid); @@ -296,6 +296,12 @@ private: void dropTableDataTask(); void dropTableFinally(const TableMarkedAsDropped & table); + time_t getMinDropTime() TSA_REQUIRES(tables_marked_dropped_mutex); + std::tuple getDroppedTablesCountAndInuseCount(); + TablesMarkedAsDropped getTablesToDrop(); + void dropTablesParallel(TablesMarkedAsDropped tables); + void rescheduleDropTableTask(); + void cleanupStoreDirectoryTask(); bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir); @@ -363,6 +369,9 @@ private: static constexpr time_t default_drop_error_cooldown_sec = 5; time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; + static constexpr size_t default_drop_table_concurrency = 10; + size_t drop_table_concurrency = default_drop_table_concurrency; + std::unique_ptr reload_disks_task; std::mutex reload_disks_mutex; std::set disks_to_reload; diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 920df3d6aed..8f935e951ef 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -64,7 +64,7 @@ BlockIO InterpreterUndropQuery::executeToTable(ASTUndropQuery & query) database->checkMetadataFilenameAvailability(table_id.table_name); - DatabaseCatalog::instance().dequeueDroppedTableCleanup(table_id); + DatabaseCatalog::instance().undropTable(table_id); return {}; } From 3776fafc881bf4725bfbb356e6e81df66ad336b6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jul 2024 13:44:44 +0000 Subject: [PATCH 101/299] Print stacktrace in case of about after logical error. 
--- src/Common/Exception.cpp | 9 ++++++--- src/Common/StackTrace.cpp | 2 +- src/Common/StackTrace.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 1f4b0aea8f2..181b4f1488e 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,9 +38,12 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description) +void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) { - LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description); + auto & logger = Poco::Logger::root(); + LOG_FATAL(&logger, "Logical error: '{}'.", description); + if (trace) + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace->data(), 0, trace->size())); abort(); } @@ -58,7 +61,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg); + abortOnFailedAssertion(msg, &trace); } #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 239e957bdfe..34f6f0b7535 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -545,7 +545,7 @@ std::string StackTrace::toString() const return toStringCached(frame_pointers, offset, size); } -std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size) +std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size) { __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw)); diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ce9a9281f3..2078828f3d7 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -59,7 +59,7 @@ public: const FramePointers & getFramePointers() const { return frame_pointers; } std::string toString() const; - static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static std::string toString(void * const * frame_pointers, size_t offset, size_t size); static void dropCache(); /// @param fatal - if true, will process inline frames (slower) From 4271b2b6e3d6940603ef0d1836fbabf42b092d65 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jul 2024 16:29:32 +0000 Subject: [PATCH 102/299] Add noreturn/ --- src/Common/Exception.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 181b4f1488e..07bda6a75be 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,7 +38,7 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) +[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); From 163f4c2199983f719aed3b859534b58548b57800 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Jul 2024 18:48:59 +0200 Subject: [PATCH 103/299] Better docs --- .../data-types/data-types-binary-encoding.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index 
d7eddf848d6..c9720da0f1c 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -7,6 +7,11 @@ sidebar_label: Data types binary encoding specification. # Data types binary encoding specification +This specification describes the binary format that can be used for binary encoding and decoding of ClickHouse data types. This format is used in `Dynamic` column [binary serialization](dynamic.md#binary-output-format) and can be used in input/output formats [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes) and [Native](../../interfaces/formats.md#native) under corresponding settings. + +The table below describes how each data type is represented in binary format. Each data type encoding consists of 1 byte that indicates the type and some optional additional information. +`var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression. + | ClickHouse data type | Binary encoding | |--------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `Nothing` | `0x00` | @@ -61,6 +66,8 @@ sidebar_label: Data types binary encoding specification. ### Interval kind binary encoding +The table below describes how different interval kinds of `Interval` data type are encoded. + | Interval kind | Binary encoding | |---------------|-----------------| | `Nanosecond` | `0x00` | @@ -77,6 +84,9 @@ sidebar_label: Data types binary encoding specification. ### Aggregate function parameter binary encoding +The table below describes how parameters of `AggregateFunction` and `SimpleAggregateFunction` are encoded. +The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself.
+ | Parameter type | Binary encoding | |--------------------------|--------------------------------------------------------------------------------------------------------------------------------| | `Null` | `0x00` | From f556f2cd9529acfdf796df91c20abec0ce405a95 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Jul 2024 18:28:22 +0000 Subject: [PATCH 104/299] Try to fix special build --- src/Formats/JSONExtractTree.cpp | 1 + src/Functions/FunctionsJSON.cpp | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 827f276311a..8fe472930d3 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1558,6 +1558,7 @@ template std::unique_ptr> buildJSONExtractTr #if USE_RAPIDJSON template void jsonElementToString(const RapidJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); +template bool tryGetNumericValueFromJSONElement(Float64 & value, const RapidJSONParser::Element & element, bool convert_bool_to_integer, String & error); #else template void jsonElementToString(const DummyJSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings); template std::unique_ptr> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message); diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index c6af0674db7..ca233becb63 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -736,17 +736,8 @@ public: NumberType value; tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); - - if (dest.getDataType() == TypeIndex::LowCardinality) - { - ColumnLowCardinality & col_low = assert_cast(dest); - col_low.insertData(reinterpret_cast(&value), sizeof(value)); - } - else - { - auto & col_vec = assert_cast &>(dest); - col_vec.insertValue(value); - } + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); return true; } }; From 64ef36dab362094e0bfa4a32e09b830502eb2c56 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 5 Jul 2024 11:19:06 +0000 Subject: [PATCH 105/299] fix deadlock --- src/Databases/DatabaseAtomic.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b30b05bb7a7..a48eb2abce6 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -106,12 +106,17 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { + // it is important to call destructures not_in_use without + // blocking mutex for avoid potential deadlock. 
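The added comment states the intent of the fix: the cleanup entries must have their destructors run only after `DatabaseAtomic`'s mutex has been released, since a storage destructor may take other locks. A minimal sketch of the pattern, with a made-up `Registry`/`Entry` pair standing in for the real database and storage types:

```cpp
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct Entry
{
    /// In the real code this would be a storage whose destructor may acquire further locks.
    ~Entry() { /* may lock other mutexes */ }
};

class Registry
{
    std::mutex mutex;
    std::map<std::string, std::shared_ptr<Entry>> entries;

public:
    std::shared_ptr<Entry> detach(const std::string & name)
    {
        /// Collected under the lock, destroyed only after the lock is released.
        std::map<std::string, std::shared_ptr<Entry>> not_in_use;
        std::shared_ptr<Entry> detached;

        {
            std::lock_guard lock(mutex);
            detached = entries.at(name);
            entries.erase(name);

            /// Move everything that is no longer referenced elsewhere out of the map.
            for (auto it = entries.begin(); it != entries.end();)
            {
                if (it->second.use_count() == 1)
                {
                    not_in_use.insert(*it);
                    it = entries.erase(it);
                }
                else
                    ++it;
            }
        }

        /// `not_in_use` goes out of scope here, so its destructors run without `mutex`
        /// held and cannot deadlock against a thread that already holds another lock
        /// and is waiting for `mutex`.
        return detached;
    }
};
```

This is the same shape as the patched `detachTable()`: `not_in_use` is filled by `cleanupDetachedTables()` inside the critical section and destroyed only after it.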
DetachedTables not_in_use; - std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); - table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); - not_in_use = cleanupDetachedTables(); + StoragePtr table; + { + std::lock_guard lock(mutex); + table = DatabaseOrdinary::detachTableUnlocked(name); + table_name_to_path.erase(name); + detached_tables.emplace(table->getStorageID().uuid, table); + not_in_use = cleanupDetachedTables(); + } if (!not_in_use.empty()) { From bf0b782960dd57250eaf5b48e1a55843ac6a1e5c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Jul 2024 12:34:23 +0200 Subject: [PATCH 106/299] Correctly print long processing requests in Keeper --- src/Coordination/KeeperDispatcher.cpp | 27 +++++---- src/Coordination/KeeperDispatcher.h | 4 +- src/Coordination/KeeperStateMachine.cpp | 31 +++++----- src/Coordination/KeeperStorage.h | 1 + src/Server/KeeperTCPHandler.cpp | 75 ++++++++++++++----------- src/Server/KeeperTCPHandler.h | 10 +++- 6 files changed, 86 insertions(+), 62 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index f36b1ef151f..85960ac659a 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -6,6 +6,7 @@ #include #include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/ZooKeeperCommon.h" #include #include #include @@ -320,7 +321,7 @@ void KeeperDispatcher::responseThread() try { - setResponse(response_for_session.session_id, response_for_session.response); + setResponse(response_for_session.session_id, response_for_session.response, response_for_session.request); } catch (...) { @@ -355,7 +356,7 @@ void KeeperDispatcher::snapshotThread() } } -void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) +void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { std::lock_guard lock(session_to_response_callback_mutex); @@ -369,7 +370,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe return; auto callback = new_session_id_response_callback[session_id_resp.internal_id]; - callback(response); + callback(response, request); new_session_id_response_callback.erase(session_id_resp.internal_id); } else /// Normal response, just write to client @@ -380,7 +381,7 @@ void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKe if (session_response_callback == session_to_response_callback.end()) return; - session_response_callback->second(response); + session_response_callback->second(response, request); /// Session closed, no more writes if (response->xid != Coordination::WATCH_XID && response->getOpNum() == Coordination::OpNum::Close) @@ -771,21 +772,27 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { std::lock_guard lock(session_to_response_callback_mutex); - new_session_id_response_callback[request->internal_id] = [promise, internal_id = request->internal_id] (const Coordination::ZooKeeperResponsePtr & response) + new_session_id_response_callback[request->internal_id] + = [promise, internal_id = request->internal_id]( + const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr /*request*/) { if (response->getOpNum() != Coordination::OpNum::SessionID) - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect 
response of type {} instead of SessionID response", response->getOpNum()))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, "Incorrect response of type {} instead of SessionID response", response->getOpNum()))); auto session_id_response = dynamic_cast(*response); if (session_id_response.internal_id != internal_id) { - promise->set_exception(std::make_exception_ptr(Exception(ErrorCodes::LOGICAL_ERROR, - "Incorrect response with internal id {} instead of {}", session_id_response.internal_id, internal_id))); + promise->set_exception(std::make_exception_ptr(Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect response with internal id {} instead of {}", + session_id_response.internal_id, + internal_id))); } if (response->error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); + promise->set_exception( + std::make_exception_ptr(zkutil::KeeperException::fromMessage(response->error, "SessionID request failed with error"))); promise->set_value(session_id_response.session_id); }; diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 2e0c73131d5..a487b886d98 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -20,7 +20,7 @@ namespace DB { -using ZooKeeperResponseCallback = std::function; +using ZooKeeperResponseCallback = std::function; /// Highlevel wrapper for ClickHouse Keeper. /// Process user requests via consensus and return responses. @@ -92,7 +92,7 @@ private: void clusterUpdateWithReconfigDisabledThread(); void clusterUpdateThread(); - void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); + void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request = nullptr); /// Add error responses for requests to responses queue. /// Clears requests. 
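These dispatcher changes thread the originating request through the response callback so that the connection handler, rather than the state machine, can decide whether a request was slow. A rough sketch of the check that ends up on the response write path — the types are placeholders and the 1000 ms default stands in for the `log_slow_total_threshold_ms` coordination setting:

```cpp
#include <chrono>
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>

struct Request  { std::string description; };
struct Response { int64_t xid = 0; };

/// Same idea as the new RequestWithResponse in KeeperTCPHandler.h:
/// the request may be absent (e.g. for watch notifications).
struct RequestWithResponse
{
    std::shared_ptr<Response> response;
    std::shared_ptr<Request> request;
};

void onResponseWritten(
    const RequestWithResponse & item,
    std::chrono::steady_clock::time_point request_received,
    int64_t slow_threshold_ms = 1000)
{
    auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                          std::chrono::steady_clock::now() - request_received)
                          .count();

    /// Only responses that carried their originating request can be reported,
    /// which is why watch responses are skipped in the patch.
    if (item.request && elapsed_ms > slow_threshold_ms)
        std::cerr << "Total time to process a request took too long (" << elapsed_ms
                  << "ms). Request info: " << item.request->description << '\n';
}
```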
diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e7cae714ba6..3d3d862e1dd 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -407,7 +407,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n if (!keeper_context->localLogsPreprocessed() && !preprocess(*request_for_session)) return nullptr; - auto try_push = [&](const KeeperStorage::ResponseForSession& response) + auto try_push = [&](const KeeperStorage::ResponseForSession & response) { if (!responses_queue.push(response)) { @@ -416,17 +416,6 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); } - - using namespace std::chrono; - uint64_t elapsed = duration_cast(system_clock::now().time_since_epoch()).count() - request_for_session->time; - if (elapsed > keeper_context->getCoordinationSettings()->log_slow_total_threshold_ms) - { - LOG_INFO( - log, - "Total time to process a request took too long ({}ms).\nRequest info: {}", - elapsed, - request_for_session->request->toString(/*short_format=*/true)); - } }; try @@ -443,6 +432,7 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n KeeperStorage::ResponseForSession response_for_session; response_for_session.session_id = -1; response_for_session.response = response; + response_for_session.request = request_for_session->request; LockGuardWithStats lock(storage_and_responses_lock); session_id = storage->getSessionID(session_id_request.session_timeout_ms); @@ -462,8 +452,14 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n LockGuardWithStats lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); + for (auto & response_for_session : responses_for_sessions) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session->request; + try_push(response_for_session); + } if (keeper_context->digestEnabled() && request_for_session->digest) assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true); @@ -797,9 +793,14 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi LockGuardWithStats lock(storage_and_responses_lock); auto responses = storage->processRequest( request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); - for (const auto & response : responses) - if (!responses_queue.push(response)) - LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response.session_id); + + for (auto & response_for_session : responses) + { + if (response_for_session.response->xid != Coordination::WATCH_XID) + response_for_session.request = request_for_session.request; + if (!responses_queue.push(response_for_session)) + LOG_WARNING(log, "Failed to push response with session id {} to the queue, probably because of shutdown", response_for_session.session_id); + } } void KeeperStateMachine::shutdownStorage() diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index d5e9a64e69c..f7812ad8877 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -206,6 +206,7 
@@ public: { int64_t session_id; Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request = nullptr; }; using ResponsesForSessions = std::vector; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 47064b467e7..5f26542d39c 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -2,31 +2,31 @@ #if USE_NURAFT -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include -#ifdef POCO_HAVE_FD_EPOLL - #include -#else - #include -#endif +# ifdef POCO_HAVE_FD_EPOLL +# include +# else +# include +# endif namespace ProfileEvents { @@ -400,13 +400,11 @@ void KeeperTCPHandler::runImpl() } auto response_fd = poll_wrapper->getResponseFD(); - auto response_callback = [responses_ = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response) + auto response_callback = [my_responses = this->responses, + response_fd](const Coordination::ZooKeeperResponsePtr & response, Coordination::ZooKeeperRequestPtr request) { - if (!responses_->push(response)) - throw Exception(ErrorCodes::SYSTEM_ERROR, - "Could not push response with xid {} and zxid {}", - response->xid, - response->zxid); + if (!my_responses->push(RequestWithResponse{response, std::move(request)})) + throw Exception(ErrorCodes::SYSTEM_ERROR, "Could not push response with xid {} and zxid {}", response->xid, response->zxid); UInt8 single_byte = 1; [[maybe_unused]] ssize_t result = write(response_fd, &single_byte, sizeof(single_byte)); @@ -470,19 +468,20 @@ void KeeperTCPHandler::runImpl() /// became inconsistent and race condition is possible. while (result.responses_count != 0) { - Coordination::ZooKeeperResponsePtr response; + RequestWithResponse request_with_response; - if (!responses->tryPop(response)) + if (!responses->tryPop(request_with_response)) throw Exception(ErrorCodes::LOGICAL_ERROR, "We must have ready response, but queue is empty. It's a bug."); log_long_operation("Waiting for response to be ready"); + auto & response = request_with_response.response; if (response->xid == close_xid) { LOG_DEBUG(log, "Session #{} successfully closed", session_id); return; } - updateStats(response); + updateStats(response, request_with_response.request); packageSent(); response->write(getWriteBuffer()); @@ -609,7 +608,7 @@ void KeeperTCPHandler::packageReceived() keeper_dispatcher->incrementPacketsReceived(); } -void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response) +void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request) { /// update statistics ignoring watch response and heartbeat. 
if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) @@ -617,6 +616,16 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response Int64 elapsed = (Poco::Timestamp() - operations[response->xid]); ProfileEvents::increment(ProfileEvents::KeeperTotalElapsedMicroseconds, elapsed); Int64 elapsed_ms = elapsed / 1000; + + if (request && elapsed_ms > static_cast(keeper_dispatcher->getKeeperContext()->getCoordinationSettings()->log_slow_total_threshold_ms)) + { + LOG_INFO( + log, + "Total time to process a request took too long ({}ms).\nRequest info: {}", + elapsed, + request->toString(/*short_format=*/true)); + } + conn_stats.updateLatency(elapsed_ms); operations.erase(response->xid); diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index c1c522eee89..7c2b8acf624 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -26,7 +26,13 @@ namespace DB struct SocketInterruptablePollWrapper; using SocketInterruptablePollWrapperPtr = std::unique_ptr; -using ThreadSafeResponseQueue = ConcurrentBoundedQueue; +struct RequestWithResponse +{ + Coordination::ZooKeeperResponsePtr response; + Coordination::ZooKeeperRequestPtr request; /// it can be nullptr for some responses +}; + +using ThreadSafeResponseQueue = ConcurrentBoundedQueue; using ThreadSafeResponseQueuePtr = std::shared_ptr; struct LastOp; @@ -104,7 +110,7 @@ private: void packageSent(); void packageReceived(); - void updateStats(Coordination::ZooKeeperResponsePtr & response); + void updateStats(Coordination::ZooKeeperResponsePtr & response, const Coordination::ZooKeeperRequestPtr & request); Poco::Timestamp established; From 05a027822be6acacc99d8eeccc124a8304edc797 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 5 Jul 2024 15:57:17 +0200 Subject: [PATCH 107/299] Update ZooKeeperImpl.cpp --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 8653af51308..2728f953bea 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -996,6 +996,10 @@ void ZooKeeper::receiveEvent() if (request_info.callback) request_info.callback(*response); + + /// Finalize current session if we receive a hardware error from ZooKeeper + if (err != Error::ZOK && isHardwareError(err)) + finalize(/*error_send*/ false, /*error_receive*/ true, fmt::format("Hardware error: {}", err)); } From e2885b1cfa976e9fe072e6453d8142364c013aa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Jul 2024 02:16:51 +0200 Subject: [PATCH 108/299] Add a test --- .../03201_local_named_collections.reference | 1 + .../03201_local_named_collections.sh | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/03201_local_named_collections.reference create mode 100755 tests/queries/0_stateless/03201_local_named_collections.sh diff --git a/tests/queries/0_stateless/03201_local_named_collections.reference b/tests/queries/0_stateless/03201_local_named_collections.reference new file mode 100644 index 00000000000..af5626b4a11 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.reference @@ -0,0 +1 @@ +Hello, world! 
diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh new file mode 100755 index 00000000000..54ca76a52d9 --- /dev/null +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE IF EXISTS test; +CREATE TABLE test (s String) ORDER BY (); +INSERT INTO test VALUES ('Hello, world!'); +" + +${CLICKHOUSE_LOCAL} --multiquery " +CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; +SELECT * FROM remote(mydb, table = 'test'); +" + +${CLICKHOUSE_CLIENT} --multiquery " +DROP TABLE test; +" From fbe172dd593108ba6728a93ed9be6b0300c631f8 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Fri, 5 Jul 2024 18:28:59 -0700 Subject: [PATCH 109/299] [Docs] Make it obvious that Object Data Type is deprecated --- docs/en/sql-reference/data-types/json.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index 9e9c3fdb489..f2eac12594d 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -5,14 +5,11 @@ sidebar_label: Object Data Type keywords: [object, data type] --- -:::note -**ClickHouse handles JSON documents in a variety of way, allowing you to bring structure to this data and query it quickly and efficiently. See [this guide](/docs/en/integrations/data-formats/json) for detailed guidance on working with JSON.**::: +# Object Data Type (deprecated) -# Object Data Type +**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864). -:::note -This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864) -::: +
Stores JavaScript Object Notation (JSON) documents in a single column. From 3a8747992633f64beb0962ecc91ed0a9e650e627 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sat, 6 Jul 2024 16:06:36 +0200 Subject: [PATCH 110/299] fix a crash --- src/Interpreters/DatabaseCatalog.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0a71a842452..230deeab8d5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1250,6 +1250,8 @@ DatabaseCatalog::TablesMarkedAsDropped DatabaseCatalog::getTablesToDrop() std::lock_guard lock(tables_marked_dropped_mutex); + const auto was_count = tables_marked_dropped.size(); + auto it = tables_marked_dropped.begin(); while (it != tables_marked_dropped.end()) { @@ -1270,6 +1272,8 @@ DatabaseCatalog::TablesMarkedAsDropped DatabaseCatalog::getTablesToDrop() it = tables_marked_dropped.erase(it); } + chassert(was_count == tables_marked_dropped.size() + result.size()); + return result; } @@ -1299,10 +1303,10 @@ void DatabaseCatalog::dropTablesParallel(TablesMarkedAsDropped tables_to_drop) SCOPE_EXIT({ std::lock_guard lock(tables_marked_dropped_mutex); - if (first_async_drop_in_queue == tables_marked_dropped.end()) - first_async_drop_in_queue = tables_to_drop.begin(); - tables_marked_dropped.splice(tables_marked_dropped.end(), tables_to_drop); + + if (first_async_drop_in_queue == tables_marked_dropped.end()) + first_async_drop_in_queue = tables_marked_dropped.begin(); }); ThreadPool pool( From e3c036e2d902e3fc0994a5d31135f19fe0e929c5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 7 Jul 2024 00:21:22 +0200 Subject: [PATCH 111/299] remove debug logs --- src/Interpreters/DatabaseCatalog.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 230deeab8d5..61ad5bf96de 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1257,8 +1257,6 @@ DatabaseCatalog::TablesMarkedAsDropped DatabaseCatalog::getTablesToDrop() { bool in_use = it->table && !it->table.unique(); bool old_enough = it->drop_time <= current_time; - LOG_DEBUG(log, "check {}: in_use {}, old_enough {}", it->table_id.getFullTableName(), in_use, old_enough); - if (in_use || !old_enough) { ++it; From 17e089c490efbf1ac4224fa8cf0b74bb3f50739a Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Sun, 7 Jul 2024 18:22:55 +0800 Subject: [PATCH 112/299] Refactor `OptimizeIfWithConstantConditionVisitor` using `InDepthNodeVisitor` --- ...OptimizeIfWithConstantConditionVisitor.cpp | 93 ++++++++----------- .../OptimizeIfWithConstantConditionVisitor.h | 17 ++-- src/Interpreters/TreeOptimizer.cpp | 3 +- 3 files changed, 52 insertions(+), 61 deletions(-) diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 20451fb20ad..48c9988b6fc 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -73,66 +73,55 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v return false; } -void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +void OptimizeIfWithConstantConditionVisitorData::visit(ASTFunction & function_node, ASTPtr & ast) { - if (!current_ast) - return; - checkStackSize(); - for (ASTPtr & child : current_ast->children) 
+ if (function_node.name != "if") + return; + + if (!function_node.arguments) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); + + if (function_node.arguments->children.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node.arguments->children.size()); + + const auto * args = function_node.arguments->as(); + + ASTPtr condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) { - auto * function_node = child->as(); - if (!function_node || function_node->name != "if") + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = ast; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = ast->tryGetAlias(); + + if (replace_alias.empty()) { - visit(child); - continue; + replace_ast->setAlias(if_alias); + ast = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + ast = replace_ast_deep_copy; } - if (!function_node->arguments) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); - - if (function_node->arguments->children.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Wrong number of arguments for function 'if' ({} instead of 3)", - function_node->arguments->children.size()); - - visit(function_node->arguments); - const auto * args = function_node->arguments->as(); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) + if (!if_alias.empty()) { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = ast; } } } diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index ad98f92bafd..3b46f90f07c 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -1,23 +1,24 @@ #pragma once #include +#include namespace DB { - -/// It removes Function_if node from AST if condition is constant. 
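After the refactoring, `OptimizeIfWithConstantConditionVisitorData` carries only the per-node rewrite, and the recursion is delegated to the generic `InDepthNodeVisitor`/`OneTypeMatcher` machinery. The overall shape of that pattern, reduced to plain C++ — `Node`, `FunctionNode` and `visitInDepth` are simplified stand-ins for `IAST` and the real visitor templates:

```cpp
#include <memory>
#include <vector>

struct Node
{
    std::vector<std::shared_ptr<Node>> children;
    virtual ~Node() = default;
};
using NodePtr = std::shared_ptr<Node>;

struct FunctionNode : Node
{
    /// name, arguments, ...
};

/// The "data" carries only the rewrite logic for one node type.
struct OptimizeIfData
{
    using TypeToVisit = FunctionNode;

    void visit(FunctionNode & /*function_node*/, NodePtr & /*ast*/)
    {
        /// Replace `if(const, a, b)` with `a` or `b` in place, as in the patch.
    }
};

/// A generic driver walks every node and calls Data::visit for matching types,
/// so the optimization itself no longer needs to recurse manually.
template <typename Data>
void visitInDepth(Data & data, NodePtr & ast)
{
    if (!ast)
        return;

    for (auto & child : ast->children)
        visitInDepth(data, child);

    if (auto * matched = dynamic_cast<typename Data::TypeToVisit *>(ast.get()))
        data.visit(*matched, ast);
}
```

Which of node and children is visited first is controlled by `InDepthNodeVisitor`'s second template argument; the sketch simply recurses into children before rewriting the node.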
-/// TODO: rewrite with InDepthNodeVisitor -class OptimizeIfWithConstantConditionVisitor +struct OptimizeIfWithConstantConditionVisitorData { -public: - explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + using TypeToVisit = ASTFunction; + + explicit OptimizeIfWithConstantConditionVisitorData(Aliases & aliases_) : aliases(aliases_) {} - void visit(ASTPtr & ast); - + void visit(ASTFunction & function_node, ASTPtr & ast); private: Aliases & aliases; }; +/// It removes Function_if node from AST if condition is constant. +using OptimizeIfWithConstantConditionVisitor = InDepthNodeVisitor, false>; + } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b88d75cd5a2..b872eb94fde 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -577,7 +577,8 @@ void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_ optimizeMultiIfToIf(query); /// Optimize if with constant condition after constants was substituted instead of scalar subqueries. - OptimizeIfWithConstantConditionVisitor(aliases).visit(query); + OptimizeIfWithConstantConditionVisitorData visitor_data(aliases); + OptimizeIfWithConstantConditionVisitor(visitor_data).visit(query); if (if_chain_to_multiif) OptimizeIfChainsVisitor().visit(query); From 5f28c025ce39b1f9e9ea2e48b3cee78ad9a432be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Jul 2024 12:51:29 +0200 Subject: [PATCH 113/299] Fix test (connections use coroutines) --- tests/queries/0_stateless/03201_local_named_collections.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh index 54ca76a52d9..2054a09df06 100755 --- a/tests/queries/0_stateless/03201_local_named_collections.sh +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -13,7 +13,7 @@ INSERT INTO test VALUES ('Hello, world!'); ${CLICKHOUSE_LOCAL} --multiquery " CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; SELECT * FROM remote(mydb, table = 'test'); -" +" | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" ${CLICKHOUSE_CLIENT} --multiquery " DROP TABLE test; From 7e20f26735a179948cb61186986b84ce4e9f1300 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 18 Jun 2024 20:35:33 +0800 Subject: [PATCH 114/299] Extend tuple() and add tupleNames() --- .../functions/tuple-functions.md | 56 ++++++++- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/Functions/tuple.cpp | 12 +- src/Functions/tuple.h | 39 ++++-- src/Functions/tupleNames.cpp | 118 ++++++++++++++++++ src/Parsers/isUnquotedIdentifier.cpp | 20 +++ src/Parsers/isUnquotedIdentifier.h | 10 ++ .../Formats/Impl/ValuesBlockInputFormat.h | 13 +- .../queries/0_stateless/00307_format_xml.sql | 2 +- tests/queries/0_stateless/00309_formats.sql | 2 + .../01144_multiword_data_types.sql | 4 +- .../0_stateless/01232_untuple.reference | 2 +- tests/queries/0_stateless/01232_untuple.sql | 1 + .../02294_nothing_arguments_in_functions.sql | 2 + ...new_functions_must_be_documented.reference | 1 - .../02541_tuple_element_with_null.sql | 2 +- .../02890_named_tuple_functions.reference | 8 ++ .../02890_named_tuple_functions.sql | 22 ++++ .../02890_untuple_column_names.reference | 4 + .../02890_untuple_column_names.sql | 7 +- .../03038_nested_dynamic_merges.sh | 2 +- 
.../aspell-ignore/en/aspell-dict.txt | 1 + 23 files changed, 311 insertions(+), 19 deletions(-) create mode 100644 src/Functions/tupleNames.cpp create mode 100644 src/Parsers/isUnquotedIdentifier.cpp create mode 100644 src/Parsers/isUnquotedIdentifier.h create mode 100644 tests/queries/0_stateless/02890_named_tuple_functions.reference create mode 100644 tests/queries/0_stateless/02890_named_tuple_functions.sql diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 0663be08240..3b4d68e44b2 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,7 +7,7 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. The function implements the operator `(x, y, ...)`. @@ -259,6 +259,60 @@ Result: └───────────────────────────────────────┘ ``` +## tupleNames + +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. + +**Syntax** + +``` sql +tupleNames(tuple) +``` + +**Arguments** + +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. + +**Returned value** + +- An array with strings. + +Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). + +**Example** + +Query: + +``` sql +CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory; + +INSERT INTO tupletest VALUES (tuple(1, 2)); + +SELECT tupleNames(col) FROM tupletest; +``` + +Result: + +``` text +┌─tupleNames(col)──────────┐ +│ ['user_ID','session_ID'] │ +└──────────────────────────┘ +``` + +If you pass a simple tuple to the function, ClickHouse uses the indexes of the columns as their names: + +``` sql +SELECT tupleNames(tuple(3, 2, 1)); +``` + +Result: + +``` text +┌─tupleNames((3, 2, 1))─┐ +│ ['1','2','3'] │ +└───────────────────────┘ +``` + ## tuplePlus Calculates the sum of corresponding values of two tuples of the same size. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5903dbd32eb..e175bdfcbbe 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -755,6 +755,7 @@ class IColumn; M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \ M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. 
It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \ + M(Bool, enable_named_columns_in_function_tuple, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers.", 0) \ \ M(Bool, query_plan_enable_optimizations, true, "Globally enable/disable query optimization at the query plan level", 0) \ M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8c096c13634..4e8cbf07509 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns a tuple by grouping input arguments. + +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. +Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. + +The function implements the operator `(x, y, ...)`. +)", + .examples{{"typical", "SELECT tuple(1, 2)", "(1,2)"}}, + .categories{"Miscellaneous"}}); } } diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index 8b3e041f781..94529d86861 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -6,20 +6,28 @@ #include #include #include +#include namespace DB { -/** tuple(x, y, ...) is a function that allows you to group several columns +/** tuple(x, y, ...) is a function that allows you to group several columns. * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. 
*/ class FunctionTuple : public IFunction { + bool enable_named_columns; + public: static constexpr auto name = "tuple"; /// maybe_unused: false-positive - [[ maybe_unused ]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + [[maybe_unused]] static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); + } + + explicit FunctionTuple(bool enable_named_columns_ = false) : enable_named_columns(enable_named_columns_) { } String getName() const override { return name; } @@ -38,9 +46,26 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return std::make_shared(arguments); + if (arguments.empty()) + return std::make_shared(DataTypes{}); + + DataTypes types; + Names names; + NameSet name_set; + for (const auto & argument : arguments) + { + types.emplace_back(argument.type); + names.emplace_back(argument.name); + name_set.emplace(argument.name); + } + + if (enable_named_columns && name_set.size() == names.size() + && std::all_of(names.cbegin(), names.cend(), [](const auto & n) { return isUnquotedIdentifier(n); })) + return std::make_shared(types, names); + else + return std::make_shared(types); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -53,9 +78,9 @@ public: for (size_t i = 0; i < tuple_size; ++i) { /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); } return ColumnTuple::create(tuple_columns); diff --git a/src/Functions/tupleNames.cpp b/src/Functions/tupleNames.cpp new file mode 100644 index 00000000000..e444478c224 --- /dev/null +++ b/src/Functions/tupleNames.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** Transform a named tuple into names, which is a constant array of strings. 
+ */ +class ExecutableFunctionTupleNames : public IExecutableFunction +{ +public: + static constexpr auto name = "tupleNames"; + + explicit ExecutableFunctionTupleNames(Array name_fields_) : name_fields(std::move(name_fields_)) { } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, name_fields); + } + +private: + Array name_fields; +}; + +class FunctionBaseTupleNames : public IFunctionBase +{ +public: + static constexpr auto name = "tupleNames"; + + explicit FunctionBaseTupleNames(DataTypePtr argument_type, DataTypePtr result_type_, Array name_fields_) + : argument_types({std::move(argument_type)}), result_type(std::move(result_type_)), name_fields(std::move(name_fields_)) + { + } + + String getName() const override { return name; } + + bool isSuitableForConstantFolding() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return result_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(name_fields); + } + +private: + DataTypes argument_types; + DataTypePtr result_type; + Array name_fields; +}; + +class TupleNamesOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "tupleNames"; + + static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + return std::make_shared(std::make_shared()); + } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + DataTypes types = tuple->getElements(); + Array name_fields; + for (const auto & elem_name : tuple->getElementNames()) + name_fields.emplace_back(elem_name); + + return std::make_unique(arguments[0].type, result_type, std::move(name_fields)); + } +}; + +} + +REGISTER_FUNCTION(TupleNames) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. 
+)", + .examples{{"typical", "SELECT tupleNames(tuple(1 as a, 2 as b))", "['a','b']"}}, + .categories{"Miscellaneous"}}); +} + +} diff --git a/src/Parsers/isUnquotedIdentifier.cpp b/src/Parsers/isUnquotedIdentifier.cpp new file mode 100644 index 00000000000..6f2442635ec --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.cpp @@ -0,0 +1,20 @@ +#include + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name) +{ + Lexer lexer(name.data(), name.data() + name.size()); + + auto maybe_ident = lexer.nextToken(); + + if (maybe_ident.type != TokenType::BareWord) + return false; + + return lexer.nextToken().isEnd(); +} + +} diff --git a/src/Parsers/isUnquotedIdentifier.h b/src/Parsers/isUnquotedIdentifier.h new file mode 100644 index 00000000000..839e5860ad3 --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name); + +} diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index 0abafc896ff..b1bce098e99 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -37,7 +37,18 @@ public: void resetReadBuffer() override; /// TODO: remove context somehow. - void setContext(const ContextPtr & context_) { context = Context::createCopy(context_); } + void setContext(const ContextPtr & context_) + { + auto context_copy = Context::createCopy(context_); + + /// ConstantExpressionTemplate generates placeholder names (_dummy_N) + /// for all literals, which are valid names for creating named tuples. + /// This behavior needs to be explicitly disabled, because if named + /// tuples with different names are inserted into a named tuple, it will + /// only insert default values. 
+ context_copy->setSetting("enable_named_columns_in_function_tuple", false); + context = context_copy; + } const BlockMissingValues & getMissingValues() const override { return block_missing_values; } diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 7fdca83b69f..29c733bb186 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -1,2 +1,2 @@ SET output_format_write_statistics = 0; -SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1 FORMAT XML; +SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, enable_named_columns_in_function_tuple = 0 FORMAT XML; diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 87a1ea454d0..b0939c00a10 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -1,4 +1,6 @@ SET output_format_write_statistics = 0; +SET enable_named_columns_in_function_tuple = 0; + SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinary; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinaryWithNamesAndTypes; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/01144_multiword_data_types.sql b/tests/queries/0_stateless/01144_multiword_data_types.sql index cc380f82d63..56def658ae0 100644 --- a/tests/queries/0_stateless/01144_multiword_data_types.sql +++ b/tests/queries/0_stateless/01144_multiword_data_types.sql @@ -23,7 +23,7 @@ CREATE TABLE multiword_types ( SHOW CREATE TABLE multiword_types; INSERT INTO multiword_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM multiword_types; +SELECT toTypeName((*,)) FROM multiword_types SETTINGS enable_named_columns_in_function_tuple = 0; CREATE TABLE unsigned_types ( a TINYINT SIGNED, @@ -43,7 +43,7 @@ CREATE TABLE unsigned_types ( SHOW CREATE TABLE unsigned_types; INSERT INTO unsigned_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM unsigned_types; +SELECT toTypeName((*,)) FROM unsigned_types SETTINGS enable_named_columns_in_function_tuple = 0; SELECT CAST('42' AS DOUBLE PRECISION), CAST(42, 'NATIONAL CHARACTER VARYING'), CAST(-1 AS tinyint UnSiGnEd), CAST(65535, ' sMaLlInT signed '); diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 0358cde1354..3cd8eaa5611 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'5\') +key 
tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v5\') 1 20 20 10 20 30 2 11 20 10 20 30 3 70 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql index ccefd13a772..391d08ab859 100644 --- a/tests/queries/0_stateless/01232_untuple.sql +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -1,4 +1,5 @@ SET allow_experimental_analyzer = 1; +SET enable_named_columns_in_function_tuple = 1; select untuple((* except (b),)) from (select 1 a, 2 b, 3 c); select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 4406a05df0c..ecf4f9cab93 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -1,3 +1,5 @@ +set enable_named_columns_in_function_tuple = 0; + select arrayMap(x -> 2 * x, []); select toTypeName(arrayMap(x -> 2 * x, [])); select arrayMap((x, y) -> x + y, [], []); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..8dd8910c858 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -876,7 +876,6 @@ tryBase58Decode tumble tumbleEnd tumbleStart -tuple tupleConcat tupleDivide tupleDivideByNumber diff --git a/tests/queries/0_stateless/02541_tuple_element_with_null.sql b/tests/queries/0_stateless/02541_tuple_element_with_null.sql index d2062b60d49..e1581ce3755 100644 --- a/tests/queries/0_stateless/02541_tuple_element_with_null.sql +++ b/tests/queries/0_stateless/02541_tuple_element_with_null.sql @@ -9,7 +9,7 @@ SETTINGS index_granularity = 8192; INSERT INTO test_tuple_element VALUES (tuple(1,2)), (tuple(NULL, 3)); -SELECT +SELECT tupleElement(tuple, 'k1', 0) fine_k1_with_0, tupleElement(tuple, 'k1', NULL) k1_with_null, tupleElement(tuple, 'k2', 0) k2_with_0, diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference new file mode 100644 index 00000000000..efccfef0817 --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -0,0 +1,8 @@ +Tuple(\n i Int32,\n j Int32) +['i','j'] +Tuple(UInt8, Int32) +['1','2'] +Tuple(\n k UInt8,\n j Int32) +['k','j'] +Tuple(Int32, Int32, Int32, Int32) +['1','2','3','4'] diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql new file mode 100644 index 00000000000..abd24e1cbfe --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -0,0 +1,22 @@ +set enable_named_columns_in_function_tuple = 1; +set allow_experimental_analyzer = 1; + +drop table if exists x; +create table x (i int, j int) engine MergeTree order by i; +insert into x values (1, 2); + +select toTypeName(tuple(i, j)) from x; +select tupleNames(tuple(i, j)) from x; + +select toTypeName(tuple(1, j)) from x; +select tupleNames(tuple(1, j)) from x; + +select toTypeName(tuple(1 as k, j)) from x; 
+select tupleNames(tuple(1 as k, j)) from x; + +select toTypeName(tuple(i, i, j, j)) from x; +select tupleNames(tuple(i, i, j, j)) from x; + +select tupleNames(1); -- { serverError 43 } + +drop table x; diff --git a/tests/queries/0_stateless/02890_untuple_column_names.reference b/tests/queries/0_stateless/02890_untuple_column_names.reference index 388f974c45f..13a85c70138 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.reference +++ b/tests/queries/0_stateless/02890_untuple_column_names.reference @@ -57,6 +57,10 @@ t.1: 1 Row 1: ────── t.1: 1 +-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names +Row 1: +────── +t.a: 1 -- thankfully JSONExtract() keeps them Row 1: ────── diff --git a/tests/queries/0_stateless/02890_untuple_column_names.sql b/tests/queries/0_stateless/02890_untuple_column_names.sql index ab6748cb54d..cd490ca3522 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.sql +++ b/tests/queries/0_stateless/02890_untuple_column_names.sql @@ -37,8 +37,11 @@ SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Verti SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; -- Bug: doesn't throw an exception SELECT '-- tuple() loses the column names (would be good to fix, see #36773)'; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0, enable_named_columns_in_function_tuple = 0; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 0; + +SELECT '-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names'; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 1; SELECT '-- thankfully JSONExtract() keeps them'; SELECT untuple(JSONExtract('{"key": "value"}', 'Tuple(key String)')) x FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index b82ddb3813e..5d8eac082cf 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" function test() diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 02bc520743f..88d94ff3825 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2797,6 +2797,7 @@ tupleModulo tupleModuloByNumber tupleMultiply tupleMultiplyByNumber +tupleNames tupleNegate tuplePlus tupleToNameValuePairs From 7b19076ebee9a89628d6ef564e155b767f279118 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 7 Jul 2024 22:31:45 +0200 Subject: [PATCH 115/299] Avoid using harmful function rand() in grpc. --- contrib/grpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/grpc b/contrib/grpc index 77b2737a709..f5b7fdc2dff 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit 77b2737a709d43d8c6895e3f03ca62b00bd9201c +Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df From 97c6cbec46f5c93c2c6199576592a9262aff56f0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 7 Jul 2024 22:38:43 +0200 Subject: [PATCH 116/299] ad individual window function pages --- .../window-functions/dense_rank.md | 73 ++++++++++++++++++ .../sql-reference/window-functions/index.md | 35 ++++----- .../en/sql-reference/window-functions/rank.md | 74 +++++++++++++++++++ .../window-functions/row_number.md | 0 4 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/dense_rank.md create mode 100644 docs/en/sql-reference/window-functions/rank.md create mode 100644 docs/en/sql-reference/window-functions/row_number.md diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md new file mode 100644 index 00000000000..17ab894707e --- /dev/null +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -0,0 +1,73 @@ +--- +slug: /en/sql-reference/window-functions/dense_rank +sidebar_label: dense_rank +sidebar_position: 2 +--- + +# dense_rank + +This window function ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. + +The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. + +**Syntax** + +```sql +dense_rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). 
+ +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + dense_rank() OVER (ORDER BY salary DESC) AS dense_rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─dense_rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 2 │ +4. │ Michael Stanley │ 150000 │ 3 │ +5. │ Douglas Benson │ 150000 │ 3 │ +6. │ Scott Harrison │ 150000 │ 3 │ +7. │ James Henderson │ 140000 │ 4 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 3a8afd10359..a0246af610f 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -1,10 +1,11 @@ --- slug: /en/sql-reference/window-functions/ -sidebar_position: 62 sidebar_label: Window Functions -title: Window Functions +sidebar_position: 1 --- +# Window Functions + Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. @@ -12,19 +13,19 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Supported? | -|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Feature | Supported? | +|--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | -| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | -| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | -| `ROWS` frame | ✅ | -| `RANGE` frame | ✅ (the default) | -| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | -| `GROUPS` frame | ❌ | -| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | -| `rank()`, `dense_rank()`, `row_number()` | ✅ | -| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | +| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ | +| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | +| `ROWS` frame | ✅ | +| `RANGE` frame | ✅ (the default) | +| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | +| `GROUPS` frame | ❌ | +| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | +| `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | +| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions @@ -74,12 +75,12 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. -- `row_number()` - Number the current row within its partition starting from 1. +- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. - `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. - `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. - `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. +- [`rank()`](./rank.md) - Rank the current row within its partition with gaps. +- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. - `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. - `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md new file mode 100644 index 00000000000..17db889ef92 --- /dev/null +++ b/docs/en/sql-reference/window-functions/rank.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/window-functions/rank +sidebar_label: rank +sidebar_position: 3 +--- + +# rank + +This window function ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. + +The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. + +**Syntax** + +```sql +rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). 
+ +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + rank() OVER (ORDER BY salary DESC) AS rank +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─rank─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 1 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Douglas Benson │ 150000 │ 4 │ +5. │ Michael Stanley │ 150000 │ 4 │ +6. │ Scott Harrison │ 150000 │ 4 │ +7. │ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md new file mode 100644 index 00000000000..e69de29bb2d From 0b1e6be8ea6ee68d501c6d1f9bd1a5d7eb02d738 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 7 Jul 2024 22:46:18 +0200 Subject: [PATCH 117/299] add disclaimer --- .../aggregate-functions/reference/singlevalueornull.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index 21344b58ba6..19154c488d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -16,7 +16,7 @@ singleValueOrNull(x) **Parameters** -- `x` — Column of any [data type](../../data-types/index.md). +- `x` — Column of any [data type](../../data-types/index.md) (except [Map](../../data-types/map.md), [Array](../../data-types/array.md) or [Tuple](../../data-types/tuple) which cannot be of type [Nullable](../../data-types/nullable.md)). **Returned values** From e16cb83ca3071eb9aecf924aa17af21e1bb76450 Mon Sep 17 00:00:00 2001 From: gun9nir Date: Sun, 7 Jul 2024 19:24:29 -0700 Subject: [PATCH 118/299] style --- src/Storages/StorageFile.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9ff2a6667af..8797f6a3dfa 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -366,14 +366,17 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user } else if (path.find_first_of("*?{") == std::string::npos) { - if (!fs::is_directory(path)) { + if (!fs::is_directory(path)) + { std::error_code error; size_t size = fs::file_size(path, error); if (!error) total_bytes_to_read += size; paths.push_back(path); - } else { + } + else + { /// We list non-directory files under that directory. 
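            /// Appending "*" turns the directory into a glob pattern, so the regular
            /// glob-listing helper below picks up the files under that directory.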
paths = listFilesWithRegexpMatching(path / fs::path("*"), total_bytes_to_read); can_be_directory = false; From 7b3ce3c3b38f698eb80923061c8ec0e309e2cff6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Jul 2024 06:20:10 +0200 Subject: [PATCH 119/299] add leadInFrame, lagInFrame, row_number --- .../window-functions/lagInFrame.md | 79 +++++++++++++++++++ .../window-functions/leadInFrame.md | 60 ++++++++++++++ .../window-functions/row_number.md | 67 ++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 docs/en/sql-reference/window-functions/lagInFrame.md create mode 100644 docs/en/sql-reference/window-functions/leadInFrame.md diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md new file mode 100644 index 00000000000..ea9f6d9dea2 --- /dev/null +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -0,0 +1,79 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 5 +--- + +# lagInFrame + +Return a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. + +**Syntax** + +```sql +lagInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. + +**Example** + +This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock. + +Query: + +```sql +CREATE TABLE stock_prices +( + `date` Date, + `open` Float32, -- opening price + `high` Float32, -- daily high + `low` Float32, -- daily low + `close` Float32, -- closing price + `volume` UInt32 -- trade volume +) +Engine = Memory; + +INSERT INTO stock_prices FORMAT Values + ('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000), + ('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000), + ('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000), + ('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000), + ('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000); +``` + +```sql +SELECT + date, + close, + lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close, + COALESCE(ROUND(close - previous_day_close, 2)) AS delta, + COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change +FROM stock_prices +ORDER BY date DESC; +``` + +Result: + +```response + ┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐ +1. │ 2024-06-07 │ 120.89 │ 121 │ -0.11 │ -0.09 │ +2. │ 2024-06-06 │ 121 │ 122.44 │ -1.44 │ -1.18 │ +3. │ 2024-06-05 │ 122.44 │ 116.44 │ 6 │ 5.15 │ +4. 
│ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │ +5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │ + └────────────┴────────┴────────────────────┴───────┴────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md new file mode 100644 index 00000000000..e3b65af9a4d --- /dev/null +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -0,0 +1,60 @@ +--- +slug: /en/sql-reference/window-functions/leadInFrame +sidebar_label: leadInFrame +sidebar_position: 6 +--- + +# leadInFrame + +Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Syntax** + +```sql +leadInFrame(x[, offset[, default]]) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Parameters** +- `x` — Column name. +- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default). +- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default). + +**Returned value** + +- value evaluated at the row that is offset rows after the current row within the ordered frame. + +**Example** + +This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category. + +Query: + +```sql +CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *; +``` + +```sql +FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9; +``` + +Result: + +```response + ┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ +7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +9. 
│ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ + └──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index e69de29bb2d..428bb34a8ba 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -0,0 +1,67 @@ +--- +slug: /en/sql-reference/window-functions/row_number +sidebar_label: row_number +sidebar_position: 4 +--- + +# row_number + +Numbers the current row within its partition starting from 1 + +**Syntax** + +```sql +row_number (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- A number for the current row within its partition. [UInt64](../data-types/int-uint.md). + +**Example** + +The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA). + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary DESC) AS row_number +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─row_number─┐ +1. │ Gary Chen │ 195000 │ 1 │ +2. │ Robert George │ 195000 │ 2 │ +3. │ Charles Juarez │ 190000 │ 3 │ +4. │ Scott Harrison │ 150000 │ 4 │ +5. 
│ Michael Stanley │ 150000 │ 5 │ + └─────────────────┴────────┴────────────┘ +``` \ No newline at end of file From e0d3213481769299881044512d78d23551c2aa8a Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Sun, 16 Jun 2024 20:43:26 +0300 Subject: [PATCH 120/299] support set transformation in partition pruning --- src/Storages/MergeTree/KeyCondition.cpp | 202 +++++++++++++- src/Storages/MergeTree/KeyCondition.h | 14 +- ...et_transformed_partition_pruning.reference | 50 ++++ ...3173_set_transformed_partition_pruning.sql | 258 ++++++++++++++++++ 4 files changed, 521 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference create mode 100644 tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7e4b1db4c89..85eca4644e7 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1186,15 +1186,62 @@ bool KeyCondition::canConstantBeWrappedByFunctions( }); } +/// Sequentially applies functions to set column, returns `true` +/// if all function arguments are compatible with functions +/// signatures, and none of the functions produce `NULL` output. +/// +/// After functions chain execution, fills result set column and +/// its type. +bool applyFunctionChainToSetValues( + const ColumnPtr & set_column, + const DataTypePtr & set_data_type, + const std::vector & functions, + ColumnPtr & out_set_column, + DataTypePtr & out_set_type) +{ + auto result_column = set_column->convertToFullColumnIfLowCardinality(); + auto result_type = removeLowCardinality(set_data_type); + + for (const auto & func : functions) + { + if (func->getArgumentTypes().empty()) + return false; + + auto argument_type = func->getArgumentTypes()[0]; + if (!canBeSafelyCasted(result_type, argument_type)) + return false; + + result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); + result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); + if (result_column->isNullable()) + { + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + } + result_type = func->getResultType(); + } + out_set_column = result_column; + out_set_type = result_type; + + return true; +} + bool KeyCondition::tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num) + size_t & out_key_column_num, + bool & is_constant_transformed) { const auto & left_arg = func.getArgumentAt(0); out_key_column_num = 0; std::vector indexes_mapping; + std::vector set_transforming_chains; DataTypes data_types; auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index) @@ -1203,6 +1250,7 @@ bool KeyCondition::tryPrepareSetIndex( index_mapping.tuple_index = tuple_index; DataTypePtr data_type; std::optional key_space_filling_curve_argument_pos; + MonotonicFunctionsChain set_transforming_chain; if (isKeyPossiblyWrappedByMonotonicFunctions( node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions) && !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set. 
@@ -1210,6 +1258,14 @@ bool KeyCondition::tryPrepareSetIndex( indexes_mapping.push_back(index_mapping); data_types.push_back(data_type); out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); + } + else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain)) + { + indexes_mapping.push_back(index_mapping); + data_types.push_back(data_type); + out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); + set_transforming_chains.push_back(set_transforming_chain); } }; @@ -1275,6 +1331,18 @@ bool KeyCondition::tryPrepareSetIndex( auto set_element_type = set_types[set_element_index]; auto set_column = set_columns[set_element_index]; + if (!set_transforming_chains[indexes_mapping_index].empty()) + { + ColumnPtr transformed_set_column; + DataTypePtr transformed_set_type; + if (!applyFunctionChainToSetValues(set_column, set_element_type, set_transforming_chains[indexes_mapping_index], transformed_set_column, transformed_set_type)) + return false; + + set_column = transformed_set_column; + set_element_type = transformed_set_type; + is_constant_transformed = true; + } + if (canBeSafelyCasted(set_element_type, key_column_type)) { set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type); @@ -1571,6 +1639,136 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( return false; } +bool KeyCondition::canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain) +{ + // Checking if column name matches any of key subexpressions + String expr_name = node.getColumnName(); + + if (array_joined_column_names.contains(expr_name)) + return false; + + if (!key_subexpr_names.contains(expr_name)) + { + expr_name = node.getColumnNameWithModuloLegacy(); + + if (!key_subexpr_names.contains(expr_name)) + return false; + } + + // If match has been found, need to identify key column, and + // sequence of function nodes, which forms key column + for (const auto & key_expr_node : key_expr->getNodes()) + { + auto it = key_columns.find(key_expr_node.result_name); + if (it != key_columns.end()) + { + std::vector chain; + + const auto * cur_node = &key_expr_node; + bool is_valid_chain = true; + + while (is_valid_chain) + { + if (cur_node->result_name == expr_name) + break; + + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) + { + chain.push_back(cur_node); + is_valid_chain = cur_node->function_base->isDeterministic(); + + const ActionsDAG::Node * next_node = nullptr; + for (const auto * arg : cur_node->children) + { + if (arg->column && isColumnConst(*arg->column)) + continue; + + if (next_node) + is_valid_chain = false; + + next_node = arg; + } + + if (!next_node) + is_valid_chain = false; + + cur_node = next_node; + } + else if (cur_node->type == ActionsDAG::ActionType::ALIAS) + cur_node = cur_node->children.front(); + else + is_valid_chain = false; + } + + // If key column has been found, and it consists of deterministic function, + // in order to make them applicable to constants from query predicate, + // reversing the sequence of functions, and binding constant arguments + if (is_valid_chain) + { + // Removing low cardinality from `node` output type to avoid + // passing it through functions signatures + auto nonconst_input_type = 
removeLowCardinality(node.getDAGNode()->result_type); + + for (auto iter = chain.rbegin(); iter != chain.rend(); ++iter) + { + const auto * function_node = *iter; + auto function = function_node->function_base; + auto func_builder = FunctionFactory::instance().tryGet(function->getName(), node.getTreeContext().getQueryContext()); + if (!func_builder) + return false; + ColumnsWithTypeAndName arguments; + ColumnWithTypeAndName const_arg; + FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; + if (function_node->children.size() == 2) + { + const auto * left = function_node->children[0]; + const auto * right = function_node->children[1]; + if (left->column && isColumnConst(*left->column)) + { + const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; + arguments.push_back(const_arg); + arguments.push_back({nullptr, nonconst_input_type, ""}); + kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; + } + else if (right->column && isColumnConst(*right->column)) + { + const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; + arguments.push_back({nullptr, nonconst_input_type, ""}); + arguments.push_back(const_arg); + kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; + } + + /// If constant arg of binary operator is NULL, there will be no monotonicity. + if (const_arg.column->isNullAt(0)) + return false; + } + else + arguments.push_back({nullptr, nonconst_input_type, ""}); + auto func = func_builder->build(arguments); + + if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) + out_functions_chain.push_back(func); + else + out_functions_chain.push_back(std::make_shared(func, const_arg, kind)); + + nonconst_input_type = func->getResultType(); + } + + const auto & sample_block = key_expr->getSampleBlock(); + out_key_column_num = it->second; + out_key_res_column_type = sample_block.getByName(it->first).type; + + return true; + } + } + } + + return false; +} static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { @@ -1649,7 +1847,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme if (functionIsInOrGlobalInOperator(func_name)) { - if (tryPrepareSetIndex(func, out, key_column_num)) + if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed)) { key_arg_pos = 0; is_set_const = true; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 6e5956706aa..11dd1c85c9d 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -276,13 +276,25 @@ private: Field & out_value, DataTypePtr & out_type); + /// Checks if node is a subexpression of any of key columns expressions, + /// wrapped by deterministic functions, and if so, returns `true`, and + /// specifies key column position / type. Besides that it produces the + /// chain of functions which should be executed on set, to transform it + /// into key column values. + bool canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. 
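    /// Sets `is_constant_transformed` when the set values had to be passed through the
    /// key expression's function chain before they could be compared with the key column.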
bool tryPrepareSetIndex( const RPNBuilderFunctionTreeNode & func, RPNElement & out, - size_t & out_key_column_num); + size_t & out_key_column_num, + bool & is_constant_transformed); /// Checks that the index can not be used. /// diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference new file mode 100644 index 00000000000..3a6727b70e8 --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.reference @@ -0,0 +1,50 @@ +-- Single partition by function +0 +2 +-- Nested partition by function +1 +2 +1 +1 +-- Nested partition by function, LowCardinality +1 +2 +1 +1 +-- Nested partition by function, Nullable +1 +2 +1 +1 +-- Nested partition by function, LowCardinality + Nullable +1 +2 +1 +1 +-- Non-safe cast +2 +2 +-- Multiple partition columns +1 +1 +1 +2 +-- LowCardinality set +1 +1 +-- Nullable set +1 +1 +-- LowCardinality + Nullable set +1 +1 +-- Not failing with date parsing functions +1 +0 +-- Pruning + not failing with nested date parsing functions +1 +2 +0 +-- Empty transform functions +2 +1 diff --git a/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql new file mode 100644 index 00000000000..8ffabacaa8c --- /dev/null +++ b/tests/queries/0_stateless/03173_set_transformed_partition_pruning.sql @@ -0,0 +1,258 @@ +SELECT '-- Single partition by function'; + +DROP TABLE IF EXISTS 03173_single_function; +CREATE TABLE 03173_single_function ( + dt Date, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY toMonth(dt); + +INSERT INTO 03173_single_function +SELECT toDate('2000-01-01') + 10 * number FROM numbers(50) +UNION ALL +SELECT toDate('2100-01-01') + 10 * number FROM numbers(50); +OPTIMIZE TABLE 03173_single_function FINAL; + +SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') SETTINGS log_comment='03173_single_function'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_single_function'; + +DROP TABLE IF EXISTS 03173_single_function; + +SELECT '-- Nested partition by function'; + +DROP TABLE IF EXISTS 03173_nested_function; +CREATE TABLE 03173_nested_function( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nested_function SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function FINAL; + +SELECT count() FROM 03173_nested_function WHERE id IN (10) SETTINGS log_comment='03173_nested_function'; +SELECT count() FROM 03173_nested_function WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr'; + +DROP TABLE IF EXISTS 03173_nested_function; + +SELECT '-- Nested partition by function, LowCardinality'; + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS 03173_nested_function_lc; +CREATE TABLE 03173_nested_function_lc( + id LowCardinality(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 
3; + +INSERT INTO 03173_nested_function_lc SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc FINAL; + +SELECT count() FROM 03173_nested_function_lc WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc'; +SELECT count() FROM 03173_nested_function_lc WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc'; + +DROP TABLE IF EXISTS 03173_nested_function_lc; + +SELECT '-- Nested partition by function, Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_null; +CREATE TABLE 03173_nested_function_null( + id Nullable(Int32), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_null FINAL; + +SELECT count() FROM 03173_nested_function_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_null'; +SELECT count() FROM 03173_nested_function_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_null'; + +DROP TABLE IF EXISTS 03173_nested_function_null; + +SELECT '-- Nested partition by function, LowCardinality + Nullable'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE 03173_nested_function_lc_null( + id LowCardinality(Nullable(Int32)), +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3 +SETTINGS allow_nullable_key=1; + +INSERT INTO 03173_nested_function_lc_null SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nested_function_lc_null FINAL; + +SELECT count() FROM 03173_nested_function_lc_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc_null'; +SELECT count() FROM 03173_nested_function_lc_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc_null'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc_null'; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc_null'; + +DROP TABLE IF EXISTS 03173_nested_function_lc_null; + +SELECT '-- Non-safe cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; +CREATE TABLE 03173_nonsafe_cast( + id Int64, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_nonsafe_cast SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_nonsafe_cast FINAL; + +SELECT count() FROM 03173_nonsafe_cast WHERE id IN (SELECT '50' UNION ALL SELECT '99') SETTINGS 
log_comment='03173_nonsafe_cast'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nonsafe_cast'; + +DROP TABLE IF EXISTS 03173_nonsafe_cast; + +SELECT '-- Multiple partition columns'; + +DROP TABLE IF EXISTS 03173_multiple_partition_cols; +CREATE TABLE 03173_multiple_partition_cols ( + key1 Int32, + key2 Int32 +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY (intDiv(key1, 50), xxHash32(key2) % 3); + +INSERT INTO 03173_multiple_partition_cols SELECT number, number FROM numbers(100); +OPTIMIZE TABLE 03173_multiple_partition_cols FINAL; + +SELECT count() FROM 03173_multiple_partition_cols WHERE key2 IN (4) SETTINGS log_comment='03173_multiple_columns'; +SELECT count() FROM 03173_multiple_partition_cols WHERE xxHash32(key2) IN (4251411170) SETTINGS log_comment='03173_multiple_columns_subexpr'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns'; +-- Due to xxHash32() in WHERE condition, MinMax is unable to eliminate any parts, +-- so partition pruning leave two parts (for key1 // 50 = 0 and key1 // 50 = 1) +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns_subexpr'; + +-- Preparing base table for filtering by LowCardinality/Nullable sets +DROP TABLE IF EXISTS 03173_base_data_source; +CREATE TABLE 03173_base_data_source( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_base_data_source SELECT number FROM numbers(100); +OPTIMIZE TABLE 03173_base_data_source FINAL; + +SELECT '-- LowCardinality set'; + +SET allow_suspicious_low_cardinality_types = 1; +DROP TABLE IF EXISTS 03173_low_cardinality_set; +CREATE TABLE 03173_low_cardinality_set (id LowCardinality(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_low_cardinality_set) SETTINGS log_comment='03173_low_cardinality_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_low_cardinality_set'; + +DROP TABLE IF EXISTS 03173_low_cardinality_set; + +SELECT '-- Nullable set'; + +DROP TABLE IF EXISTS 03173_nullable_set; +CREATE TABLE 03173_nullable_set (id Nullable(Int32)) ENGINE=Memory AS SELECT 10; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_nullable_set) SETTINGS log_comment='03173_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nullable_set'; + +DROP TABLE IF EXISTS 03173_nullable_set; + +SELECT '-- LowCardinality + Nullable set'; + +DROP TABLE IF EXISTS 03173_lc_nullable_set; +CREATE TABLE 03173_lc_nullable_set (id LowCardinality(Nullable(Int32))) ENGINE=Memory AS SELECT 10 UNION ALL SELECT NULL; + +SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_lc_nullable_set) SETTINGS log_comment='03173_lc_nullable_set'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_lc_nullable_set'; + +DROP TABLE IF 
EXISTS 03173_lc_nullable_set; + +SELECT '-- Not failing with date parsing functions'; + +DROP TABLE IF EXISTS 03173_date_parsing; +CREATE TABLE 03173_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toDate(id); + +INSERT INTO 03173_date_parsing +SELECT toString(toDate('2023-04-01') + number) +FROM numbers(20); + +SELECT count() FROM 03173_date_parsing WHERE id IN ('2023-04-02', '2023-05-02'); +SELECT count() FROM 03173_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_date_parsing; + +SELECT '-- Pruning + not failing with nested date parsing functions'; + +DROP TABLE IF EXISTS 03173_nested_date_parsing; +CREATE TABLE 03173_nested_date_parsing ( + id String +) +ENGINE=MergeTree +ORDER BY tuple() +PARTITION BY toMonth(toDate(id)); + +INSERT INTO 03173_nested_date_parsing +SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50) +UNION ALL +SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50); + +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing'; +SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date'); + +DROP TABLE IF EXISTS 03173_nested_date_parsing; + +SELECT '-- Empty transform functions'; + +DROP TABLE IF EXISTS 03173_empty_transform; +CREATE TABLE 03173_empty_transform( + id Int32, +) +ENGINE = MergeTree +ORDER BY tuple() +PARTITION BY xxHash32(id) % 3; + +INSERT INTO 03173_empty_transform SELECT number FROM numbers(6); +OPTIMIZE TABLE 03173_empty_transform FINAL; + +SELECT id FROM 03173_empty_transform WHERE xxHash32(id) % 3 IN (xxHash32(2::Int32) % 3) SETTINGS log_comment='03173_empty_transform'; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_empty_transform'; + +DROP TABLE IF EXISTS 03173_empty_transform; From b2306fc67acb992afbe25ef1a0ee9714ed19fbd0 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Wed, 19 Jun 2024 09:32:29 +0300 Subject: [PATCH 121/299] process sets and scalars by same functions --- src/Storages/MergeTree/KeyCondition.cpp | 549 +++++++++++------------- src/Storages/MergeTree/KeyCondition.h | 6 +- 2 files changed, 249 insertions(+), 306 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 85eca4644e7..d38001a0feb 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -874,46 +874,6 @@ static Field applyFunctionForField( return (*col)[0]; } -/// The case when arguments may have types different than in the primary key. 
-static std::pair applyFunctionForFieldOfUnknownType( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName arguments{{ arg_type->createColumnConst(1, arg_value), arg_type, "x" }}; - DataTypePtr return_type = func->getResultType(); - - auto col = func->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - -/// Same as above but for binary operators -static std::pair applyBinaryFunctionForFieldOfUnknownType( - const FunctionOverloadResolverPtr & func, - const DataTypePtr & arg_type, - const Field & arg_value, - const DataTypePtr & arg_type2, - const Field & arg_value2) -{ - ColumnsWithTypeAndName arguments{ - {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; - - FunctionBasePtr func_base = func->build(arguments); - - DataTypePtr return_type = func_base->getResultType(); - - auto col = func_base->execute(arguments, return_type, 1); - - Field result = (*col)[0]; - - return {std::move(result), std::move(return_type)}; -} - - static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. @@ -940,164 +900,92 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & return {field.columns, field.row_idx, result_idx}; } -/** When table's key has expression with these functions from a column, - * and when a column in a query is compared with a constant, such as: - * CREATE TABLE (x String) ORDER BY toDate(x) - * SELECT ... WHERE x LIKE 'Hello%' - * we want to apply the function to the constant for index analysis, - * but should modify it to pass on un-parsable values. - */ -static std::set date_time_parsing_functions = { - "toDate", - "toDate32", - "toDateTime", - "toDateTime64", - "parseDateTimeBestEffort", - "parseDateTimeBestEffortUS", - "parseDateTime32BestEffort", - "parseDateTime64BestEffort", - "parseDateTime", - "parseDateTimeInJodaSyntax", -}; - -/** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. - */ -bool KeyCondition::transformConstantWithValidFunctions( - ContextPtr context, - const String & expr_name, - size_t & out_key_column_num, - DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, - std::function always_monotonic) const +/// Sequentially applies functions to the column, returns `true` +/// if all function arguments are compatible with functions +/// signatures, and none of the functions produce `NULL` output. +/// +/// After functions chain execution, fills result column and its type. 
+bool applyFunctionChainToColumn( + const ColumnPtr & in_column, + const DataTypePtr & in_data_type, + const std::vector & functions, + ColumnPtr & out_column, + DataTypePtr & out_data_type) { - const auto & sample_block = key_expr->getSampleBlock(); + // Remove LowCardinality from input column, and convert it to regular one + auto result_column = in_column->convertToFullIfNeeded(); + auto result_type = removeLowCardinality(in_data_type); - for (const auto & node : key_expr->getNodes()) + // In case function sequence is empty, return full non-LowCardinality column + if (functions.empty()) { - auto it = key_columns.find(node.result_name); - if (it != key_columns.end()) - { - std::stack chain; - - const auto * cur_node = &node; - bool is_valid_chain = true; - - while (is_valid_chain) - { - if (cur_node->result_name == expr_name) - break; - - chain.push(cur_node); - - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) - { - is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); - - const ActionsDAG::Node * next_node = nullptr; - for (const auto * arg : cur_node->children) - { - if (arg->column && isColumnConst(*arg->column)) - continue; - - if (next_node) - is_valid_chain = false; - - next_node = arg; - } - - if (!next_node) - is_valid_chain = false; - - cur_node = next_node; - } - else if (cur_node->type == ActionsDAG::ActionType::ALIAS) - cur_node = cur_node->children.front(); - else - is_valid_chain = false; - } - - if (is_valid_chain) - { - out_type = removeLowCardinality(out_type); - auto const_type = removeLowCardinality(cur_node->result_type); - auto const_column = out_type->createColumnConst(1, out_value); - auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0]; - - if (const_value.isNull()) - return false; - - while (!chain.empty()) - { - const auto * func = chain.top(); - chain.pop(); - - if (func->type != ActionsDAG::ActionType::FUNCTION) - continue; - - const auto & func_name = func->function_base->getName(); - auto func_base = func->function_base; - const auto & arg_types = func_base->getArgumentTypes(); - if (date_time_parsing_functions.contains(func_name) && !arg_types.empty() && isStringOrFixedString(arg_types[0])) - { - auto func_or_null = FunctionFactory::instance().get(func_name + "OrNull", context); - ColumnsWithTypeAndName arguments; - int i = 0; - for (const auto & type : func->function_base->getArgumentTypes()) - arguments.push_back({nullptr, type, fmt::format("_{}", i++)}); - - func_base = func_or_null->build(arguments); - } - - if (func->children.size() == 1) - { - std::tie(const_value, const_type) - = applyFunctionForFieldOfUnknownType(func_base, const_type, const_value); - } - else if (func->children.size() == 2) - { - const auto * left = func->children[0]; - const auto * right = func->children[1]; - if (left->column && isColumnConst(*left->column)) - { - auto left_arg_type = left->result_type; - auto left_arg_value = (*left->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - left_arg_type, left_arg_value, const_type, const_value); - } - else - { - auto right_arg_type = right->result_type; - auto right_arg_value = (*right->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - FunctionFactory::instance().get(func_base->getName(), context), - const_type, const_value, right_arg_type, right_arg_value); - } - } - - if 
(const_value.isNull()) - return false; - } - - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - out_value = const_value; - out_type = const_type; - return true; - } - } + out_column = result_column; + out_data_type = result_type; + return true; } - return false; + // If first function arguments are empty, cannot transform input column + if (functions[0]->getArgumentTypes().empty()) + { + return false; + } + + // And cast it to the argument type of the first function in the chain + auto in_argument_type = functions[0]->getArgumentTypes()[0]; + if (canBeSafelyCasted(result_type, in_argument_type)) + { + result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type); + result_type = in_argument_type; + } + // If column cannot be casted accurate, casting with OrNull, and in case all + // values has been casted (no nulls), unpacking nested column from nullable. + // In case any further functions require Nullable input, they'll be able + // to cast it. + else + { + result_column = castColumnAccurateOrNull({result_column, result_type, ""}, in_argument_type); + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(in_argument_type); + } + + for (const auto & func : functions) + { + if (func->getArgumentTypes().empty()) + return false; + + auto argument_type = func->getArgumentTypes()[0]; + if (!canBeSafelyCasted(result_type, argument_type)) + return false; + + result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); + result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); + result_type = func->getResultType(); + + // Transforming nullable columns to the nested ones, in case no nulls found + if (result_column->isNullable()) + { + const auto & result_column_nullable = assert_cast(*result_column); + const auto & null_map_data = result_column_nullable.getNullMapData(); + for (char8_t i : null_map_data) + { + if (i != 0) + return false; + } + result_column = result_column_nullable.getNestedColumnPtr(); + result_type = removeNullable(func->getResultType()); + } + } + out_column = result_column; + out_data_type = result_type; + + return true; } bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( @@ -1118,13 +1006,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, - out_type, + transform_functions, [](const IFunctionBase & func, const IDataType & type) { if (!func.hasInformationAboutMonotonicity()) @@ -1138,6 +1026,27 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( } return true; }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, + out_type, + transform_functions, + transformed_const_column, + transformed_const_type); + + if 
(!constant_transformed) + return false; + + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; + return true; } /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` @@ -1173,61 +1082,34 @@ bool KeyCondition::canConstantBeWrappedByFunctions( if (out_value.isNull()) return false; - return transformConstantWithValidFunctions( + MonotonicFunctionsChain transform_functions; + auto can_transform_constant = extractMonotonicFunctionsChainFromKey( node.getTreeContext().getQueryContext(), expr_name, out_key_column_num, out_key_column_type, - out_value, + transform_functions, + [](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); }); + + if (!can_transform_constant) + return false; + + auto const_column = out_type->createColumnConst(1, out_value); + + ColumnPtr transformed_const_column; + DataTypePtr transformed_const_type; + bool constant_transformed = applyFunctionChainToColumn( + const_column, out_type, - [](const IFunctionBase & func, const IDataType &) - { - return func.isDeterministic(); - }); -} + transform_functions, + transformed_const_column, + transformed_const_type); -/// Sequentially applies functions to set column, returns `true` -/// if all function arguments are compatible with functions -/// signatures, and none of the functions produce `NULL` output. -/// -/// After functions chain execution, fills result set column and -/// its type. -bool applyFunctionChainToSetValues( - const ColumnPtr & set_column, - const DataTypePtr & set_data_type, - const std::vector & functions, - ColumnPtr & out_set_column, - DataTypePtr & out_set_type) -{ - auto result_column = set_column->convertToFullColumnIfLowCardinality(); - auto result_type = removeLowCardinality(set_data_type); - - for (const auto & func : functions) - { - if (func->getArgumentTypes().empty()) - return false; - - auto argument_type = func->getArgumentTypes()[0]; - if (!canBeSafelyCasted(result_type, argument_type)) - return false; - - result_column = castColumnAccurate({result_column, result_type, ""}, argument_type); - result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size()); - if (result_column->isNullable()) - { - const auto & result_column_nullable = assert_cast(*result_column); - const auto & null_map_data = result_column_nullable.getNullMapData(); - for (char8_t i : null_map_data) - { - if (i != 0) - return false; - } - } - result_type = func->getResultType(); - } - out_set_column = result_column; - out_set_type = result_type; + if (!constant_transformed) + return false; + out_value = (*transformed_const_column)[0]; + out_type = transformed_const_type; return true; } @@ -1260,6 +1142,7 @@ bool KeyCondition::tryPrepareSetIndex( out_key_column_num = std::max(out_key_column_num, index_mapping.key_index); set_transforming_chains.push_back(set_transforming_chain); } + // For partition index, checking if set can be transformed to prune any partitions else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain)) { indexes_mapping.push_back(index_mapping); @@ -1335,7 +1218,12 @@ bool KeyCondition::tryPrepareSetIndex( { ColumnPtr transformed_set_column; DataTypePtr transformed_set_type; - if (!applyFunctionChainToSetValues(set_column, set_element_type, set_transforming_chains[indexes_mapping_index], transformed_set_column, transformed_set_type)) + if (!applyFunctionChainToColumn( + set_column, + 
set_element_type, + set_transforming_chains[indexes_mapping_index], + transformed_set_column, + transformed_set_type)) return false; set_column = transformed_set_column; @@ -1639,36 +1527,54 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl( return false; } -bool KeyCondition::canSetValuesBeWrappedByFunctions( - const RPNBuilderTreeNode & node, +/** When table's key has expression with these functions from a column, + * and when a column in a query is compared with a constant, such as: + * CREATE TABLE (x String) ORDER BY toDate(x) + * SELECT ... WHERE x LIKE 'Hello%' + * we want to apply the function to the constant for index analysis, + * but should modify it to pass on un-parsable values. + */ +static std::set date_time_parsing_functions = { + "toDate", + "toDate32", + "toDateTime", + "toDateTime64", + "parseDateTimeBestEffort", + "parseDateTimeBestEffortUS", + "parseDateTime32BestEffort", + "parseDateTime64BestEffort", + "parseDateTime", + "parseDateTimeInJodaSyntax", +}; + +/** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` + * condition also holds, so the index may be used to select only parts satisfying this condition. + * + * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the + * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). + * Instead, we can qualify only functions that do not transform the range (for example rounding), + * which while not strictly monotonic, are monotonic everywhere on the input range. 
+ */ +bool KeyCondition::extractMonotonicFunctionsChainFromKey( + ContextPtr context, + const String & expr_name, size_t & out_key_column_num, - DataTypePtr & out_key_res_column_type, - MonotonicFunctionsChain & out_functions_chain) + DataTypePtr & out_key_column_type, + MonotonicFunctionsChain & out_functions_chain, + std::function always_monotonic) const { - // Checking if column name matches any of key subexpressions - String expr_name = node.getColumnName(); + const auto & sample_block = key_expr->getSampleBlock(); - if (array_joined_column_names.contains(expr_name)) - return false; - - if (!key_subexpr_names.contains(expr_name)) + for (const auto & node : key_expr->getNodes()) { - expr_name = node.getColumnNameWithModuloLegacy(); - - if (!key_subexpr_names.contains(expr_name)) - return false; - } - - // If match has been found, need to identify key column, and - // sequence of function nodes, which forms key column - for (const auto & key_expr_node : key_expr->getNodes()) - { - auto it = key_columns.find(key_expr_node.result_name); + auto it = key_columns.find(node.result_name); if (it != key_columns.end()) { - std::vector chain; + std::stack chain; - const auto * cur_node = &key_expr_node; + const auto * cur_node = &node; bool is_valid_chain = true; while (is_valid_chain) @@ -1676,10 +1582,11 @@ bool KeyCondition::canSetValuesBeWrappedByFunctions( if (cur_node->result_name == expr_name) break; + chain.push(cur_node); + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) { - chain.push_back(cur_node); - is_valid_chain = cur_node->function_base->isDeterministic(); + is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); const ActionsDAG::Node * next_node = nullptr; for (const auto * arg : cur_node->children) @@ -1704,64 +1611,68 @@ bool KeyCondition::canSetValuesBeWrappedByFunctions( is_valid_chain = false; } - // If key column has been found, and it consists of deterministic function, - // in order to make them applicable to constants from query predicate, - // reversing the sequence of functions, and binding constant arguments if (is_valid_chain) { - // Removing low cardinality from `node` output type to avoid - // passing it through functions signatures - auto nonconst_input_type = removeLowCardinality(node.getDAGNode()->result_type); - - for (auto iter = chain.rbegin(); iter != chain.rend(); ++iter) + while (!chain.empty()) { - const auto * function_node = *iter; - auto function = function_node->function_base; - auto func_builder = FunctionFactory::instance().tryGet(function->getName(), node.getTreeContext().getQueryContext()); - if (!func_builder) - return false; + const auto * func = chain.top(); + chain.pop(); + + if (func->type != ActionsDAG::ActionType::FUNCTION) + continue; + + auto func_name = func->function_base->getName(); + auto func_base = func->function_base; + ColumnsWithTypeAndName arguments; ColumnWithTypeAndName const_arg; FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST; - if (function_node->children.size() == 2) + + if (date_time_parsing_functions.contains(func_name)) { - const auto * left = function_node->children[0]; - const auto * right = function_node->children[1]; + const auto & arg_types = func_base->getArgumentTypes(); + if (!arg_types.empty() && isStringOrFixedString(arg_types[0])) + { + func_name = func_name + "OrNull"; + } + + } + + auto func_builder = FunctionFactory::instance().tryGet(func_name, context); + + if (func->children.size() == 1) + { + 
arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""}); + } + else if (func->children.size() == 2) + { + const auto * left = func->children[0]; + const auto * right = func->children[1]; if (left->column && isColumnConst(*left->column)) { const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""}; arguments.push_back(const_arg); - arguments.push_back({nullptr, nonconst_input_type, ""}); + arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""}); kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST; } - else if (right->column && isColumnConst(*right->column)) + else { const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""}; - arguments.push_back({nullptr, nonconst_input_type, ""}); + arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""}); arguments.push_back(const_arg); kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; } - - /// If constant arg of binary operator is NULL, there will be no monotonicity. - if (const_arg.column->isNullAt(0)) - return false; } - else - arguments.push_back({nullptr, nonconst_input_type, ""}); - auto func = func_builder->build(arguments); + auto out_func = func_builder->build(arguments); if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST) - out_functions_chain.push_back(func); + out_functions_chain.push_back(out_func); else - out_functions_chain.push_back(std::make_shared(func, const_arg, kind)); - - nonconst_input_type = func->getResultType(); + out_functions_chain.push_back(std::make_shared(out_func, const_arg, kind)); } - const auto & sample_block = key_expr->getSampleBlock(); out_key_column_num = it->second; - out_key_res_column_type = sample_block.getByName(it->first).type; - + out_key_column_type = sample_block.getByName(it->first).type; return true; } } @@ -1770,6 +1681,38 @@ bool KeyCondition::canSetValuesBeWrappedByFunctions( return false; } +bool KeyCondition::canSetValuesBeWrappedByFunctions( + const RPNBuilderTreeNode & node, + size_t & out_key_column_num, + DataTypePtr & out_key_res_column_type, + MonotonicFunctionsChain & out_functions_chain) +{ + // Checking if column name matches any of key subexpressions + String expr_name = node.getColumnName(); + + if (array_joined_column_names.contains(expr_name)) + return false; + + if (!key_subexpr_names.contains(expr_name)) + { + expr_name = node.getColumnNameWithModuloLegacy(); + + if (!key_subexpr_names.contains(expr_name)) + return false; + } + + return extractMonotonicFunctionsChainFromKey( + node.getTreeContext().getQueryContext(), + expr_name, + out_key_column_num, + out_key_res_column_type, + out_functions_chain, + [](const IFunctionBase & func, const IDataType &) + { + return func.isDeterministic(); + }); +} + static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name) { try diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 11dd1c85c9d..9e2218d7a29 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -14,6 +14,7 @@ #include #include +#include "DataTypes/Serializations/ISerialization.h" namespace DB @@ -253,13 +254,12 @@ private: DataTypePtr & out_key_column_type, std::vector & out_functions_chain); - bool transformConstantWithValidFunctions( + bool extractMonotonicFunctionsChainFromKey( ContextPtr context, const String & expr_name, size_t & out_key_column_num, 
DataTypePtr & out_key_column_type, - Field & out_value, - DataTypePtr & out_type, + MonotonicFunctionsChain & out_functions_chain, std::function always_monotonic) const; bool canConstantBeWrappedByMonotonicFunctions( From 9b1003527dc8aba15729a63a86428390470bff07 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Jul 2024 11:34:39 +0200 Subject: [PATCH 122/299] Fix the order --- .../DataLakes/DeltaLakeMetadata.cpp | 75 ++++++++++--------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index bc64ef15cf1..12341c877e2 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -209,43 +209,6 @@ struct DeltaLakeMetadataImpl // object->stringify(oss); // LOG_TEST(log, "Metadata: {}", oss.str()); - if (object->has("add")) - { - auto add_object = object->get("add").extract(); - auto path = add_object->getValue("path"); - result.insert(fs::path(configuration->getPath()) / path); - - auto filename = fs::path(path).filename().string(); - auto it = file_partition_columns.find(filename); - if (it == file_partition_columns.end()) - { - if (add_object->has("partitionValues")) - { - auto partition_values = add_object->get("partitionValues").extract(); - if (partition_values->size()) - { - auto & current_partition_columns = file_partition_columns[filename]; - for (const auto & partition_name : partition_values->getNames()) - { - const auto value = partition_values->getValue(partition_name); - auto name_and_type = file_schema.tryGetByName(partition_name); - if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); - - auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); - - LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); - } - } - } - } - } - else if (object->has("remove")) - { - auto path = object->get("remove").extract()->getValue("path"); - result.erase(fs::path(configuration->getPath()) / path); - } if (object->has("metaData")) { const auto metadata_object = object->get("metaData").extract(); @@ -289,6 +252,44 @@ struct DeltaLakeMetadataImpl file_schema.toString(), current_schema.toString()); } } + + if (object->has("add")) + { + auto add_object = object->get("add").extract(); + auto path = add_object->getValue("path"); + result.insert(fs::path(configuration->getPath()) / path); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + if (add_object->has("partitionValues")) + { + auto partition_values = add_object->get("partitionValues").extract(); + if (partition_values->size()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & partition_name : partition_values->getNames()) + { + const auto value = partition_values->getValue(partition_name); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + } + } + else if (object->has("remove")) + { + 
auto path = object->get("remove").extract()->getValue("path"); + result.erase(fs::path(configuration->getPath()) / path); + } } } From db53c2c5f79a3b65d10dec37fcad78e1192c8c96 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 12:39:58 +0200 Subject: [PATCH 123/299] remove iterator from queue after removing table --- src/Interpreters/DatabaseCatalog.cpp | 62 +++++++++++++--------------- src/Interpreters/DatabaseCatalog.h | 4 +- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 61ad5bf96de..4131e214278 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1243,15 +1243,13 @@ time_t DatabaseCatalog::getMinDropTime() return min_drop_time; } -DatabaseCatalog::TablesMarkedAsDropped DatabaseCatalog::getTablesToDrop() +std::vector DatabaseCatalog::getTablesToDrop() { time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - DatabaseCatalog::TablesMarkedAsDropped result; + decltype(getTablesToDrop()) result; std::lock_guard lock(tables_marked_dropped_mutex); - const auto was_count = tables_marked_dropped.size(); - auto it = tables_marked_dropped.begin(); while (it != tables_marked_dropped.end()) { @@ -1263,15 +1261,9 @@ DatabaseCatalog::TablesMarkedAsDropped DatabaseCatalog::getTablesToDrop() continue; } - if (it == first_async_drop_in_queue) - ++first_async_drop_in_queue; - - result.emplace_back(std::move(*it)); - it = tables_marked_dropped.erase(it); + result.emplace_back(it); } - chassert(was_count == tables_marked_dropped.size() + result.size()); - return result; } @@ -1294,19 +1286,11 @@ void DatabaseCatalog::rescheduleDropTableTask() (*drop_task)->scheduleAfter(schedule_after); } -void DatabaseCatalog::dropTablesParallel(TablesMarkedAsDropped tables_to_drop) +void DatabaseCatalog::dropTablesParallel(std::vector tables_to_drop) { if (tables_to_drop.empty()) return; - SCOPE_EXIT({ - std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.splice(tables_marked_dropped.end(), tables_to_drop); - - if (first_async_drop_in_queue == tables_marked_dropped.end()) - first_async_drop_in_queue = tables_marked_dropped.begin(); - }); - ThreadPool pool( CurrentMetrics::DatabaseCatalogThreads, CurrentMetrics::DatabaseCatalogThreadsActive, @@ -1315,29 +1299,41 @@ void DatabaseCatalog::dropTablesParallel(TablesMarkedAsDropped tables_to_drop) /* max_free_threads */0, /* queue_size */tables_to_drop.size()); - while (!tables_to_drop.empty()) + for (const auto & item : tables_to_drop) { - auto front_table = std::move(tables_to_drop.front()); - tables_to_drop.pop_front(); - - pool.scheduleOrThrowOnError([&, table = front_table] () + pool.scheduleOrThrowOnError([&, table_iterator = item] () { try { - dropTableFinally(table); - std::lock_guard lock(tables_marked_dropped_mutex); - [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table.table_id.uuid); - chassert(removed); - wait_table_finally_dropped.notify_all(); + dropTableFinally(*table_iterator); + + { + std::lock_guard lock(tables_marked_dropped_mutex); + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(table_iterator->table_id.uuid); + chassert(removed); + + tables_marked_dropped.erase(table_iterator); + + wait_table_finally_dropped.notify_all(); + } } catch (...) 
{ - tryLogCurrentException(log, "Cannot drop table " + table.table_id.getNameForLogs() + + tryLogCurrentException(log, "Cannot drop table " + table_iterator->table_id.getNameForLogs() + ". Will retry later."); { std::lock_guard lock(tables_marked_dropped_mutex); - tables_marked_dropped.emplace_back(table); - tables_marked_dropped.back().drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + + if (first_async_drop_in_queue == table_iterator) + ++first_async_drop_in_queue; + + tables_marked_dropped.splice(tables_marked_dropped.end(), tables_marked_dropped, table_iterator); + table_iterator->drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; + if (first_async_drop_in_queue == tables_marked_dropped.end()) --first_async_drop_in_queue; } diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index f9b8233e85a..23e38a6445e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -298,8 +298,8 @@ private: time_t getMinDropTime() TSA_REQUIRES(tables_marked_dropped_mutex); std::tuple getDroppedTablesCountAndInuseCount(); - TablesMarkedAsDropped getTablesToDrop(); - void dropTablesParallel(TablesMarkedAsDropped tables); + std::vector getTablesToDrop(); + void dropTablesParallel(std::vector tables); void rescheduleDropTableTask(); void cleanupStoreDirectoryTask(); From 4227447eac30dd77c6ba70d4b1685bbf11a8221f Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Jul 2024 12:53:55 +0200 Subject: [PATCH 124/299] add nth_value and update ordering --- .../sql-reference/window-functions/index.md | 6 +- .../window-functions/lagInFrame.md | 2 +- .../window-functions/leadInFrame.md | 2 +- .../window-functions/nth_value.md | 77 +++++++++++++++++++ .../en/sql-reference/window-functions/rank.md | 2 +- .../window-functions/row_number.md | 2 +- 6 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/nth_value.md diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index a0246af610f..ee54a679ba1 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -78,11 +78,11 @@ These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. - `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. - `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. - [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. 
+- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
 
 ## Examples
 
diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md
index ea9f6d9dea2..b67cf252283 100644
--- a/docs/en/sql-reference/window-functions/lagInFrame.md
+++ b/docs/en/sql-reference/window-functions/lagInFrame.md
@@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/window-functions/lagInFrame
 sidebar_label: lagInFrame
-sidebar_position: 5
+sidebar_position: 3
 ---
 
 # lagInFrame
diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md
index e3b65af9a4d..0cb4eea52b2 100644
--- a/docs/en/sql-reference/window-functions/leadInFrame.md
+++ b/docs/en/sql-reference/window-functions/leadInFrame.md
@@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/window-functions/leadInFrame
 sidebar_label: leadInFrame
-sidebar_position: 6
+sidebar_position: 4
 ---
 
 # leadInFrame
diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md
new file mode 100644
index 00000000000..26c90110aaa
--- /dev/null
+++ b/docs/en/sql-reference/window-functions/nth_value.md
@@ -0,0 +1,77 @@
+---
+slug: /en/sql-reference/window-functions/nth_value
+sidebar_label: nth_value
+sidebar_position: 5
+---
+
+# nth_value
+
+Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
+
+Unlike `first_value`, which always evaluates the first row of the ordered frame, `nth_value` evaluates the row at the given offset.
+
+**Syntax**
+
+```sql
+nth_value (x, offset)
+  OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
+        [ROWS or RANGE expression_to_bound_rows_within_the_group]] | [window_name])
+FROM table_name
+WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
+```
+
+For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
+
+**Parameters**
+
+- `x` — Column name.
+- `offset` — nth row to evaluate current row against.
+
+**Returned value**
+
+- The first non-NULL value evaluated against the nth row (offset) in its ordered frame.
+
+**Example**
+
+In this example the `nth_value` function is used to find the third-highest salary from a fictional dataset of salaries of Premier League football players.
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS salaries;
+CREATE TABLE salaries
+(
+    `team` String,
+    `player` String,
+    `salary` UInt32,
+    `position` String
+)
+Engine = Memory;
+
+INSERT INTO salaries FORMAT Values
+    ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
+    ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
+    ('Port Elizabeth Barbarians', 'Michael Stanley', 10000, 'D'),
+    ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
+    ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
+    ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
+    ('South Hampton Seagulls', 'James Henderson', 140000, 'M');
+```
+
+```sql
+SELECT salary, nth_value(salary,3) OVER(ORDER BY salary DESC) FROM salaries GROUP BY salary;
+```
+
+Result:
+
+```response
+   ┌─player──────────┬─salary─┬─rank─┐
+1. │ Gary Chen       │ 195000 │    1 │
+2. │ Robert George   │ 195000 │    1 │
+3. │ Charles Juarez  │ 190000 │    3 │
+4. │ Douglas Benson  │ 150000 │    4 │
+5. │ Michael Stanley │ 150000 │    4 │
+6. │ Scott Harrison  │ 150000 │    4 │
+7. 
│ James Henderson │ 140000 │ 7 │ + └─────────────────┴────────┴──────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index 17db889ef92..9ac99dde6df 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 3 +sidebar_position: 6 --- # rank diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index 428bb34a8ba..e7165d60169 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 4 +sidebar_position: 7 --- # row_number From 312dd824254a8518b35c9a3bed75f2887edb769e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 11:35:00 +0000 Subject: [PATCH 125/299] Bump rocksdb to v6.24.2 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 112 ++++++------------ ...ksdb_build_version.cc => build_version.cc} | 0 3 files changed, 34 insertions(+), 80 deletions(-) rename contrib/rocksdb-cmake/{rocksdb_build_version.cc => build_version.cc} (100%) diff --git a/contrib/rocksdb b/contrib/rocksdb index 078fa563869..2aed45919b9 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304 +Subproject commit 2aed45919b9fee4208221e01f368483fef11be61 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 943e1d8acbd..5502d3b6205 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -5,20 +5,13 @@ if (NOT ENABLE_ROCKSDB) return() endif() -## this file is extracted from `contrib/rocksdb/CMakeLists.txt` -set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") -list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") - -set(PORTABLE ON) -## always disable jemalloc for rocksdb by default -## because it introduces non-standard jemalloc APIs +# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs option(WITH_JEMALLOC "build with JeMalloc" OFF) -set(USE_SNAPPY OFF) -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY ON) -endif() -option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY}) -## lz4, zlib, zstd is enabled in ClickHouse by default + +option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING + +# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd +option(WITH_SNAPPY "build with SNAPPY" ON) option(WITH_LZ4 "build with lz4" ON) option(WITH_ZLIB "build with zlib" ON) option(WITH_ZSTD "build with zstd" ON) @@ -26,74 +19,34 @@ option(WITH_ZSTD "build with zstd" ON) # third-party/folly is only validated to work on Linux and Windows for now. # So only turn it on there by default. 
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows") - if(MSVC AND MSVC_VERSION LESS 1910) - # Folly does not compile with MSVC older than VS2017 - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) - else() - option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) - endif() + option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON) else() option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF) endif() -if( NOT DEFINED CMAKE_CXX_STANDARD ) - set(CMAKE_CXX_STANDARD 11) +if(WITH_SNAPPY) + add_definitions(-DSNAPPY) + list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) endif() -if(MSVC) - option(WITH_XPRESS "build with windows built in compression" OFF) - include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc") -else() - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD") - # FreeBSD has jemalloc as default malloc - # but it does not have all the jemalloc files in include/... - set(WITH_JEMALLOC ON) - else() - if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc) - add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE) - list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc) - endif() - endif() - - if(WITH_SNAPPY) - add_definitions(-DSNAPPY) - list(APPEND THIRDPARTY_LIBS ch_contrib::snappy) - endif() - - if(WITH_ZLIB) - add_definitions(-DZLIB) - list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) - endif() - - if(WITH_LZ4) - add_definitions(-DLZ4) - list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) - endif() - - if(WITH_ZSTD) - add_definitions(-DZSTD) - list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) - endif() +if(WITH_ZLIB) + add_definitions(-DZLIB) + list(APPEND THIRDPARTY_LIBS ch_contrib::zlib) endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - if(POWER9) - set(HAS_POWER9 1) - set(HAS_ALTIVEC 1) - else() - set(HAS_POWER8 1) - set(HAS_ALTIVEC 1) - endif(POWER9) -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") +if(WITH_LZ4) + add_definitions(-DLZ4) + list(APPEND THIRDPARTY_LIBS ch_contrib::lz4) +endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") - set(HAS_ARMV8_CRC 1) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") -endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") +if(WITH_ZSTD) + add_definitions(-DZSTD) + list(APPEND THIRDPARTY_LIBS ch_contrib::zstd) +endif() +option(PORTABLE "build a portable binary" ON) -if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ) +if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_SSE42) add_definitions(-DHAVE_PCLMUL) endif() @@ -107,8 +60,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) -elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") - add_definitions(-DOS_SOLARIS) elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") add_definitions(-DOS_FREEBSD) elseif(CMAKE_SYSTEM_NAME MATCHES "Android") @@ -123,12 +74,10 @@ endif() if (OS_LINUX) add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT) -elseif (OS_FREEBSD) - add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT) endif() +set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") include_directories(${ROCKSDB_SOURCE_DIR}) include_directories("${ROCKSDB_SOURCE_DIR}/include") @@ -136,8 +85,6 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX) 
include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly") endif() -# Main library source code - set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc @@ -333,9 +280,12 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc - ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc ${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc + ${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc ${ROCKSDB_SOURCE_DIR}/util/coding.cc ${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc ${ROCKSDB_SOURCE_DIR}/util/comparator.cc @@ -366,6 +316,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc ${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc ${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc ${ROCKSDB_SOURCE_DIR}/utilities/debug.cc ${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc @@ -374,6 +325,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc + ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc @@ -393,6 +345,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc ${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc + ${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc @@ -425,7 +378,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc - rocksdb_build_version.cc) + build_version.cc) # generated by hand if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) set_source_files_properties( @@ -462,5 +415,6 @@ endif() add_library(_rocksdb ${SOURCES}) add_library(ch_contrib::rocksdb ALIAS _rocksdb) target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) + # SYSTEM is required to overcome some issues target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include") diff --git a/contrib/rocksdb-cmake/rocksdb_build_version.cc b/contrib/rocksdb-cmake/build_version.cc similarity index 100% rename from contrib/rocksdb-cmake/rocksdb_build_version.cc rename to contrib/rocksdb-cmake/build_version.cc From da5bde7f3a8957e61c4a113dd20cee5c60fa5c99 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 15:00:23 +0200 Subject: [PATCH 126/299] fix peak_threads_usage --- src/Interpreters/ThreadStatusExt.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..6ec6a64b13d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -233,7 +233,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; - thread_group->linkThread(thread_id); + if (!internal_thread) + thread_group->linkThread(thread_id); performance_counters.setParent(&thread_group->performance_counters); memory_tracker.setParent(&thread_group->memory_tracker); @@ -269,7 +270,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(); + if (!internal_thread) + thread_group->unlinkThread(); thread_group.reset(); From c5eebf3b9eec37b9c3117071e03b262cf28e612f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 15:23:52 +0200 Subject: [PATCH 127/299] fix getTablesToDrop --- src/Interpreters/DatabaseCatalog.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 9df6fd93020..afe89e7b9a7 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1252,18 +1252,12 @@ std::vector DatabaseCatalog::g std::lock_guard lock(tables_marked_dropped_mutex); - auto it = tables_marked_dropped.begin(); - while (it != tables_marked_dropped.end()) + for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) { bool in_use = it->table && !it->table.unique(); bool old_enough = it->drop_time <= current_time; - if (in_use || !old_enough) - { - ++it; - continue; - } - - result.emplace_back(it); + if (!in_use && old_enough) + result.emplace_back(it); } return result; From adcee80b2d637d9d79a3ecfe4501828339efe050 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 13:03:19 +0000 Subject: [PATCH 128/299] Bump rocksdb to v6.25.3 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 2aed45919b9..6df587a7eb3 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 2aed45919b9fee4208221e01f368483fef11be61 +Subproject commit 6df587a7eb3e7bb835a71c2f4a668a51cabefd67 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 5502d3b6205..48c97257d94 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -88,6 +88,7 @@ endif() set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc @@ -176,6 +177,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc + ${ROCKSDB_SOURCE_DIR}/env/unique_id.cc ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc ${ROCKSDB_SOURCE_DIR}/file/file_util.cc @@ -297,6 +299,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc ${ROCKSDB_SOURCE_DIR}/util/random.cc ${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc + 
${ROCKSDB_SOURCE_DIR}/util/regex.cc ${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc ${ROCKSDB_SOURCE_DIR}/util/slice.cc ${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc From 31f9bed44285eeaae964f98e5a4ce3150d57ac5f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 13:43:28 +0000 Subject: [PATCH 129/299] Bump rocksdb to 6.26.1 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 6df587a7eb3..19ab8db7a73 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 6df587a7eb3e7bb835a71c2f4a668a51cabefd67 +Subproject commit 19ab8db7a736306d6d12992a21e545e0336ab34a diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 48c97257d94..525b301f31f 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -177,7 +177,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc ${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc ${ROCKSDB_SOURCE_DIR}/env/mock_env.cc - ${ROCKSDB_SOURCE_DIR}/env/unique_id.cc + ${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc ${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc ${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc ${ROCKSDB_SOURCE_DIR}/file/file_util.cc @@ -271,6 +271,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/table/table_factory.cc ${ROCKSDB_SOURCE_DIR}/table/table_properties.cc ${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc + ${ROCKSDB_SOURCE_DIR}/table/unique_id.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc ${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc ${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc @@ -315,6 +316,8 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc ${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc + ${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc ${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc @@ -367,6 +370,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc ${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc + ${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc ${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc ${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc From 6098bc20d90fc3ab45128f62db70624efa0c05dc Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 8 Jul 2024 10:49:46 -0300 Subject: [PATCH 130/299] remove test --- .../__init__.py | 0 .../configs/config.xml | 4 - .../configs/host_regexp.xml | 11 --- .../configs/listen_host.xml | 5 -- .../coredns_config/Corefile | 8 -- .../coredns_config/example.com | 1 - .../scripts/stress_test.py | 62 ------------- .../test.py | 88 ------------------- 8 files changed, 179 deletions(-) delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml delete mode 100644 
tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py delete mode 100644 tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml deleted file mode 100644 index 42a1f962705..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 250 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml deleted file mode 100644 index 9329c8dbde2..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - test1\.example\.com$ - - default - - - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml deleted file mode 100644 index 9c27c612f63..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ /dev/null @@ -1,5 +0,0 @@ - - :: - 0.0.0.0 - 1 - diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile deleted file mode 100644 index 3edf37dafa5..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/Corefile +++ /dev/null @@ -1,8 +0,0 @@ -. { - hosts /example.com { - reload "20ms" - fallthrough - } - forward . 
127.0.0.11 - log -} diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com deleted file mode 100644 index 9beb415c290..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/coredns_config/example.com +++ /dev/null @@ -1 +0,0 @@ -filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py deleted file mode 100644 index 70419f95dd3..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ /dev/null @@ -1,62 +0,0 @@ -import pycurl -import threading -from io import BytesIO -import sys - -client_ip = sys.argv[1] -server_ip = sys.argv[2] - -mutex = threading.Lock() -success_counter = 0 -number_of_threads = 100 -number_of_iterations = 50 - - -def perform_request(): - buffer = BytesIO() - crl = pycurl.Curl() - crl.setopt(pycurl.INTERFACE, client_ip) - crl.setopt(crl.WRITEDATA, buffer) - crl.setopt(crl.URL, f"http://{server_ip}:8123/?query=select+1&user=test_dns") - - crl.perform() - - # End curl session - crl.close() - - str_response = buffer.getvalue().decode("iso-8859-1") - expected_response = "1\n" - - mutex.acquire() - - global success_counter - - if str_response == expected_response: - success_counter += 1 - - mutex.release() - - -def perform_multiple_requests(n): - for request_number in range(n): - perform_request() - - -threads = [] - - -for i in range(number_of_threads): - thread = threading.Thread( - target=perform_multiple_requests, args=(number_of_iterations,) - ) - thread.start() - threads.append(thread) - -for thread in threads: - thread.join() - - -if success_counter == number_of_threads * number_of_iterations: - exit(0) - -exit(1) diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py deleted file mode 100644 index d73e8813e79..00000000000 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/test.py +++ /dev/null @@ -1,88 +0,0 @@ -import pytest -import socket -from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check -from time import sleep -import os - -DOCKER_COMPOSE_PATH = get_docker_compose_path() -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -cluster = ClickHouseCluster(__file__) - -ch_server = cluster.add_instance( - "clickhouse-server", - with_coredns=True, - main_configs=["configs/config.xml", "configs/listen_host.xml"], - user_configs=["configs/host_regexp.xml"], -) - -client = cluster.add_instance( - "clickhouse-client", -) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def check_ptr_record(ip, hostname): - try: - host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) - if hostname.lower() == host.lower(): - return True - except socket.herror: - pass - return False - - -def setup_dns_server(ip): - domains_string = "test3.example.com test2.example.com test1.example.com" - example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' - run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", 
shell=True) - - # DNS server takes time to reload the configuration. - for try_num in range(10): - if all(check_ptr_record(ip, host) for host in domains_string.split()): - break - sleep(1) - - -def setup_ch_server(dns_server_ip): - ch_server.exec_in_container( - (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) - ) - ch_server.exec_in_container( - (["bash", "-c", "echo 'options ndots:0' >> /etc/resolv.conf"]) - ) - ch_server.query("SYSTEM DROP DNS CACHE") - - -def build_endpoint_v4(ip): - return f"'http://{ip}:8123/?query=SELECT+1&user=test_dns'" - - -def build_endpoint_v6(ip): - return build_endpoint_v4(f"[{ip}]") - - -def test_host_regexp_multiple_ptr_v4(started_cluster): - server_ip = cluster.get_instance_ip("clickhouse-server") - client_ip = cluster.get_instance_ip("clickhouse-client") - dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - - setup_dns_server(client_ip) - setup_ch_server(dns_server_ip) - - current_dir = os.path.dirname(__file__) - client.copy_file_to_container( - os.path.join(current_dir, "scripts", "stress_test.py"), "stress_test.py" - ) - - client.exec_in_container(["python3", f"stress_test.py", client_ip, server_ip]) From 9509802866206df50b7802ab74a556e9fd979852 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 13:53:24 +0000 Subject: [PATCH 131/299] Bump rocksdb to v6.27.3 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 19ab8db7a73..630bc2d1c3b 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 19ab8db7a736306d6d12992a21e545e0336ab34a +Subproject commit 630bc2d1c3bcf654ebada4d7a092996de8cfb779 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 525b301f31f..d6e2a1afd50 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -104,6 +104,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc ${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc + ${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc ${ROCKSDB_SOURCE_DIR}/db/builder.cc ${ROCKSDB_SOURCE_DIR}/db/c.cc ${ROCKSDB_SOURCE_DIR}/db/column_family.cc @@ -329,6 +330,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc + ${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc ${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc ${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc ${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc From 1bfafa42be11ad338a76746f695ffa1710d198d1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 14:38:51 +0000 Subject: [PATCH 132/299] Bump rocksdb to v6.28.2 --- contrib/rocksdb | 2 +- contrib/rocksdb-cmake/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index 630bc2d1c3b..b8a996ce196 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 630bc2d1c3bcf654ebada4d7a092996de8cfb779 +Subproject commit b8a996ce1969a3f7141aca7fb5c54196a58a654a diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index d6e2a1afd50..96558b40174 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -88,6 +88,7 @@ endif() 
set(SOURCES ${ROCKSDB_SOURCE_DIR}/cache/cache.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc + ${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc ${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc ${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc ${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc @@ -197,6 +198,7 @@ set(SOURCES ${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc ${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc ${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc + ${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc ${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc ${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc From 93d0f7a4850de40080f18b49b15f26c8e618ed55 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 25 Jun 2024 15:03:36 +0100 Subject: [PATCH 133/299] refactor --- src/Columns/ColumnAggregateFunction.cpp | 4 +-- src/Columns/ColumnAggregateFunction.h | 8 +++--- src/Columns/ColumnArray.cpp | 6 ++--- src/Columns/ColumnArray.h | 6 ++--- src/Columns/ColumnCompressed.h | 4 +-- src/Columns/ColumnConst.h | 8 +++--- src/Columns/ColumnDecimal.cpp | 4 +-- src/Columns/ColumnDecimal.h | 8 +++--- src/Columns/ColumnDynamic.cpp | 8 +++--- src/Columns/ColumnDynamic.h | 8 +++--- src/Columns/ColumnFixedString.cpp | 6 ++--- src/Columns/ColumnFixedString.h | 8 +++--- src/Columns/ColumnFunction.cpp | 4 +-- src/Columns/ColumnFunction.h | 6 ++--- src/Columns/ColumnLowCardinality.cpp | 6 ++--- src/Columns/ColumnLowCardinality.h | 6 ++--- src/Columns/ColumnMap.cpp | 8 +++--- src/Columns/ColumnMap.h | 8 +++--- src/Columns/ColumnNullable.cpp | 8 +++--- src/Columns/ColumnNullable.h | 8 +++--- src/Columns/ColumnObject.cpp | 4 +-- src/Columns/ColumnObject.h | 6 ++--- src/Columns/ColumnSparse.cpp | 6 ++--- src/Columns/ColumnSparse.h | 6 ++--- src/Columns/ColumnString.cpp | 4 +-- src/Columns/ColumnString.h | 8 +++--- src/Columns/ColumnTuple.cpp | 8 +++--- src/Columns/ColumnTuple.h | 8 +++--- src/Columns/ColumnUnique.h | 4 +-- src/Columns/ColumnVariant.cpp | 8 +++--- src/Columns/ColumnVariant.h | 11 +++++--- src/Columns/ColumnVector.cpp | 2 +- src/Columns/ColumnVector.h | 8 +++--- src/Columns/IColumn.cpp | 2 +- src/Columns/IColumn.h | 27 +++++++++++++------ src/Columns/IColumnDummy.h | 6 ++--- src/Columns/IColumnUnique.h | 2 +- .../benchmark_column_insert_many_from.cpp | 2 +- 38 files changed, 135 insertions(+), 119 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index f7e6b1a1ccc..be00f086ccf 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,7 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } -void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) +void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) { const ColumnAggregateFunction & from_concrete = assert_cast(from); @@ -462,7 +462,7 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } -void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) +void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) { insertRangeFrom(from, n, 1); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index a75b27e835c..a34def52d08 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,9 @@ public: 
void insertData(const char * pos, size_t length) override; - void insertFrom(const IColumn & from, size_t n) override; + void doInsertFrom(const IColumn & from, size_t n) override; + + using IColumn::insertFrom; void insertFrom(ConstAggregateDataPtr place); @@ -182,7 +184,7 @@ public: void protect() override; - void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; void popBack(size_t n) override; @@ -201,7 +203,7 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; - int compareAt(size_t, size_t, const IColumn &, int) const override + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0b7e6541560..756fce07f8e 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -337,7 +337,7 @@ bool ColumnArray::tryInsert(const Field & x) return true; } -void ColumnArray::insertFrom(const IColumn & src_, size_t n) +void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) { const ColumnArray & src = assert_cast(src_); size_t size = src.sizeAt(n); @@ -392,7 +392,7 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } -int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const { return compareAtImpl(n, m, rhs_, nan_direction_hint); } @@ -535,7 +535,7 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } -void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { if (length == 0) return; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 53eb5166df8..b609c64598c 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,10 +84,10 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src_, size_t n) override; + void doInsertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -95,7 +95,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git 
a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 934adf07cf4..f6e56b15d30 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,7 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } - void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } + void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } @@ -100,7 +100,7 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } - int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } + int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeDecompressed(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index c2c0fa3027c..e782d06a6ca 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -121,7 +121,7 @@ public: return data->isNullAt(0); } - void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override + void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override { s += length; } @@ -145,12 +145,12 @@ public: ++s; } - void insertFrom(const IColumn &, size_t) override + void doInsertFrom(const IColumn &, size_t) override { ++s; } - void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } + void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } void insertDefault() override { @@ -223,7 +223,7 @@ public: return data->allocatedBytes() + sizeof(s); } - int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override + int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override { return data->compareAt(0, 0, *assert_cast(rhs).data, nan_direction_hint); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index eb9784c14dd..ad17ccce778 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes } template -int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const +int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const { auto & other = static_cast(rhs_); const T & a = data[n]; @@ -331,7 +331,7 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template -void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnDecimal & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index c4510ba2922..019ce442bcc 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,9 
+55,9 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } - void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } + void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } - void insertManyFrom(const IColumn & src, size_t position, size_t length) override + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -68,7 +68,7 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void popBack(size_t n) override { @@ -92,7 +92,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; - int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3c147b6f123..63ddd5e566c 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -213,7 +213,7 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } -void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) { const auto & dynamic_src = assert_cast(src_); @@ -263,7 +263,7 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } -void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) { if (start + length > src_.size()) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. 
" @@ -429,7 +429,7 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size } } -void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) { const auto & dynamic_src = assert_cast(src_); @@ -587,7 +587,7 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } -int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const { const auto & left_variant = assert_cast(*variant_column); const auto & right_dynamic = assert_cast(rhs); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 27ad0dd583f..7156cd49678 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,9 +142,9 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src_, size_t n) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override { @@ -213,7 +213,7 @@ public: return scattered_columns; } - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; bool hasEqualValues() const override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index d7e4eff2727..081a46b78d9 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,7 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } -void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) +void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) { const ColumnFixedString & src = assert_cast(src_); @@ -86,7 +86,7 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } -void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) { const ColumnFixedString & src_concrete = assert_cast(src); if (n != src_concrete.getN()) @@ -219,7 +219,7 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } -void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnFixedString & src_concrete = assert_cast(src); chassert(this->n == src_concrete.n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 7b46dc11cd6..47df6bec4cc 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,9 +98,9 @@ public: bool tryInsert(const Field & x) override; - void 
insertFrom(const IColumn & src_, size_t index) override; + void doInsertFrom(const IColumn & src_, size_t index) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertData(const char * pos, size_t length) override; @@ -129,7 +129,7 @@ public: void updateHashFast(SipHash & hash) const override; - int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override + int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override { const ColumnFixedString & rhs = assert_cast(rhs_); chassert(this->n == rhs.n); @@ -144,7 +144,7 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 0ab9d15ad50..f40b951ef13 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } -void ColumnFunction::insertFrom(const IColumn & src, size_t n) +void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) { const ColumnFunction & src_func = assert_cast(src); @@ -89,7 +89,7 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n) ++elements_size; } -void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 6fdc6679d3e..ab5a63d1afe 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,8 +94,8 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } - void insertFrom(const IColumn & src, size_t n) override; - void insertRangeFrom(const IColumn &, size_t start, size_t length) override; + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; void insertData(const char *, size_t) override { @@ -137,7 +137,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } - int compareAt(size_t, size_t, const IColumn &, int) const override + int doCompareAt(size_t, size_t, const IColumn &, int) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 208326fe629..a2d4903284e 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,7 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } -void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) +void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) { const auto * low_cardinality_src = typeid_cast(&src); @@ 
-187,7 +187,7 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } -void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const auto * low_cardinality_src = typeid_cast(&src); @@ -364,7 +364,7 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } -int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ac3b725b22f..43984cbcf8a 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,10 +78,10 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; - void insertFrom(const IColumn & src, size_t n) override; + void doInsertFrom(const IColumn & src, size_t n) override; void insertFromFullColumn(const IColumn & src, size_t n); - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); @@ -127,7 +127,7 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index eecea1a273f..5b4cd04d67a 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,17 +153,17 @@ void ColumnMap::updateHashFast(SipHash & hash) const nested->updateHashFast(hash); } -void ColumnMap::insertFrom(const IColumn & src, size_t n) +void ColumnMap::doInsertFrom(const IColumn & src, size_t n) { nested->insertFrom(assert_cast(src).getNestedColumn(), n); } -void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) { assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } -void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { nested->insertRangeFrom( assert_cast(src).getNestedColumn(), @@ -210,7 +210,7 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } -int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { const auto & rhs_map = assert_cast(rhs); return nested->compareAt(n, m, rhs_map.getNestedColumn(), 
nan_direction_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 52165d0d74e..9834b2ed0ee 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,16 +66,16 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; - void insertFrom(const IColumn & src_, size_t n) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d12a59fd59..8e57204721f 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,7 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } -void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnNullable & nullable_col = assert_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); @@ -258,7 +258,7 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } -void ColumnNullable::insertFrom(const IColumn & src, size_t n) +void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); @@ -266,7 +266,7 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) } -void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); @@ -402,7 +402,7 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } -int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const { return compareAtImpl(n, m, rhs_, null_direction_hint); } diff --git 
a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 510a4cacf1e..1e5866b8abb 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,11 +69,11 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src, size_t n) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); @@ -90,7 +90,7 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; - int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; #if USE_EMBEDDED_COMPILER diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..86e94d9397a 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,12 +763,12 @@ void ColumnObject::get(size_t n, Field & res) const } } -void ColumnObject::insertFrom(const IColumn & src, size_t n) +void ColumnObject::doInsertFrom(const IColumn & src, size_t n) { insert(src[n]); } -void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const auto & src_object = assert_cast(src); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..2fe5adb70b3 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,8 +209,8 @@ public: void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; - void insertFrom(const IColumn & src, size_t n) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertFrom(const IColumn & src, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; @@ -228,7 +228,7 @@ public: /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} - int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } void getExtremes(Field & min, Field & max) const override; /// All other methods throw exception. 
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 5190ceb49e5..2100ac0cc03 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,7 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return values->skipSerializedInArena(pos); } -void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { if (length == 0) return; @@ -248,7 +248,7 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } -void ColumnSparse::insertFrom(const IColumn & src, size_t n) +void ColumnSparse::doInsertFrom(const IColumn & src, size_t n) { if (const auto * src_sparse = typeid_cast(&src)) { @@ -446,7 +446,7 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } -int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const { if (const auto * rhs_sparse = typeid_cast(&rhs_)) return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 12b2def7cf1..85040255e2e 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,10 +81,10 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src, size_t n) override; + void doInsertFrom(const IColumn & src, size_t n) override; void insertDefault() override; void insertManyDefaults(size_t length) override; @@ -98,7 +98,7 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; - int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index a84aea73486..b93488ab1be 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,7 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } -void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) { const ColumnString & src_concrete = assert_cast(src); const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; @@ -129,7 +129,7 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } -void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { if 
(length == 0) return; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 39d4684fd89..c0f2d4c6e50 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,7 @@ public: return true; } - void insertFrom(const IColumn & src_, size_t n) override + void doInsertFrom(const IColumn & src_, size_t n) override { const ColumnString & src = assert_cast(src_); const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray. @@ -165,7 +165,7 @@ public: } } - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertData(const char * pos, size_t length) override { @@ -212,7 +212,7 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -238,7 +238,7 @@ public: offsets.push_back(offsets.back() + 1); } - int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override { const ColumnString & rhs = assert_cast(rhs_); return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..fd79afaea96 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,7 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } -void ColumnTuple::insertFrom(const IColumn & src_, size_t n) +void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) { const ColumnTuple & src = assert_cast(src_); @@ -218,7 +218,7 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } -void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) { const ColumnTuple & src_tuple = assert_cast(src); @@ -318,7 +318,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } -void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { column_length += length; const size_t tuple_size = columns.size(); @@ -470,7 +470,7 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } -int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 0103f81b242..78ecb75862e 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,8 +65,8 @@ public: void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src_, size_t n) override; - void insertManyFrom(const IColumn & src, size_t position, size_t 
length) override; + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; @@ -76,14 +76,14 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 0311efd4c83..12f765f42af 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,7 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -488,7 +488,7 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template -int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { if (is_nullable) { diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index ec47f5dfa74..137a396d3c4 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,17 +595,17 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } -void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) { insertFromImpl(src_, n, nullptr); } -void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) { insertRangeFromImpl(src_, start, length, nullptr); } -void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) { insertManyFromImpl(src_, position, length, nullptr); } @@ 
-1174,7 +1174,7 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } -int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { const auto & rhs_variant = assert_cast(rhs); Discriminator left_discr = globalDiscriminatorAt(n); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index e5a4498f340..f516b616e02 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,9 +180,12 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertFrom(const IColumn & src_, size_t n) override; - void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; - void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; + + using IColumn::insertFrom; + using IColumn::insertRangeFrom; /// Methods for insertion from another Variant but with known mapping between global discriminators. void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); @@ -213,7 +216,7 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; - int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 35d9f5386ed..f46d244e9d9 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,7 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template -void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) +void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnVector & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bbd27c91a70..f0bc8257a3f 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,12 +64,12 @@ public: return data.size(); } - void insertFrom(const IColumn & src, size_t n) override + void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(assert_cast(src).getData()[n]); } - void insertManyFrom(const IColumn & src, size_t position, size_t length) override + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); @@ -142,7 +142,7 @@ public: } /// This method implemented in header because it could be possibly devirtualized. 
- int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override + int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } @@ -228,7 +228,7 @@ public: bool tryInsert(const DB::Field & x) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 90cccef2b03..1b4bd8f6e62 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,7 @@ String IColumn::dumpStructure() const return res.str(); } -void IColumn::insertFrom(const IColumn & src, size_t n) +void IColumn::doInsertFrom(const IColumn & src, size_t n) { insert(src[n]); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index afa301d5c1c..6c69b40ecc8 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -180,18 +180,14 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. - virtual void insertFrom(const IColumn & src, size_t n); + void insertFrom(const IColumn & src, size_t n) { doInsertFrom(src, n); } /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. - virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) { doInsertRangeFrom(src, start, length); } /// Appends one element from other column with the same type multiple times. - virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) - { - for (size_t i = 0; i < length; ++i) - insertFrom(src, position); - } + void insertManyFrom(const IColumn & src, size_t position, size_t length) { doInsertManyFrom(src, position, length); } /// Appends one field multiple times. Can be optimized in inherited classes. virtual void insertMany(const Field & field, size_t length) @@ -322,7 +318,10 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. 
*/ - [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const + { + return doCompareAt(n, m, rhs, nan_direction_hint); + } #if USE_EMBEDDED_COMPILER @@ -633,6 +632,18 @@ protected: Equals equals, Sort full_sort, PartialSort partial_sort) const; + + virtual void doInsertFrom(const IColumn & src, size_t n); + + virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; + + virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) + { + for (size_t i = 0; i < length; ++i) + insertFrom(src, position); + } + + virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 27f420fbc71..f6a1109d0a2 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,7 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } - int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } + int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { } @@ -67,12 +67,12 @@ public: { } - void insertFrom(const IColumn &, size_t) override + void doInsertFrom(const IColumn &, size_t) override { ++s; } - void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override + void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override { s += length; } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index f71f19a5da6..3ced489c04d 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } - void insertRangeFrom(const IColumn &, size_t, size_t) override + void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique."); } diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 325cf5559cd..6b9c329cd55 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,7 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } -static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) { size_t size = src.size(); dst.insertManyFrom(src, size / 2, size); From dd1b85e63d69c7e461922ca15aae9b436bf20053 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 25 Jun 2024 19:42:58 +0100 Subject: [PATCH 134/299] add type check --- src/Columns/IColumn.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 6c69b40ecc8..14bcbf60c1b 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -1,11 +1,11 @@ #pragma once -#include -#include -#include -#include -#include #include +#include +#include +#include 
+#include +#include #include "config.h" @@ -180,14 +180,26 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. - void insertFrom(const IColumn & src, size_t n) { doInsertFrom(src, n); } + void insertFrom(const IColumn & src, size_t n) + { + assertTypeEquality(src); + doInsertFrom(src, n); + } /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. - void insertRangeFrom(const IColumn & src, size_t start, size_t length) { doInsertRangeFrom(src, start, length); } + void insertRangeFrom(const IColumn & src, size_t start, size_t length) + { + assertTypeEquality(src); + doInsertRangeFrom(src, start, length); + } /// Appends one element from other column with the same type multiple times. - void insertManyFrom(const IColumn & src, size_t position, size_t length) { doInsertManyFrom(src, position, length); } + void insertManyFrom(const IColumn & src, size_t position, size_t length) + { + assertTypeEquality(src); + doInsertManyFrom(src, position, length); + } /// Appends one field multiple times. Can be optimized in inherited classes. virtual void insertMany(const Field & field, size_t length) @@ -320,6 +332,7 @@ public: */ [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { + assertTypeEquality(rhs); return doCompareAt(n, m, rhs, nan_direction_hint); } @@ -644,6 +657,9 @@ protected: } virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + +private: + void assertTypeEquality(const IColumn & rhs) const { chassert(typeid(*this) == typeid(rhs)); } }; using ColumnPtr = IColumn::Ptr; From 2302fcf9ac8237ae736fe7a0aa924c4f4dd58da1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jun 2024 12:01:24 +0100 Subject: [PATCH 135/299] fix build --- src/Columns/ColumnVariant.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index f516b616e02..fe8cf20e20d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -185,6 +185,7 @@ public: void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; using IColumn::insertFrom; + using IColumn::insertManyFrom; using IColumn::insertRangeFrom; /// Methods for insertion from another Variant but with known mapping between global discriminators. 
From 286f2abca25e4a4a5479984e8d9a8fe4037dd70d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jun 2024 23:18:35 +0100 Subject: [PATCH 136/299] try fix --- src/Columns/IColumn.cpp | 8 ++++++++ src/Columns/IColumn.h | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 1b4bd8f6e62..985d791cfbc 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -104,6 +104,14 @@ void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) cons }); } +void IColumn::assertTypeEquality(const IColumn & rhs) const +{ + if (typeid(*this) != typeid(rhs)) + LOG_DEBUG(&Poco::Logger::get("IColumn"), "typeid(*this) = {}, typeid(rhs) = {}", typeid(*this).name(), typeid(rhs).name()); + + chassert(isSparse() || typeid(*this) == typeid(rhs)); +} + bool isColumnNullable(const IColumn & column) { return checkColumn(column); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 14bcbf60c1b..a586214ab0f 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -9,6 +9,9 @@ #include "config.h" +#include +#include + class SipHash; class Collator; @@ -659,7 +662,7 @@ protected: virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; private: - void assertTypeEquality(const IColumn & rhs) const { chassert(typeid(*this) == typeid(rhs)); } + void assertTypeEquality(const IColumn & rhs) const; }; using ColumnPtr = IColumn::Ptr; From f972e80589ef09164ee51b783f9ebf6361e03df4 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 27 Jun 2024 13:48:03 +0100 Subject: [PATCH 137/299] fix --- src/Columns/IColumn.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 985d791cfbc..d054d3ad8d4 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -109,7 +109,7 @@ void IColumn::assertTypeEquality(const IColumn & rhs) const if (typeid(*this) != typeid(rhs)) LOG_DEBUG(&Poco::Logger::get("IColumn"), "typeid(*this) = {}, typeid(rhs) = {}", typeid(*this).name(), typeid(rhs).name()); - chassert(isSparse() || typeid(*this) == typeid(rhs)); + chassert(isColumnConst(*this) || isSparse() || typeid(*this) == typeid(rhs)); } bool isColumnNullable(const IColumn & column) From 55f363d5f06968c8eb3316e8176f2d1a5071c255 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 27 Jun 2024 23:59:53 +0100 Subject: [PATCH 138/299] better --- src/Columns/IColumn.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index d054d3ad8d4..6d4d4a73df9 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -106,10 +106,7 @@ void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) cons void IColumn::assertTypeEquality(const IColumn & rhs) const { - if (typeid(*this) != typeid(rhs)) - LOG_DEBUG(&Poco::Logger::get("IColumn"), "typeid(*this) = {}, typeid(rhs) = {}", typeid(*this).name(), typeid(rhs).name()); - - chassert(isColumnConst(*this) || isSparse() || typeid(*this) == typeid(rhs)); + chassert(((isColumnConst(*this) || isSparse()) && getDataType() == rhs.getDataType()) || typeid(*this) == typeid(rhs)); } bool isColumnNullable(const IColumn & column) From 7dc3ae381fe7525d9f1a9ce2d8a33ef09053eb7e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 28 Jun 2024 12:09:48 +0100 Subject: [PATCH 139/299] better --- src/Columns/ColumnConst.h | 2 ++ src/Columns/IColumn.cpp | 5 ----- src/Columns/IColumn.h | 
14 +++++++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index e782d06a6ca..98f9c8f4266 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -32,6 +32,8 @@ private: ColumnConst(const ColumnConst & src) = default; public: + bool isConst() const override { return true; } + ColumnPtr convertToFullColumn() const; ColumnPtr convertToFullColumnIfConst() const override diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 6d4d4a73df9..1b4bd8f6e62 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -104,11 +104,6 @@ void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) cons }); } -void IColumn::assertTypeEquality(const IColumn & rhs) const -{ - chassert(((isColumnConst(*this) || isSparse()) && getDataType() == rhs.getDataType()) || typeid(*this) == typeid(rhs)); -} - bool isColumnNullable(const IColumn & column) { return checkColumn(column); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index a586214ab0f..b482a16a42e 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -9,10 +9,6 @@ #include "config.h" -#include -#include - - class SipHash; class Collator; @@ -625,6 +621,8 @@ public: [[nodiscard]] virtual bool isSparse() const { return false; } + [[nodiscard]] virtual bool isConst() const { return false; } + [[nodiscard]] virtual bool isCollationSupported() const { return false; } virtual ~IColumn() = default; @@ -662,7 +660,13 @@ protected: virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; private: - void assertTypeEquality(const IColumn & rhs) const; + void assertTypeEquality(const IColumn & rhs) const + { + /// For Sparse and Const columns, we can compare only internal types. It is considered normal to e.g. insert from normal vector column to a sparse vector column. + /// This case is specifically handled in ColumnSparse implementation. Similar situation with Const column. + /// For the rest of column types we can compare the types directly. + chassert((isConst() || isSparse()) ? 
getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs)); + } }; using ColumnPtr = IColumn::Ptr; From 053228eea1bc1e733603f70448dca559dd580039 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 3 Jul 2024 23:48:18 +0200 Subject: [PATCH 140/299] Empty user when JWT is set --- programs/client/Client.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6343dc85d00..0126613f797 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1117,6 +1117,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (!options["user"].defaulted()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); config().setString("jwt", options["jwt"].as()); + config().setString("user", ""); } if (options.count("accept-invalid-certificate")) { From d320f0f3f29e685b6f7e2a3ed2fcac6ee457a3cf Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 3 Jul 2024 23:48:30 +0200 Subject: [PATCH 141/299] Typo --- src/Core/Protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 4c0848c0706..2e5b91e9b1b 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,7 +63,7 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; -/// Market for JSON Web Token authentication +/// Marker for JSON Web Token authentication const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; }; From 9ba10ca604ad6705ad46a60b9d03569c4729afcc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 30 Jun 2024 17:22:42 +0200 Subject: [PATCH 142/299] Remove mysqlxx::Pool::Entry assignment operator v2: fix tidy https://s3.amazonaws.com/clickhouse-builds/PRs/65920/86789491be1a945602f6ebf0b3b93bf5272e52ab/binary_tidy/build_log.log Signed-off-by: Azat Khuzhin --- src/Common/mysqlxx/Pool.cpp | 1 - src/Common/mysqlxx/mysqlxx/Pool.h | 11 ----------- src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp | 4 +--- src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 12 +++++++----- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index cc5b18214c8..546e9e91dc7 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet() for (auto connection_it = connections.cbegin(); connection_it != connections.cend();) { Connection * connection_ptr = *connection_it; - /// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator if (connection_ptr->ref_count == 0) { { diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index 6e509d8bdd6..f1ef81e28dd 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -64,17 +64,6 @@ public: decrementRefCount(); } - Entry & operator= (const Entry & src) /// NOLINT - { - pool = src.pool; - if (data) - decrementRefCount(); - data = src.data; - if (data) - incrementRefCount(); - return * this; - } - bool isNull() const { return data == nullptr; diff --git a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 61d6a117285..121767edc84 100644 --- a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -13,13 
+13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) constexpr size_t max_tries = 3; - mysqlxx::Pool::Entry worker_connection; - for (size_t try_no = 1; try_no <= max_tries; ++try_no) { try { - worker_connection = connections_pool.tryGet(); + mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet(); if (!worker_connection.isNull()) { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 7ab4235feeb..27ebe0b6d21 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -532,13 +533,17 @@ static inline void dumpDataForTables( bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata) { bool opened_transaction = false; - mysqlxx::PoolWithFailover::Entry connection; while (!isCancelled()) { try { - connection = pool.tryGet(); + mysqlxx::PoolWithFailover::Entry connection = pool.tryGet(); + SCOPE_EXIT({ + if (opened_transaction) + connection->query("ROLLBACK").execute(); + }); + if (connection.isNull()) { if (settings->max_wait_time_when_mysql_unavailable < 0) @@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { tryLogCurrentException(log); - if (opened_transaction) - connection->query("ROLLBACK").execute(); - if (settings->max_wait_time_when_mysql_unavailable < 0) throw; From 24dc3b40209db9e1bc59186cc16c978a1b9d2a27 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 8 Jul 2024 16:57:00 +0100 Subject: [PATCH 143/299] insertRangeFrom --- src/Columns/ColumnAggregateFunction.cpp | 4 ++++ src/Columns/ColumnAggregateFunction.h | 4 ++++ src/Columns/ColumnArray.cpp | 4 ++++ src/Columns/ColumnArray.h | 4 ++++ src/Columns/ColumnCompressed.h | 4 ++++ src/Columns/ColumnConst.h | 4 ++++ src/Columns/ColumnDecimal.cpp | 4 ++++ src/Columns/ColumnDecimal.h | 4 ++++ src/Columns/ColumnDynamic.cpp | 4 ++++ src/Columns/ColumnDynamic.h | 4 ++++ src/Columns/ColumnFixedString.cpp | 4 ++++ src/Columns/ColumnFixedString.h | 4 ++++ src/Columns/ColumnFunction.cpp | 4 ++++ src/Columns/ColumnFunction.h | 4 ++++ src/Columns/ColumnLowCardinality.cpp | 4 ++++ src/Columns/ColumnLowCardinality.h | 4 ++++ src/Columns/ColumnMap.cpp | 4 ++++ src/Columns/ColumnMap.h | 4 ++++ src/Columns/ColumnNullable.cpp | 4 ++++ src/Columns/ColumnNullable.h | 4 ++++ src/Columns/ColumnObject.cpp | 4 ++++ src/Columns/ColumnObject.h | 4 ++++ src/Columns/ColumnSparse.cpp | 4 ++++ src/Columns/ColumnSparse.h | 4 ++++ src/Columns/ColumnString.cpp | 4 ++++ src/Columns/ColumnString.h | 4 ++++ src/Columns/ColumnTuple.cpp | 4 ++++ src/Columns/ColumnTuple.h | 4 ++++ src/Columns/ColumnVariant.cpp | 4 ++++ src/Columns/ColumnVariant.h | 4 ++++ src/Columns/ColumnVector.cpp | 4 ++++ src/Columns/ColumnVector.h | 4 ++++ src/Columns/IColumn.h | 6 ++++++ src/Columns/IColumnDummy.h | 4 ++++ src/Columns/IColumnUnique.h | 4 ++++ 35 files changed, 142 insertions(+) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index be00f086ccf..7124a03c605 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length) +#else void 
ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length) +#endif { const ColumnAggregateFunction & from_concrete = assert_cast(from); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index a34def52d08..b33fd7dfc47 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -184,7 +184,11 @@ public: void protect() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override; +#endif void popBack(size_t n) override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 756fce07f8e..bf89341d5a0 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -535,7 +535,11 @@ void ColumnArray::getExtremes(Field & min, Field & max) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index b609c64598c..bdadbd18a49 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,7 +84,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src_, size_t n) override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index f6e56b15d30..d0f4c2c5910 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,7 +85,11 @@ public: bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } bool tryInsert(const Field &) override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#else void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } +#endif void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 98f9c8f4266..df51991e22f 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -123,7 +123,11 @@ public: return data->isNullAt(0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#else void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override +#endif { s += length; } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index ad17ccce778..b09d7b1ee90 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -331,7 +331,11 @@ void ColumnDecimal::insertData(const char * src, size_t /*length*/) } template 
+#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnDecimal::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnDecimal & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 019ce442bcc..af2e9286a24 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -68,7 +68,11 @@ public: void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } void insert(const Field & x) override { data.push_back(x.get()); } bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void popBack(size_t n) override { diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 63ddd5e566c..c80cb79384e 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -263,7 +263,11 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#else void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +#endif { if (start + length > src_.size()) throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 7156cd49678..59e889a3644 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -143,7 +143,11 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src_, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 081a46b78d9..ed0f1301a07 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -219,7 +219,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & return elements.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); chassert(this->n == src_concrete.n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 47df6bec4cc..cc99634adf8 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -144,7 +144,11 @@ public: size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, 
size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index f40b951ef13..4f19cf87fc6 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -89,7 +89,11 @@ void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) ++elements_size; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index ab5a63d1afe..b5b0c53feb5 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -95,7 +95,11 @@ public: } void doInsertFrom(const IColumn & src, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn &, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override; +#endif void insertData(const char *, size_t) override { diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index a2d4903284e..cf43640a669 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -187,7 +187,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n) idx.insertPosition(getDictionary().uniqueInsertFrom(src, n)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto * low_cardinality_src = typeid_cast(&src); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 43984cbcf8a..ef0df669729 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -81,7 +81,11 @@ public: void doInsertFrom(const IColumn & src, size_t n) override; void insertFromFullColumn(const IColumn & src, size_t n); +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length); void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions); diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 5b4cd04d67a..072df6a91c5 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -163,7 +163,11 @@ void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t le assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { nested->insertRangeFrom( assert_cast(src).getNestedColumn(), diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 9834b2ed0ee..54370c15650 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -68,7 +68,11 @@ public: void updateHashFast(SipHash & hash) const override; void 
doInsertFrom(const IColumn & src_, size_t n) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 8e57204721f..6268c802be7 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const return pos; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnNullable & nullable_col = assert_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 1e5866b8abb..45104577d12 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,7 +69,11 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src, size_t n) override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 86e94d9397a..69dbf780eeb 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -768,7 +768,11 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n) insert(src[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const auto & src_object = assert_cast(src); diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 2fe5adb70b3..eb8ac2874c2 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -210,7 +210,11 @@ public: bool tryInsert(const Field & field) override; void insertDefault() override; void doInsertFrom(const IColumn & src, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 2100ac0cc03..937d016c55c 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const return 
values->skipSerializedInArena(pos); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 85040255e2e..73bcdb78984 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -81,7 +81,11 @@ public: char * serializeValueIntoMemory(size_t n, char * memory) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src, size_t n) override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index b93488ab1be..24ef2f26b95 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -129,7 +129,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { if (length == 0) return; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c0f2d4c6e50..69b2c71e107 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -212,7 +212,11 @@ public: hash.update(reinterpret_cast(chars.data()), chars.size() * sizeof(chars[0])); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index fd79afaea96..9d99ce9837a 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -318,7 +318,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const column->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { column_length += length; const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 78ecb75862e..1536dfcaa0b 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -76,7 +76,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; diff --git 
a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 137a396d3c4..d9803846ca2 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -600,7 +600,11 @@ void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) insertFromImpl(src_, n, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +#else void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) +#endif { insertRangeFromImpl(src_, start, length, nullptr); } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index fe8cf20e20d..cfb6d254cec 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -181,7 +181,11 @@ public: bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src_, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; +#endif void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; using IColumn::insertFrom; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index f46d244e9d9..19d1b800961 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -503,7 +503,11 @@ bool ColumnVector::tryInsert(const DB::Field & x) } template +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t length) +#else void ColumnVector::doInsertRangeFrom(const IColumn & src, size_t start, size_t length) +#endif { const ColumnVector & src_vec = assert_cast(src); diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f0bc8257a3f..f078386cfcd 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -228,7 +228,11 @@ public: bool tryInsert(const DB::Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; +#endif ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index b482a16a42e..5b1de8a1f39 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -187,11 +187,15 @@ public: /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. +#if !defined(ABORT_ON_LOGICAL_ERROR) + virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; +#else void insertRangeFrom(const IColumn & src, size_t start, size_t length) { assertTypeEquality(src); doInsertRangeFrom(src, start, length); } +#endif /// Appends one element from other column with the same type multiple times. 
void insertManyFrom(const IColumn & src, size_t position, size_t length) @@ -649,7 +653,9 @@ protected: virtual void doInsertFrom(const IColumn & src, size_t n); +#if defined(ABORT_ON_LOGICAL_ERROR) virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; +#endif virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) { diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index f6a1109d0a2..023cbbc463a 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -72,7 +72,11 @@ public: ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#else void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override +#endif { s += length; } diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index 3ced489c04d..3398452b7ee 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -85,7 +85,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique."); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertRangeFrom(const IColumn &, size_t, size_t) override +#else void doInsertRangeFrom(const IColumn &, size_t, size_t) override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique."); } From 3c0ccb0e294526bc25c20caf5132dee6b71a27c9 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 9 Jul 2024 00:03:25 +0800 Subject: [PATCH 144/299] Use a better placeholder name --- .../Formats/Impl/ConstantExpressionTemplate.cpp | 7 ++++++- .../Formats/Impl/ValuesBlockInputFormat.h | 13 +------------ .../02890_named_tuple_functions.reference | 1 + .../0_stateless/02890_named_tuple_functions.sql | 9 +++++++++ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9d056b42101..fe82d1b1c53 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -227,7 +227,12 @@ private: return true; } - String column_name = "_dummy_" + std::to_string(replaced_literals.size()); + /// When generating placeholder names, ensure that we use names + /// requiring quotes to be valid identifiers. This prevents the + /// tuple() function from generating named tuples. Otherwise, + /// inserting named tuples with different names into another named + /// tuple will result in only default values being inserted. + String column_name = "-dummy-" + std::to_string(replaced_literals.size()); replaced_literals.emplace_back(literal, column_name, force_nullable); setDataType(replaced_literals.back()); ast = std::make_shared(column_name); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index b1bce098e99..0abafc896ff 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -37,18 +37,7 @@ public: void resetReadBuffer() override; /// TODO: remove context somehow. - void setContext(const ContextPtr & context_) - { - auto context_copy = Context::createCopy(context_); - - /// ConstantExpressionTemplate generates placeholder names (_dummy_N) - /// for all literals, which are valid names for creating named tuples. 
- /// This behavior needs to be explicitly disabled, because if named - /// tuples with different names are inserted into a named tuple, it will - /// only insert default values. - context_copy->setSetting("enable_named_columns_in_function_tuple", false); - context = context_copy; - } + void setContext(const ContextPtr & context_) { context = Context::createCopy(context_); } const BlockMissingValues & getMissingValues() const override { return block_missing_values; } diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference index efccfef0817..f7a0c440b5a 100644 --- a/tests/queries/0_stateless/02890_named_tuple_functions.reference +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -6,3 +6,4 @@ Tuple(\n k UInt8,\n j Int32) ['k','j'] Tuple(Int32, Int32, Int32, Int32) ['1','2','3','4'] +(1,2,3) diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql index abd24e1cbfe..8e0c9c2b10e 100644 --- a/tests/queries/0_stateless/02890_named_tuple_functions.sql +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -20,3 +20,12 @@ select tupleNames(tuple(i, i, j, j)) from x; select tupleNames(1); -- { serverError 43 } drop table x; + +drop table if exists tbl; + +-- Make sure named tuple won't break Values insert +create table tbl (x Tuple(a Int32, b Int32, c Int32)) engine MergeTree order by (); +insert into tbl values (tuple(1, 2, 3)); -- without tuple it's interpreted differently inside values block. +select * from tbl; + +drop table tbl From 3be0ab5c4b6ab964e0e97269dd5e265bff1b064a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 8 Jul 2024 17:05:55 +0100 Subject: [PATCH 145/299] insertManyFrom --- src/Columns/ColumnConst.h | 4 ++++ src/Columns/ColumnDecimal.h | 4 ++++ src/Columns/ColumnDynamic.cpp | 4 ++++ src/Columns/ColumnDynamic.h | 4 ++++ src/Columns/ColumnFixedString.cpp | 4 ++++ src/Columns/ColumnFixedString.h | 4 ++++ src/Columns/ColumnMap.cpp | 4 ++++ src/Columns/ColumnMap.h | 4 ++++ src/Columns/ColumnNullable.cpp | 4 ++++ src/Columns/ColumnNullable.h | 4 ++++ src/Columns/ColumnString.cpp | 4 ++++ src/Columns/ColumnString.h | 4 ++++ src/Columns/ColumnTuple.cpp | 4 ++++ src/Columns/ColumnTuple.h | 4 ++++ src/Columns/ColumnVariant.cpp | 4 ++++ src/Columns/ColumnVariant.h | 4 ++++ src/Columns/ColumnVector.h | 4 ++++ src/Columns/IColumn.h | 10 ++++++++++ .../benchmarks/benchmark_column_insert_many_from.cpp | 4 ++++ 19 files changed, 82 insertions(+) diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index df51991e22f..72c3d1f0f0e 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -156,7 +156,11 @@ public: ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#else void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } +#endif void insertDefault() override { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index af2e9286a24..27b5c765710 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -57,7 +57,11 @@ public: void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else void 
doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index c80cb79384e..6cc3e27b7ff 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -433,7 +433,11 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si } } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { const auto & dynamic_src = assert_cast(src_); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 59e889a3644..e3fd944a6bc 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -148,7 +148,11 @@ public: #else void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertDefault() override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index ed0f1301a07..21435d405b3 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -86,7 +86,11 @@ void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnFixedString & src_concrete = assert_cast(src); if (n != src_concrete.getN()) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index cc99634adf8..faa63910c00 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -100,7 +100,11 @@ public: void doInsertFrom(const IColumn & src_, size_t index) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 072df6a91c5..53f8a81e40f 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -158,7 +158,11 @@ void ColumnMap::doInsertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 54370c15650..05a32682e23 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -67,7 +67,11 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void doInsertFrom(const 
IColumn & src_, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6268c802be7..c24278d02c5 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -270,7 +270,11 @@ void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 45104577d12..47d007c6dcf 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -77,7 +77,11 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 24ef2f26b95..1eda9714d62 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnString & src_concrete = assert_cast(src); const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 69b2c71e107..6a1b4be0505 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -165,7 +165,11 @@ public: } } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertData(const char * pos, size_t length) override { diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9d99ce9837a..382d2182b61 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -218,7 +218,11 @@ void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +#else void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length) +#endif { const ColumnTuple & src_tuple = assert_cast(src); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 
1536dfcaa0b..7ca065a8439 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -66,7 +66,11 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void doInsertFrom(const IColumn & src_, size_t n) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; +#endif void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index d9803846ca2..c571913614d 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -609,7 +609,11 @@ void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t insertRangeFromImpl(src_, start, length, nullptr); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#else void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +#endif { insertManyFromImpl(src_, position, length, nullptr); } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index cfb6d254cec..6ec2529a48e 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -186,7 +186,11 @@ public: #else void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; #endif +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#else void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#endif using IColumn::insertFrom; using IColumn::insertManyFrom; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f078386cfcd..19dacdad775 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -69,7 +69,11 @@ public: data.push_back(assert_cast(src).getData()[n]); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertManyFrom(const IColumn & src, size_t position, size_t length) override +#else void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override +#endif { ValueType v = assert_cast(src).getData()[position]; data.resize_fill(data.size() + length, v); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 5b1de8a1f39..e5a82d5b772 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -198,11 +198,19 @@ public: #endif /// Appends one element from other column with the same type multiple times. +#if !defined(ABORT_ON_LOGICAL_ERROR) + virtual void insertManyFrom(const IColumn & src, size_t position, size_t length) + { + for (size_t i = 0; i < length; ++i) + insertFrom(src, position); + } +#else void insertManyFrom(const IColumn & src, size_t position, size_t length) { assertTypeEquality(src); doInsertManyFrom(src, position, length); } +#endif /// Appends one field multiple times. Can be optimized in inherited classes. 
virtual void insertMany(const Field & field, size_t length) @@ -657,11 +665,13 @@ protected: virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; #endif +#if defined(ABORT_ON_LOGICAL_ERROR) virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) insertFrom(src, position); } +#endif virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; diff --git a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp index 6b9c329cd55..645f6ed79f3 100644 --- a/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp +++ b/src/Columns/benchmarks/benchmark_column_insert_many_from.cpp @@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows) } +#if !defined(ABORT_ON_LOGICAL_ERROR) +static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src) +#else static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src) +#endif { size_t size = src.size(); dst.insertManyFrom(src, size / 2, size); From 1b45ac30a2aecb67a297ca91f8fc092122d2fc9c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 8 Jul 2024 17:14:25 +0100 Subject: [PATCH 146/299] insertFrom --- src/Columns/ColumnAggregateFunction.cpp | 4 ++++ src/Columns/ColumnAggregateFunction.h | 4 ++++ src/Columns/ColumnArray.cpp | 4 ++++ src/Columns/ColumnArray.h | 4 ++++ src/Columns/ColumnConst.h | 4 ++++ src/Columns/ColumnDecimal.h | 4 ++++ src/Columns/ColumnDynamic.cpp | 4 ++++ src/Columns/ColumnDynamic.h | 4 ++++ src/Columns/ColumnFixedString.cpp | 4 ++++ src/Columns/ColumnFixedString.h | 4 ++++ src/Columns/ColumnFunction.cpp | 4 ++++ src/Columns/ColumnFunction.h | 4 ++++ src/Columns/ColumnLowCardinality.cpp | 4 ++++ src/Columns/ColumnLowCardinality.h | 4 ++++ src/Columns/ColumnMap.cpp | 4 ++++ src/Columns/ColumnMap.h | 4 ++++ src/Columns/ColumnNullable.cpp | 4 ++++ src/Columns/ColumnNullable.h | 4 ++++ src/Columns/ColumnObject.cpp | 4 ++++ src/Columns/ColumnObject.h | 4 ++++ src/Columns/ColumnSparse.cpp | 4 ++++ src/Columns/ColumnSparse.h | 4 ++++ src/Columns/ColumnString.h | 4 ++++ src/Columns/ColumnTuple.cpp | 4 ++++ src/Columns/ColumnTuple.h | 4 ++++ src/Columns/ColumnVariant.cpp | 4 ++++ src/Columns/ColumnVariant.h | 4 ++++ src/Columns/ColumnVector.h | 4 ++++ src/Columns/IColumn.cpp | 4 ++++ src/Columns/IColumn.h | 8 +++++--- src/Columns/IColumnDummy.h | 4 ++++ 31 files changed, 125 insertions(+), 3 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 7124a03c605..cfd07c27765 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -466,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size insertMergeFrom(from, n); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n) +#else void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n) +#endif { insertRangeFrom(from, n, 1); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index b33fd7dfc47..8d12d0fb851 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -145,7 +145,11 @@ public: void insertData(const char * pos, size_t length) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & from, size_t n) override; 
+#else void doInsertFrom(const IColumn & from, size_t n) override; +#endif using IColumn::insertFrom; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index bf89341d5a0..fba3ca420d7 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -337,7 +337,11 @@ bool ColumnArray::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnArray::insertFrom(const IColumn & src_, size_t n) +#else void ColumnArray::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnArray & src = assert_cast(src_); size_t size = src.sizeAt(n); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index bdadbd18a49..ad3db6a4822 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -91,7 +91,11 @@ public: #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override; +#else void doInsertFrom(const IColumn & src_, size_t n) override; +#endif void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 72c3d1f0f0e..28892f3f09a 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -151,7 +151,11 @@ public: ++s; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn &, size_t) override +#else void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 27b5c765710..cea1554c2bd 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -55,7 +55,11 @@ public: void reserve(size_t n) override { data.reserve_exact(n); } void shrinkToFit() override { data.shrink_to_fit(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#else void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 6cc3e27b7ff..4d2474219fb 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -213,7 +213,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x) } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +#else void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +#endif { const auto & dynamic_src = assert_cast(src_); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index e3fd944a6bc..b394f5e38ad 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,7 +142,11 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override; +#else void doInsertFrom(const IColumn & src_, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 21435d405b3..1c2de203a94 100644 --- a/src/Columns/ColumnFixedString.cpp +++ 
b/src/Columns/ColumnFixedString.cpp @@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) +#else void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index) +#endif { const ColumnFixedString & src = assert_cast(src_); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index faa63910c00..5dbb514e639 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -98,7 +98,11 @@ public: bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t index) override; +#else void doInsertFrom(const IColumn & src_, size_t index) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index 4f19cf87fc6..fa57f35a823 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -72,7 +72,11 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnFunction::insertFrom(const IColumn & src, size_t n) +#else void ColumnFunction::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnFunction & src_func = assert_cast(src); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index b5b0c53feb5..0af6c525599 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -94,7 +94,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override; +#else void doInsertFrom(const IColumn & src, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn &, size_t start, size_t length) override; #else diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index cf43640a669..3da7af7f168 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault() idx.insertPosition(getDictionary().getDefaultValueIndex()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n) +#else void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n) +#endif { const auto * low_cardinality_src = typeid_cast(&src); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ef0df669729..26c83a49107 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -78,7 +78,11 @@ public: bool tryInsert(const Field & x) override; void insertDefault() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override; +#else void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertFromFullColumn(const IColumn & src, size_t n); #if !defined(ABORT_ON_LOGICAL_ERROR) diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 53f8a81e40f..8c4a7f2f5d3 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -153,7 +153,11 @@ void ColumnMap::updateHashFast(SipHash & hash) const 
nested->updateHashFast(hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnMap::insertFrom(const IColumn & src, size_t n) +#else void ColumnMap::doInsertFrom(const IColumn & src, size_t n) +#endif { nested->insertFrom(assert_cast(src).getNestedColumn(), n); } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 05a32682e23..dae39d32fe1 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,7 +66,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override; +#else void doInsertFrom(const IColumn & src_, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index c24278d02c5..adc7ce40a42 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -262,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnNullable::insertFrom(const IColumn & src, size_t n) +#else void ColumnNullable::doInsertFrom(const IColumn & src, size_t n) +#endif { const ColumnNullable & src_concrete = assert_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 47d007c6dcf..a8d1352e44c 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -76,7 +76,11 @@ public: #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override; +#else void doInsertFrom(const IColumn & src, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 69dbf780eeb..9c9dade3dd8 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -763,7 +763,11 @@ void ColumnObject::get(size_t n, Field & res) const } } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnObject::insertFrom(const IColumn & src, size_t n) +#else void ColumnObject::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index eb8ac2874c2..ac227f1ed05 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,7 +209,11 @@ public: void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override; +#else void doInsertFrom(const IColumn & src, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 937d016c55c..0937bc92c26 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -252,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnSparse::insertFrom(const IColumn & src, size_t n) +#else void 
ColumnSparse::doInsertFrom(const IColumn & src, size_t n) +#endif { if (const auto * src_sparse = typeid_cast(&src)) { diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 73bcdb78984..8de7584bddc 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -88,7 +88,11 @@ public: #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override; +#else void doInsertFrom(const IColumn & src, size_t n) override; +#endif void insertDefault() override; void insertManyDefaults(size_t length) override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 6a1b4be0505..be88f4a49c0 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -142,7 +142,11 @@ public: return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override +#else void doInsertFrom(const IColumn & src_, size_t n) override +#endif { const ColumnString & src = assert_cast(src_); const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray. diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 382d2182b61..8cf2dec2452 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x) return true; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnTuple::insertFrom(const IColumn & src_, size_t n) +#else void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n) +#endif { const ColumnTuple & src = assert_cast(src_); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 7ca065a8439..ac4f713d341 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,7 +65,11 @@ public: void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override; +#else void doInsertFrom(const IColumn & src_, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index c571913614d..f8dcae258f3 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -595,7 +595,11 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position } } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +#else void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n) +#endif { insertFromImpl(src_, n, nullptr); } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 6ec2529a48e..48719d4e9ca 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -180,7 +180,11 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src_, size_t n) override; +#else void doInsertFrom(const IColumn & src_, size_t n) override; +#endif #if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; #else diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 19dacdad775..49ca42cc57b 100644 --- 
a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -64,7 +64,11 @@ public: return data.size(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn & src, size_t n) override +#else void doInsertFrom(const IColumn & src, size_t n) override +#endif { data.push_back(assert_cast(src).getData()[n]); } diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 1b4bd8f6e62..552e52cf51c 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -46,7 +46,11 @@ String IColumn::dumpStructure() const return res.str(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +void IColumn::insertFrom(const IColumn & src, size_t n) +#else void IColumn::doInsertFrom(const IColumn & src, size_t n) +#endif { insert(src[n]); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index e5a82d5b772..8dbbf6bf9ea 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -179,11 +179,15 @@ public: /// Appends n-th element from other column with the same type. /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation. +#if !defined(ABORT_ON_LOGICAL_ERROR) + virtual void insertFrom(const IColumn & src, size_t n); +#else void insertFrom(const IColumn & src, size_t n) { assertTypeEquality(src); doInsertFrom(src, n); } +#endif /// Appends range of elements from other column with the same type. /// Could be used to concatenate columns. @@ -659,13 +663,11 @@ protected: Sort full_sort, PartialSort partial_sort) const; +#if defined(ABORT_ON_LOGICAL_ERROR) virtual void doInsertFrom(const IColumn & src, size_t n); -#if defined(ABORT_ON_LOGICAL_ERROR) virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0; -#endif -#if defined(ABORT_ON_LOGICAL_ERROR) virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length) { for (size_t i = 0; i < length; ++i) diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 023cbbc463a..e52c143ace7 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -67,7 +67,11 @@ public: { } +#if !defined(ABORT_ON_LOGICAL_ERROR) + void insertFrom(const IColumn &, size_t) override +#else void doInsertFrom(const IColumn &, size_t) override +#endif { ++s; } From b3993532a9abc41360f3011c30b446894a2f424f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 8 Jul 2024 17:17:39 +0100 Subject: [PATCH 147/299] compareAt --- src/Columns/ColumnAggregateFunction.h | 4 ++++ src/Columns/ColumnArray.cpp | 4 ++++ src/Columns/ColumnArray.h | 4 ++++ src/Columns/ColumnCompressed.h | 4 ++++ src/Columns/ColumnConst.h | 4 ++++ src/Columns/ColumnDecimal.cpp | 4 ++++ src/Columns/ColumnDecimal.h | 4 ++++ src/Columns/ColumnDynamic.cpp | 4 ++++ src/Columns/ColumnDynamic.h | 4 ++++ src/Columns/ColumnFixedString.h | 4 ++++ src/Columns/ColumnFunction.h | 4 ++++ src/Columns/ColumnLowCardinality.cpp | 4 ++++ src/Columns/ColumnLowCardinality.h | 4 ++++ src/Columns/ColumnMap.cpp | 4 ++++ src/Columns/ColumnMap.h | 4 ++++ src/Columns/ColumnNullable.cpp | 4 ++++ src/Columns/ColumnNullable.h | 4 ++++ src/Columns/ColumnObject.h | 4 ++++ src/Columns/ColumnSparse.cpp | 4 ++++ src/Columns/ColumnSparse.h | 4 ++++ src/Columns/ColumnString.h | 4 ++++ src/Columns/ColumnTuple.cpp | 4 ++++ src/Columns/ColumnTuple.h | 4 ++++ src/Columns/ColumnUnique.h | 8 ++++++++ src/Columns/ColumnVariant.cpp | 4 ++++ src/Columns/ColumnVariant.h | 4 ++++ src/Columns/ColumnVector.h | 4 ++++ src/Columns/IColumn.h | 6 +++++- src/Columns/IColumnDummy.h | 4 ++++ 
29 files changed, 121 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 8d12d0fb851..fada5fc0c68 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -211,7 +211,11 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn &, int) const override +#else int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { return 0; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index fba3ca420d7..5d7350f3a79 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -396,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan : 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#else int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, nan_direction_hint); } diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index ad3db6a4822..6cd3e2f6c3b 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -103,7 +103,11 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(PermutationSortDirection direction, PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index d0f4c2c5910..5e455709fec 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -104,7 +104,11 @@ public: void expand(const Filter &, bool) override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#else int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeDecompressed(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 28892f3f09a..b55a1f42037 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -237,7 +237,11 @@ public: return data->allocatedBytes() + sizeof(s); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#else int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override +#endif { return data->compareAt(0, 0, 
*assert_cast(rhs).data, nan_direction_hint); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index b09d7b1ee90..cf413f790a7 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -32,7 +32,11 @@ namespace ErrorCodes } template +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#else int ColumnDecimal::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const +#endif { auto & other = static_cast(rhs_); const T & a = data[n]; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index cea1554c2bd..32efeb643a6 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -104,7 +104,11 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; +#endif void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 4d2474219fb..7bf378bb0ac 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -599,7 +599,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#else int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +#endif { const auto & left_variant = assert_cast(*variant_column); const auto & right_dynamic = assert_cast(rhs); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index b394f5e38ad..eb8a2ad6ed6 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -225,7 +225,11 @@ public: return scattered_columns; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override { diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 5dbb514e639..6e88136fc50 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -137,7 +137,11 @@ public: void updateHashFast(SipHash & hash) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnFixedString & rhs = assert_cast(rhs_); chassert(this->n == rhs.n); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 0af6c525599..ba924c49a82 100644 --- a/src/Columns/ColumnFunction.h +++ 
b/src/Columns/ColumnFunction.h @@ -145,7 +145,11 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName()); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn &, int) const override +#else int doCompareAt(size_t, size_t, const IColumn &, int) const override +#endif { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 3da7af7f168..eb694a10b0f 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -372,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 26c83a49107..e99be07cd8d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -135,7 +135,11 @@ public: return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit)); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 8c4a7f2f5d3..2dffddb2dc9 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -222,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele return res; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_map = assert_cast(rhs); return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index dae39d32fe1..d15829f4147 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -87,7 +87,11 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnNullable.cpp 
b/src/Columns/ColumnNullable.cpp index adc7ce40a42..f060e74b315 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -414,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { return compareAtImpl(n, m, rhs_, null_direction_hint); } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index a8d1352e44c..f89e67b6000 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -102,7 +102,11 @@ public: void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif #if USE_EMBEDDED_COMPILER diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index ac227f1ed05..9de2adb87de 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -236,7 +236,11 @@ public: /// Order of rows in ColumnObject is undefined. void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override; void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {} +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void getExtremes(Field & min, Field & max) const override; /// All other methods throw exception. 
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 0937bc92c26..809586d8810 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -454,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t l return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#else int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +#endif { if (const auto * rhs_sparse = typeid_cast(&rhs_)) return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 8de7584bddc..3e34d1de94a 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -106,7 +106,11 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; +#endif void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index be88f4a49c0..602ffac65e8 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -250,7 +250,11 @@ public: offsets.push_back(offsets.back() + 1); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override +#endif { const ColumnString & rhs = assert_cast(rhs_); return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 8cf2dec2452..b471725f516 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -482,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_ return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { return compareAtImpl(n, m, rhs, nan_direction_hint); } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index ac4f713d341..11c09f827f5 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -95,7 +95,11 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const 
Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 12f765f42af..ec1f8e0a4d5 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -90,7 +90,11 @@ public: return getNestedColumn()->updateHashWithValue(n, hash_func); } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -488,7 +492,11 @@ const char * ColumnUnique::skipSerializedInArena(const char *) const } template +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else int ColumnUnique::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { if (is_nullable) { diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index f8dcae258f3..ee5de4c2dde 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1186,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues(); } +#if !defined(ABORT_ON_LOGICAL_ERROR) +int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#else int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +#endif { const auto & rhs_variant = assert_cast(rhs); Discriminator left_discr = globalDiscriminatorAt(n); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 48719d4e9ca..b492d4b49c7 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -229,7 +229,11 @@ public: ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; +#endif bool hasEqualValues() const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 49ca42cc57b..3a0acf5e312 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -150,7 +150,11 @@ public: } /// This method implemented in header because it could be possibly devirtualized. 
+#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#else int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override +#endif { return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 8dbbf6bf9ea..4b6f34e5aa2 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -345,11 +345,15 @@ public: * * For non Nullable and non floating point types, nan_direction_hint is ignored. */ +#if !defined(ABORT_ON_LOGICAL_ERROR) + [[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; +#else [[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const { assertTypeEquality(rhs); return doCompareAt(n, m, rhs, nan_direction_hint); } +#endif #if USE_EMBEDDED_COMPILER @@ -673,7 +677,6 @@ protected: for (size_t i = 0; i < length; ++i) insertFrom(src, position); } -#endif virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; @@ -685,6 +688,7 @@ private: /// For the rest of column types we can compare the types directly. chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs)); } +#endif }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index e52c143ace7..c19fb704d9b 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -26,7 +26,11 @@ public: size_t byteSize() const override { return 0; } size_t byteSizeAt(size_t) const override { return 0; } size_t allocatedBytes() const override { return 0; } +#if !defined(ABORT_ON_LOGICAL_ERROR) + int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#else int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; } +#endif void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { } From bd42a096b7e023195316d798188adfe0a98555f5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 16:52:36 +0000 Subject: [PATCH 148/299] Bump rocksdb to v6.29.5 --- contrib/rocksdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/rocksdb b/contrib/rocksdb index b8a996ce196..be366233921 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit b8a996ce1969a3f7141aca7fb5c54196a58a654a +Subproject commit be366233921293bd07a84dc4ea6991858665f202 From bb4115b4e6ee1291115a509f555f1d0c591769e9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jul 2024 18:34:31 +0200 Subject: [PATCH 149/299] Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us By collecting them with nanoseconds precision, and only store them in system.processors_profile_log in microseconds. This should fix 02210_processors_profile_log test failures like this one [1]: ExpressionTransform 999989 1 1 1 1 When the total elapsed_us is less then 1 second. 
[1]: https://s3.amazonaws.com/clickhouse-test-reports/65920/ce417c78be566d8a616df3544e2801b845277f44/stateless_tests__release__old_analyzer__s3__databasereplicated__[1_4].html Signed-off-by: Azat Khuzhin --- src/Interpreters/executeQuery.cpp | 6 +++--- src/Processors/Executors/ExecutingGraph.cpp | 4 ++-- .../Executors/ExecutionThreadContext.cpp | 6 +++--- src/Processors/IProcessor.h | 18 +++++++++--------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9f33cbf1c27..336e226a691 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -476,9 +476,9 @@ void logQueryFinish( processor_elem.processor_name = processor->getName(); /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + processor_elem.elapsed_us = static_cast(processor->getElapsedNs() / 1000U); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedNs() / 1000U); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedNs() / 1000U); auto stats = processor->getProcessorDataStats(); processor_elem.input_rows = stats.input_rows; diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 27f6a454b24..6d5b60d8159 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -292,7 +292,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::NeedData && status != IProcessor::Status::NeedData) { - processor.input_wait_elapsed_us += processor.input_wait_watch.elapsedMicroseconds(); + processor.input_wait_elapsed_ns += processor.input_wait_watch.elapsedNanoseconds(); } /// PortFull @@ -302,7 +302,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } else if (last_status == IProcessor::Status::PortFull && status != IProcessor::Status::PortFull) { - processor.output_wait_elapsed_us += processor.output_wait_watch.elapsedMicroseconds(); + processor.output_wait_elapsed_ns += processor.output_wait_watch.elapsedNanoseconds(); } } } diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 05669725f9a..17b6773ad83 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -103,10 +103,10 @@ bool ExecutionThreadContext::executeTask() if (profile_processors) { - UInt64 elapsed_microseconds = execution_time_watch->elapsedMicroseconds(); - node->processor->elapsed_us += elapsed_microseconds; + UInt64 elapsed_ns = execution_time_watch->elapsedNanoseconds(); + node->processor->elapsed_ns += elapsed_ns; if (trace_processors) - span->addAttribute("execution_time_ms", elapsed_microseconds); + span->addAttribute("execution_time_ms", elapsed_ns / 1000U); } #ifndef NDEBUG execution_time_ns += execution_time_watch->elapsed(); diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 6f779e7a8d4..02f7b6b3d12 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -303,9 +303,9 @@ public: IQueryPlanStep * getQueryPlanStep() const { return query_plan_step; } size_t 
getQueryPlanStepGroup() const { return query_plan_step_group; } - uint64_t getElapsedUs() const { return elapsed_us; } - uint64_t getInputWaitElapsedUs() const { return input_wait_elapsed_us; } - uint64_t getOutputWaitElapsedUs() const { return output_wait_elapsed_us; } + uint64_t getElapsedNs() const { return elapsed_ns; } + uint64_t getInputWaitElapsedNs() const { return input_wait_elapsed_ns; } + uint64_t getOutputWaitElapsedNs() const { return output_wait_elapsed_ns; } struct ProcessorDataStats { @@ -369,21 +369,21 @@ protected: private: /// For: - /// - elapsed_us + /// - elapsed_ns friend class ExecutionThreadContext; /// For - /// - input_wait_elapsed_us - /// - output_wait_elapsed_us + /// - input_wait_elapsed_ns + /// - output_wait_elapsed_ns friend class ExecutingGraph; std::string processor_description; /// For processors_profile_log - uint64_t elapsed_us = 0; + uint64_t elapsed_ns = 0; Stopwatch input_wait_watch; - uint64_t input_wait_elapsed_us = 0; + uint64_t input_wait_elapsed_ns = 0; Stopwatch output_wait_watch; - uint64_t output_wait_elapsed_us = 0; + uint64_t output_wait_elapsed_ns = 0; size_t stream_number = NO_STREAM; From 8550e64352dc98bf6e8002fbbadbed1b1e014027 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jul 2024 19:03:35 +0200 Subject: [PATCH 150/299] Fix type of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us (was UInt32) Even though type in the system.processors_profile_log is UInt64, intermediate type was UInt32 (sigh). Signed-off-by: Azat Khuzhin --- src/Interpreters/ProcessorsProfileLog.h | 6 +++--- src/Interpreters/executeQuery.cpp | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 8319d373f39..abece2604f2 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -25,11 +25,11 @@ struct ProcessorProfileLogElement String processor_name; /// Milliseconds spend in IProcessor::work() - UInt32 elapsed_us{}; + UInt64 elapsed_us{}; /// IProcessor::NeedData - UInt32 input_wait_elapsed_us{}; + UInt64 input_wait_elapsed_us{}; /// IProcessor::PortFull - UInt32 output_wait_elapsed_us{}; + UInt64 output_wait_elapsed_us{}; size_t input_rows{}; size_t input_bytes{}; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 336e226a691..d9d3ba58160 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -475,10 +475,9 @@ void logQueryFinish( processor_elem.processor_name = processor->getName(); - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedNs() / 1000U); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedNs() / 1000U); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedNs() / 1000U); + processor_elem.elapsed_us = static_cast(processor->getElapsedNs() / 1000U); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedNs() / 1000U); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedNs() / 1000U); auto stats = processor->getProcessorDataStats(); processor_elem.input_rows = stats.input_rows; From 1e48831d76d90c13cb60fc4e96fdd98a7c42cb2a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 8 Jul 2024 18:21:06 +0100 Subject: [PATCH 151/299] better --- src/Columns/ColumnAggregateFunction.h | 3 ++- src/Columns/ColumnDynamic.h | 11 +++-------- src/Columns/ColumnMap.h | 
12 ++++-------- src/Columns/ColumnNullable.h | 6 ++---- src/Columns/ColumnObject.h | 7 +++---- src/Columns/ColumnTuple.h | 7 +++---- src/Columns/ColumnVariant.h | 16 +++++----------- 7 files changed, 22 insertions(+), 40 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index fada5fc0c68..1be7a862438 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -148,10 +148,11 @@ public: #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & from, size_t n) override; #else + using IColumn::insertFrom; + void doInsertFrom(const IColumn & from, size_t n) override; #endif - using IColumn::insertFrom; void insertFrom(ConstAggregateDataPtr place); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index eb8a2ad6ed6..9abddc7a26d 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -142,19 +142,14 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; + #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; -#else - void doInsertFrom(const IColumn & src_, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; -#else - void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; #endif diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index d15829f4147..a54071a2974 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -66,21 +66,17 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; + #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; -#else - void doInsertFrom(const IColumn & src_, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; -#else - void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index f89e67b6000..a6d0483e527 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -76,14 +76,12 @@ public: #endif void insert(const Field & x) override; bool tryInsert(const Field & x) override; + #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; -#else - void 
doInsertFrom(const IColumn & src, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else + void doInsertFrom(const IColumn & src, size_t n) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; #endif diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 9de2adb87de..7470dfa6302 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -209,16 +209,15 @@ public: void insert(const Field & field) override; bool tryInsert(const Field & field) override; void insertDefault() override; + #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src, size_t n) override; -#else - void doInsertFrom(const IColumn & src, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; #else + void doInsertFrom(const IColumn & src, size_t n) override; void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override; #endif + void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 11c09f827f5..38e479791d4 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -65,16 +65,15 @@ public: void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; + #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; -#else - void doInsertFrom(const IColumn & src_, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src, size_t position, size_t length) override; #else + void doInsertFrom(const IColumn & src_, size_t n) override; void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override; #endif + void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index b492d4b49c7..d91b8e93a7d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -182,24 +182,18 @@ public: #if !defined(ABORT_ON_LOGICAL_ERROR) void insertFrom(const IColumn & src_, size_t n) override; -#else - void doInsertFrom(const IColumn & src_, size_t n) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; -#else - void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; -#endif -#if !defined(ABORT_ON_LOGICAL_ERROR) void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; #else - void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; -#endif - using IColumn::insertFrom; using IColumn::insertManyFrom; using IColumn::insertRangeFrom; + void doInsertFrom(const IColumn & src_, size_t n) override; + void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override; +#endif + /// Methods for insertion from another Variant but with known mapping between global discriminators. 
void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); From 8a352fdd30190ae342140441d2012754bbdd58c2 Mon Sep 17 00:00:00 2001 From: gun9nir Date: Mon, 8 Jul 2024 10:28:24 -0700 Subject: [PATCH 152/299] add no-parallel tag on the test --- .../queries/0_stateless/03198_table_function_directory_path.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql index 671074ab45a..90b81b3fe5e 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.sql +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv'; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv'; INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv'; From fb4fb2ca5e9a69570b99a0dfaf89dd68aaf9022c Mon Sep 17 00:00:00 2001 From: gun9nir Date: Mon, 8 Jul 2024 10:39:35 -0700 Subject: [PATCH 153/299] add docs --- docs/en/sql-reference/table-functions/file.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a3162dad9a..44b1b50620a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -130,7 +130,9 @@ SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); ## Globs in path -Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. +Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. There is one exception that if the path refers to an existing +directory and does not use globs, a `*` will be implicitly added to the path so +all the files in the directory are selected. - `*` — Represents arbitrarily many characters except `/` but including the empty string. - `?` — Represents an arbitrary single character. @@ -163,6 +165,12 @@ An alternative path expression which achieves the same: SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` +Query the total number of rows in `some_dir` using the implicit `*`: + +```sql +SELECT count(*) FROM file('some_dir', 'TSV', 'name String, value UInt32'); +``` + :::note If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: From 5b9c74b4b88ac4a9592edba8e642fda2e91a904d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 21:13:30 +0200 Subject: [PATCH 154/299] fix tests, lesser logs --- src/Interpreters/DatabaseCatalog.cpp | 41 +++++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index afe89e7b9a7..2abda981a8c 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1272,14 +1272,23 @@ void DatabaseCatalog::rescheduleDropTableTask() if (first_async_drop_in_queue != tables_marked_dropped.begin()) { + LOG_TRACE( + log, + "Have {} tables in queue to drop. 
Some of them are being dropped in sync mode. Schedule background task ASAP", + tables_marked_dropped.size()); (*drop_task)->scheduleAfter(0); return; } time_t current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); auto min_drop_time = getMinDropTime(); - time_t schedule_after = min_drop_time < current_time ? (min_drop_time - current_time) * 1000 : 0; - (*drop_task)->scheduleAfter(schedule_after); + time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0; + + LOG_TRACE( + log, + "Have {} tables in queue to drop. Schedule background task in {} seconds", + tables_marked_dropped.size(), schedule_after_ms / 1000); + (*drop_task)->scheduleAfter(schedule_after_ms); } void DatabaseCatalog::dropTablesParallel(std::vector tables_to_drop) @@ -1297,7 +1306,7 @@ void DatabaseCatalog::dropTablesParallel(std::vector Date: Mon, 8 Jul 2024 10:14:09 +0000 Subject: [PATCH 155/299] slightly better --- src/Processors/QueryPlan/BufferChunksTransform.cpp | 11 ++++++----- src/Processors/QueryPlan/BufferChunksTransform.h | 6 ++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp index 283a0acf172..3601a68d36e 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.cpp +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -26,6 +26,12 @@ IProcessor::Status BufferChunksTransform::prepare() return Status::Finished; } + if (input.isFinished() && chunks.empty()) + { + output.finish(); + return Status::Finished; + } + if (output.canPush()) { input.setNeeded(); @@ -45,11 +51,6 @@ IProcessor::Status BufferChunksTransform::prepare() auto chunk = pullChunk(); output.push(std::move(chunk)); } - else if (input.isFinished()) - { - output.finish(); - return Status::Finished; - } } if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer)) diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h index 84c35431364..752f9910734 100644 --- a/src/Processors/QueryPlan/BufferChunksTransform.h +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -5,9 +5,15 @@ namespace DB { +/// Transform that buffers chunks from the input +/// up to the certain limit and pushes chunks to +/// the output whenever it is ready. It can be used +/// to increase parallelism of execution, for example +/// when it is adeded before MergingSortedTransform. class BufferChunksTransform : public IProcessor { public: + /// OR condition is used for the limits on rows and bytes. 
BufferChunksTransform( const Block & header_, size_t max_rows_to_buffer_, From db1817a633d1fc1dca13eefa75303f0cb78145b7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Jul 2024 19:33:52 +0000 Subject: [PATCH 156/299] Some minor fixups --- .../settings/merge-tree-settings.md | 2 +- .../functions/date-time-functions.md | 434 +++++++++--------- src/Functions/changeDate.cpp | 110 ++--- .../aspell-ignore/en/aspell-dict.txt | 3 + 4 files changed, 285 insertions(+), 264 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 22c8c704ba2..7278b91f90d 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1030,7 +1030,7 @@ A table with no primary key represents the extreme case of a single equivalence The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows. -The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemir, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. +The heuristics applied to find the best row order within each equivalence class is suggested by D. Lemire, O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and based on sorting the rows within each equivalence class by ascending cardinality of the non-primary key columns. It performs three steps: 1. Find all equivalence classes based on the row values in primary key columns. 2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 1edd8d407eb..4f5e5a5d716 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -2698,6 +2698,204 @@ Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types Accepts an additional, optional `precision` parameter after the `timezone` parameter. +## changeYear + +Changes the year component of a date or date time. + +**Syntax** +``` sql + +changeYear(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- The same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); +``` + +Result: + +``` +┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ +│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeMonth + +Changes the month component of a date or date time. 
+ +**Syntax** + +``` sql +changeMonth(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); +``` + +Result: + +``` +┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ +│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeDay + +Changes the day component of a date or date time. + +**Syntax** + +``` sql +changeDay(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. + +**Example** + +``` sql +SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); +``` + +Result: + +``` +┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ +│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ +└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ +``` + +## changeHour + +Changes the hour component of a date or date time. + +**Syntax** + +``` sql +changeHour(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); +``` + +Result: + +``` +┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ +│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ +└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ +``` + +## changeMinute + +Changes the minute component of a date or date time. + +**Syntax** + +``` sql +changeMinute(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). 
If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql + SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + +## changeSecond + +Changes the second component of a date or date time. + +**Syntax** + +``` sql +changeSecond(date_or_datetime, value) +``` + +**Arguments** + +- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Returns a value of same type as `date_or_datetime`. If the input is a [Date](../data-types/date.md), return [DateTime](../data-types/datetime.md). If the input is a [Date32](../data-types/date32.md), return [DateTime64](../data-types/datetime64.md). + +**Example** + +``` sql +SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); +``` + +Result: + +``` +┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ +│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ +└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ +``` + ## addYears Adds a specified number of years to a date, a date with time or a string-encoded date / date with time. @@ -2714,6 +2912,7 @@ addYears(date, num) - `num`: Number of years to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2751,6 +2950,7 @@ addQuarters(date, num) - `num`: Number of quarters to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` quarters. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2788,6 +2988,7 @@ addMonths(date, num) - `num`: Number of months to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2825,6 +3026,7 @@ addWeeks(date, num) - `num`: Number of weeks to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2862,6 +3064,7 @@ addDays(date, num) - `num`: Number of days to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` days. 
[Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2899,6 +3102,7 @@ addHours(date, num) - `num`: Number of hours to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** +o - Returns `date` plus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2936,6 +3140,7 @@ addMinutes(date, num) - `num`: Number of minutes to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -2973,6 +3178,7 @@ addSeconds(date, num) - `num`: Number of seconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` plus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3010,6 +3216,7 @@ addMilliseconds(date_time, num) - `num`: Number of milliseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3045,6 +3252,7 @@ addMicroseconds(date_time, num) - `num`: Number of microseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3080,6 +3288,7 @@ addNanoseconds(date_time, num) - `num`: Number of nanoseconds to add. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` plus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3115,6 +3324,7 @@ addInterval(interval_1, interval_2) - `interval_2`: Second interval to be added. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3161,6 +3371,7 @@ addTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to add to `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with added `intervals`. [date](../data-types/date.md)/[date32](../data-types/date32.md)/[datetime](../data-types/datetime.md)/[datetime64](../data-types/datetime64.md). **Example** @@ -3195,6 +3406,7 @@ subtractYears(date, num) - `num`: Number of years to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` years. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3232,6 +3444,7 @@ subtractQuarters(date, num) - `num`: Number of quarters to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` quarters. 
[Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3269,6 +3482,7 @@ subtractMonths(date, num) - `num`: Number of months to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` months. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3306,6 +3520,7 @@ subtractWeeks(date, num) - `num`: Number of weeks to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` weeks. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3343,6 +3558,7 @@ subtractDays(date, num) - `num`: Number of days to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` days. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3380,6 +3596,7 @@ subtractHours(date, num) - `num`: Number of hours to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` hours. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[Datetime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3417,6 +3634,7 @@ subtractMinutes(date, num) - `num`: Number of minutes to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` minutes. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3454,6 +3672,7 @@ subtractSeconds(date, num) - `num`: Number of seconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date` minus `num` seconds. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3491,6 +3710,7 @@ subtractMilliseconds(date_time, num) - `num`: Number of milliseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` milliseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3526,6 +3746,7 @@ subtractMicroseconds(date_time, num) - `num`: Number of microseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` microseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3561,6 +3782,7 @@ subtractNanoseconds(date_time, num) - `num`: Number of nanoseconds to subtract. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** + - Returns `date_time` minus `num` nanoseconds. [DateTime64](../data-types/datetime64.md). **Example** @@ -3596,6 +3818,7 @@ subtractInterval(interval_1, interval_2) - `interval_2`: Second interval to be negated. [interval](../data-types/special-data-types/interval.md). **Returned value** + - Returns a tuple of intervals. 
[tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). :::note @@ -3642,6 +3865,7 @@ subtractTupleOfIntervals(interval_1, interval_2) - `intervals`: Tuple of intervals to subtract from `date`. [tuple](../data-types/tuple.md)([interval](../data-types/special-data-types/interval.md)). **Returned value** + - Returns `date` with subtracted `intervals`. [Date](../data-types/date.md)/[Date32](../data-types/date32.md)/[DateTime](../data-types/datetime.md)/[DateTime64](../data-types/datetime64.md). **Example** @@ -3660,216 +3884,6 @@ Result: └───────────────────────────────────────────────────────────────────────┘ ``` -## changeYear - -Changes the year component of a date or date time. - -**Syntax** - -``` sql -changeYear(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the year. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeYear(toDate('1999-01-01'), 2000), changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000); -``` - -Result: - -``` -┌─changeYear(toDate('1999-01-01'), 2000)─┬─changeYear(toDateTime64('1999-01-01 00:00:00.000', 3), 2000)─┐ -│ 2000-01-01 │ 2000-01-01 00:00:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeMonth - -Changes the month component of a date or date time. - -**Syntax** - -``` sql -changeMonth(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the month. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeMonth(toDate('1999-01-01'), 2), changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2); -``` - -Result: - -``` -┌─changeMonth(toDate('1999-01-01'), 2)─┬─changeMonth(toDateTime64('1999-01-01 00:00:00.000', 3), 2)─┐ -│ 1999-02-01 │ 1999-02-01 00:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeDay - -Changes the day component of a date or date time. - -**Syntax** - -``` sql -changeDay(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the day. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. 
- -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeDay(toDate('1999-01-01'), 5), changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5); -``` - -Result: - -``` -┌─changeDay(toDate('1999-01-01'), 5)─┬─changeDay(toDateTime64('1999-01-01 00:00:00.000', 3), 5)─┐ -│ 1999-01-05 │ 1999-01-05 00:00:00.000 │ -└────────────────────────────────────┴──────────────────────────────────────────────────────────┘ -``` - -## changeHour - -Changes the hour component of a date or date time. - -**Syntax** - -``` sql -changeHour(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the hour. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeHour(toDate('1999-01-01'), 14), changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14); -``` - -Result: - -``` -┌─changeHour(toDate('1999-01-01'), 14)─┬─changeHour(toDateTime64('1999-01-01 00:00:00.000', 3), 14)─┐ -│ 1999-01-01 14:00:00 │ 1999-01-01 14:00:00.000 │ -└──────────────────────────────────────┴────────────────────────────────────────────────────────────┘ -``` - -## changeMinute - -Changes the minute component of a date or date time. - -**Syntax** - -``` sql -changeMinute(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the minute. [Integer](../../sql-reference/data-types/int-uint.md). - -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeMinute(toDate('1999-01-01'), 15), changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeMinute(toDate('1999-01-01'), 15)─┬─changeMinute(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:15:00 │ 1999-01-01 00:15:00.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - -## changeSecond - -Changes the second component of a date or date time. - -**Syntax** - -``` sql -changeSecond(date_or_datetime, value) -``` - -**Arguments** - -- `date_or_datetime` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) -- `value` - a new value of the second. [Integer](../../sql-reference/data-types/int-uint.md). 
- -**Return value** - -- The same type as `date_or_datetime`. If the input is a Date, return DateTime. If the input is a Date32, return DateTime64. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -**Example** - -``` sql - SELECT changeSecond(toDate('1999-01-01'), 15), changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15); -``` - -Result: - -``` -┌─changeSecond(toDate('1999-01-01'), 15)─┬─changeSecond(toDateTime64('1999-01-01 00:00:00.000', 3), 15)─┐ -│ 1999-01-01 00:00:15 │ 1999-01-01 00:00:15.000 │ -└────────────────────────────────────────┴──────────────────────────────────────────────────────────────┘ -``` - ## timeSlots(StartTime, Duration,\[, Size\]) For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter set to 1800 (30 minutes) by default. diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index e24391afe12..5965f3d1d00 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -41,11 +41,6 @@ enum class Component Second }; -bool isTimeComponentChange(Component type) -{ - return type == Component::Hour || type == Component::Minute || type == Component::Second; -} - } template @@ -65,11 +60,11 @@ public: {"date_or_datetime", static_cast(&isDateOrDate32OrDateTimeOrDateTime64), nullptr, "Date or date with time"}, {"value", static_cast(&isNativeInteger), nullptr, "Integer"} }; - validateFunctionArgumentTypes(*this, arguments, args); + validateFunctionArguments(*this, arguments, args); const auto & input_type = arguments[0].type; - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) { if (isDate(input_type)) return std::make_shared(); @@ -85,13 +80,13 @@ public: const auto & input_type = arguments[0].type; if (isDate(input_type)) { - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } if (isDate32(input_type)) { - if (isTimeComponentChange(Traits::component)) + if constexpr (Traits::component == Component::Hour || Traits::component == Component::Minute || Traits::component == Component::Second) return execute(arguments, input_type, result_type, input_rows_count); return execute(arguments, input_type, result_type, input_rows_count); } @@ -109,83 +104,92 @@ public: bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); size_t result_rows_count = (is_const ? 
1 : input_rows_count); - typename ResultDataType::ColumnType::MutablePtr result_column; + typename ResultDataType::ColumnType::MutablePtr result_col; if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); - result_column = ResultDataType::ColumnType::create(result_rows_count, scale); + result_col = ResultDataType::ColumnType::create(result_rows_count, scale); } else - result_column = ResultDataType::ColumnType::create(result_rows_count); + result_col = ResultDataType::ColumnType::create(result_rows_count); - auto input_column = arguments[0].column->convertToFullIfNeeded(); - const auto & input_column_data = typeid_cast(*input_column).getData(); + auto date_time_col = arguments[0].column->convertToFullIfNeeded(); + const auto & date_time_col_data = typeid_cast(*date_time_col).getData(); - auto new_value_column = castColumn(arguments[1], std::make_shared()); - new_value_column = new_value_column->convertToFullIfNeeded(); - const auto & new_value_column_data = typeid_cast(*new_value_column).getData(); + auto value_col = castColumn(arguments[1], std::make_shared()); + value_col = value_col->convertToFullIfNeeded(); + const auto & value_col_data = typeid_cast(*value_col).getData(); - auto & result_data = result_column->getData(); + auto & result_col_data = result_col->getData(); - for (size_t i = 0; i < result_rows_count; ++i) + if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) + const auto scale = typeid_cast(*result_type).getScale(); + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + + Int64 deg = 1; + for (size_t j = 0; j < scale; ++j) + deg *= 10; + + for (size_t i = 0; i < result_rows_count; ++i) { - const auto scale = typeid_cast(*result_type).getScale(); - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg); + Int64 fraction = date_time_col_data[i] % deg; - Int64 deg = 1; - for (size_t j = 0; j < scale; ++j) - deg *= 10; - - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i] / deg); - Int64 fraction = input_column_data[i] % deg; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, scale, fraction); + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, scale, fraction); } - else if constexpr (std::is_same_v && std::is_same_v) + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - - result_data[i] = getChangedDate(time, new_value_column_data[i], result_type, date_lut, 3, 0); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0); } - else if constexpr (std::is_same_v && std::is_same_v) + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; - - result_data[i] = 
static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } - else if constexpr (std::is_same_v) + } + else if constexpr (std::is_same_v) + { + const auto & date_lut = typeid_cast(*result_type).getTimeZone(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - Int64 time = date_lut.toNumYYYYMMDDhhmmss(input_column_data[i]); - - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } - else + } + else + { + const auto & date_lut = DateLUT::instance(); + for (size_t i = 0; i < result_rows_count; ++i) { - const auto & date_lut = DateLUT::instance(); Int64 time; if (isDate(input_type)) - time = static_cast(date_lut.toNumYYYYMMDD(DayNum(input_column_data[i]))) * 1'000'000; + time = static_cast(date_lut.toNumYYYYMMDD(DayNum(date_time_col_data[i]))) * 1'000'000; else - time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(input_column_data[i]))) * 1'000'000; + time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; if (isDate(result_type)) - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); else - result_data[i] = static_cast(getChangedDate(time, new_value_column_data[i], result_type, date_lut)); + result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); } } if (is_const) - return ColumnConst::create(std::move(result_column), input_rows_count); + return ColumnConst::create(std::move(result_col), input_rows_count); - return result_column; + return result_col; } Int64 getChangedDate(Int64 time, Float64 new_value, const DataTypePtr & result_type, const DateLUTImpl & date_lut, Int64 scale = 0, Int64 fraction = 0) const diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 5689f94d2ae..5e31a09effb 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -467,6 +467,7 @@ LOCALTIME LOCALTIMESTAMP LONGLONG LOONGARCH +Lemire Levenshtein Liao LibFuzzer @@ -1962,6 +1963,8 @@ loghouse london lookups loongarch +lowCardinalityIndices +lowCardinalityKeys lowcardinality lowerUTF lowercased From be343c7dddfa388f46282e0aa3cf264b2f1e0f2f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 8 Jul 2024 14:08:03 +0200 Subject: [PATCH 157/299] don't throw TIMEOUT_EXCEEDED for none_only_active --- src/Interpreters/executeDDLQueryOnCluster.cpp | 7 ++++--- .../02447_drop_database_replica.reference | 17 +++++++++++++-- .../02447_drop_database_replica.sh | 21 +++++++++++++++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index e372f036073..9c3f85128cf 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -237,6 +237,7 @@ private: Int64 timeout_seconds = 120; bool 
is_replicated_database = false; bool throw_on_timeout = true; + bool throw_on_timeout_only_active = false; bool only_running_hosts = false; bool timeout_exceeded = false; @@ -316,8 +317,8 @@ DDLQueryStatusSource::DDLQueryStatusSource( , log(getLogger("DDLQueryStatusSource")) { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; - throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; + throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE; + throw_on_timeout_only_active = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { @@ -451,7 +452,7 @@ Chunk DDLQueryStatusSource::generate() "({} of them are currently executing the task, {} are inactive). " "They are going to execute the query in background. Was waiting for {} seconds{}"; - if (throw_on_timeout) + if (throw_on_timeout || (throw_on_timeout_only_active && !stop_waiting_offline_hosts)) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index bd3b689ca3c..d997b7ba830 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -12,11 +12,21 @@ t 2 rdb_default 1 1 s1 r1 1 2 -2 -2 +skip inactive s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 +s1 r1 OK 2 0 +s1 r2 QUEUED 2 0 +s2 r1 QUEUED 2 0 +timeout on active +2 +2 +s1 r1 OK 3 0 +s1 r2 QUEUED 3 0 +s2 r1 QUEUED 3 0 +s9 r9 QUEUED 3 0 +drop replica 2 rdb_default 1 1 s1 r1 1 rdb_default 1 2 s1 r2 0 @@ -24,6 +34,9 @@ rdb_default 1 2 s1 r2 0 2 t t2 +t22 t3 +t33 t4 +t44 rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 1604d527f2b..93a5fcee8e2 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,10 +33,27 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +echo 'skip inactive' +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" | sort timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table 
$db.t4 (n int) engine=Log" | sort +# And that it still throws TIMEOUT_EXCEEDED for active replicas +echo 'timeout on active' +db9="${db}_9" +$CLICKHOUSE_CLIENT -q "create database $db9 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's9', 'r9')" +$CLICKHOUSE_CLIENT -q "detach database $db9" +$CLICKHOUSE_CLIENT -q "insert into system.zookeeper(name, path, value) values ('active', '/test/$CLICKHOUSE_DATABASE/rdb/replicas/s9|r9', '$($CLICKHOUSE_CLIENT -q "select serverUUID()")')" + +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=none_only_active -q "create table $db.t22 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t33 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t44 (n int) engine=Log" | sort + +$CLICKHOUSE_CLIENT -q "attach database $db9" +$CLICKHOUSE_CLIENT -q "drop database $db9" + +echo 'drop replica' + $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null From 708c81f6e2a81b530a47f543795bb8ff6ec663ed Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 9 Jul 2024 01:20:31 +0200 Subject: [PATCH 158/299] Update 00504_mergetree_arrays_rw.sql --- tests/queries/0_stateless/00504_mergetree_arrays_rw.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql index 7c939d060ea..14929045356 100644 --- a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql +++ b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql @@ -1,5 +1,8 @@ set allow_deprecated_syntax_for_merge_tree=1; +set max_threads = 1; +set max_insert_threads = 1; + drop table if exists test_ins_arr; create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192); insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000; From 05fe5c9f2ca14c9704a965c8224f6d49bfcbbd42 Mon Sep 17 00:00:00 2001 From: gun9nir Date: Mon, 8 Jul 2024 19:04:08 -0700 Subject: [PATCH 159/299] truncate file in insert --- .../03198_table_function_directory_path.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/03198_table_function_directory_path.sql b/tests/queries/0_stateless/03198_table_function_directory_path.sql index 90b81b3fe5e..9e2791847af 100644 --- a/tests/queries/0_stateless/03198_table_function_directory_path.sql +++ b/tests/queries/0_stateless/03198_table_function_directory_path.sql @@ -1,10 +1,10 @@ -- Tags: no-parallel -INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv'; -INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv'; -INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv'; -INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir1/dir/4.csv', 'csv') SELECT '4.csv'; -INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir2/dir/5.csv', 'csv') SELECT '5.csv'; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/1.csv', 'csv') SELECT '1.csv' SETTINGS 
engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/2.csv', 'csv') SELECT '2.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir/3.csv', 'csv') SELECT '3.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir1/dir/4.csv', 'csv') SELECT '4.csv' SETTINGS engine_file_truncate_on_insert=1; +INSERT INTO FUNCTION file('data_03198_table_function_directory_path/dir2/dir/5.csv', 'csv') SELECT '5.csv' SETTINGS engine_file_truncate_on_insert=1; SELECT COUNT(*) FROM file('data_03198_table_function_directory_path'); SELECT COUNT(*) FROM file('data_03198_table_function_directory_path/'); From 595a12ee884840da76e2b50ae723511e00915b85 Mon Sep 17 00:00:00 2001 From: nauu Date: Tue, 9 Jul 2024 09:48:51 +0800 Subject: [PATCH 160/299] to avoid ambiguity, replace FilesystemCacheFailToReserveSpaceBecauseOfLockContention with FilesystemCacheFailToReserveSpaceBecauseOfCacheResize. to avoid ambiguity, replace FilesystemCacheFailToReserveSpaceBecauseOfLockContention with FilesystemCacheResize. --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 439965a92fb..e80afc95e8d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,6 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 0d33e39ffa3..a3848fa3a75 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -30,6 +30,7 @@ namespace ProfileEvents extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; extern const Event FilesystemCacheFreeSpaceKeepingThreadRun; extern const Event FilesystemCacheFreeSpaceKeepingThreadWorkMilliseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfCacheResize; } namespace DB @@ -813,7 +814,7 @@ bool FileCache::tryReserve( /// ok compared to the number of cases this check will help. 
if (cache_is_being_resized.load(std::memory_order_relaxed)) { - ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize); return false; } From 9153e65456dd3a90d9be85c1a8a52592ce054e77 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:15:41 +0200 Subject: [PATCH 161/299] Remove unneded include --- src/DataTypes/DataTypeDynamic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index 6826c46a1a7..c920e69c13b 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -12,7 +12,6 @@ #include #include #include -#include namespace DB { From b4dd700c26c43a6910c7394e0bdb246afef47884 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 12:26:39 +0200 Subject: [PATCH 162/299] Fix typos --- .../en/sql-reference/data-types/data-types-binary-encoding.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/data-types-binary-encoding.md b/docs/en/sql-reference/data-types/data-types-binary-encoding.md index c9720da0f1c..812e946e43e 100644 --- a/docs/en/sql-reference/data-types/data-types-binary-encoding.md +++ b/docs/en/sql-reference/data-types/data-types-binary-encoding.md @@ -9,7 +9,7 @@ sidebar_label: Data types binary encoding specification. This specification describes the binary format that can be used for binary encoding and decoding of ClickHouse data types. This format is used in `Dynamic` column [binary serialization](dynamic.md#binary-output-format) and can be used in input/output formats [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes) and [Native](../../interfaces/formats.md#native) under corresponding settings. -The table below describes how each data type is represented in bunary format. Each data type encoding consist of 1 byte that indicates the type and some optional additional information. +The table below describes how each data type is represented in binary format. Each data type encoding consist of 1 byte that indicates the type and some optional additional information. `var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression. | ClickHouse data type | Binary encoding | @@ -66,7 +66,7 @@ The table below describes how each data type is represented in bunary format. Ea ### Interval kind binary encoding -The table below describes how different interval kinds of `Interval` data type are endoced. +The table below describes how different interval kinds of `Interval` data type are encoded. 
| Interval kind | Binary encoding | |---------------|-----------------| From 58eb57dffb6be5033beb6dd79d05a3292512a9f9 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 8 Jul 2024 15:43:16 +0200 Subject: [PATCH 163/299] Add settings to control connection to PostgreSQL --- src/Core/PostgreSQL/PoolWithFailover.cpp | 23 +++++++++++++++---- src/Core/PostgreSQL/PoolWithFailover.h | 7 +++--- src/Core/PostgreSQL/Utils.cpp | 4 ++-- src/Core/PostgreSQL/Utils.h | 2 +- src/Core/Settings.h | 2 ++ src/Core/SettingsChangesHistory.cpp | 2 ++ .../DatabaseMaterializedPostgreSQL.cpp | 7 +++++- .../PostgreSQL/DatabasePostgreSQL.cpp | 5 ++-- .../PostgreSQLDictionarySource.cpp | 5 ++-- .../StorageMaterializedPostgreSQL.cpp | 3 ++- src/Storages/StorageExternalDistributed.cpp | 5 ++-- src/Storages/StoragePostgreSQL.cpp | 5 ++-- .../TableFunctionPostgreSQL.cpp | 5 ++-- 13 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index a034c50094d..5014564dbe0 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -27,7 +27,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -39,8 +40,13 @@ PoolWithFailover::PoolWithFailover( { for (const auto & replica_configuration : configurations) { - auto connection_info = formatConnectionString(replica_configuration.database, - replica_configuration.host, replica_configuration.port, replica_configuration.username, replica_configuration.password); + auto connection_info = formatConnectionString( + replica_configuration.database, + replica_configuration.host, + replica_configuration.port, + replica_configuration.username, + replica_configuration.password, + connection_attempt_timeout_); replicas_with_priority[priority].emplace_back(connection_info, pool_size); } } @@ -51,7 +57,8 @@ PoolWithFailover::PoolWithFailover( size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, - bool auto_close_connection_) + bool auto_close_connection_, + size_t connection_attempt_timeout_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) , auto_close_connection(auto_close_connection_) @@ -63,7 +70,13 @@ PoolWithFailover::PoolWithFailover( for (const auto & [host, port] : configuration.addresses) { LOG_DEBUG(getLogger("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port); - auto connection_string = formatConnectionString(configuration.database, host, port, configuration.username, configuration.password); + auto connection_string = formatConnectionString( + configuration.database, + host, + port, + configuration.username, + configuration.password, + connection_attempt_timeout_); replicas_with_priority[0].emplace_back(connection_string, pool_size); } } diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 3c538fc3dea..502a9a9b7d7 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -14,7 +14,6 @@ static constexpr inline auto POSTGRESQL_POOL_DEFAULT_SIZE = 16; static constexpr inline auto POSTGRESQL_POOL_WAIT_TIMEOUT = 5000; -static constexpr inline auto POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES = 2; namespace postgres { @@ -30,14 
+29,16 @@ public: size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); explicit PoolWithFailover( const DB::StoragePostgreSQL::Configuration & configuration, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, - bool auto_close_connection_); + bool auto_close_connection_, + size_t connection_attempt_timeout_); PoolWithFailover(const PoolWithFailover & other) = delete; diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 810bf62fdab..9dc010c1c69 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -8,7 +8,7 @@ namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password) +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout) { DB::WriteBufferFromOwnString out; out << "dbname=" << DB::quote << dbname @@ -16,7 +16,7 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S << " port=" << port << " user=" << DB::quote << user << " password=" << DB::quote << password - << " connect_timeout=2"; + << " connect_timeout=" << timeout; return {out.str(), host + ':' + DB::toString(port)}; } diff --git a/src/Core/PostgreSQL/Utils.h b/src/Core/PostgreSQL/Utils.h index f179ab14c89..f2b8f1ac084 100644 --- a/src/Core/PostgreSQL/Utils.h +++ b/src/Core/PostgreSQL/Utils.h @@ -18,7 +18,7 @@ namespace pqxx namespace postgres { -ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password); +ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, String user, String password, UInt64 timeout); String getConnectionForLog(const String & host, UInt16 port); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6764076349a..cb1dbd2929e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -561,7 +561,9 @@ class IColumn; M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ + M(UInt64, postgresql_connection_attempt_timeout, 2, "Connection timeout to PostgreSQL table engine and database engine in seconds.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. 
By default it will block on empty pool.", 0) \ + M(UInt64, postgresql_connection_pool_retries, 2, "Connection pool push/pop retries number for PostgreSQL table engine and database engine.", 0) \ M(Bool, postgresql_connection_pool_auto_close_connection, false, "Close connection before returning connection to the pool.", 0) \ M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \ M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 2e2853a9170..3bae67e7a43 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -69,6 +69,8 @@ static std::initializer_listgetSettingsRef().postgresql_connection_attempt_timeout); auto postgresql_replica_settings = std::make_unique(); if (engine_define->settings) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 136fb7fd6d2..b22356bebea 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -545,8 +545,9 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory) configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.context, diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index c7401386e40..b35e14577a8 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -205,8 +205,9 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) configuration.replicas_configurations, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); PostgreSQLDictionarySource::Configuration dictionary_configuration { diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index f686fbda664..b9edff39b82 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -592,7 +592,8 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) auto configuration = StoragePostgreSQL::getConfiguration(args.engine_args, args.getContext()); auto connection_info = postgres::formatConnectionString( configuration.database, configuration.host, configuration.port, - configuration.username, configuration.password); + configuration.username, configuration.password, + args.getContext()->getSettingsRef().postgresql_connection_attempt_timeout); bool has_settings = args.storage_def->settings; auto postgresql_replication_settings = std::make_unique(); diff --git a/src/Storages/StorageExternalDistributed.cpp 
b/src/Storages/StorageExternalDistributed.cpp index beb93afc972..d712bd10da4 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -167,8 +167,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) current_configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); shards.insert(std::make_shared( args.table_id, std::move(pool), configuration.table, args.columns, args.constraints, String{}, context)); } diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index a8713c61e4d..b5a388e8159 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -613,8 +613,9 @@ void registerStoragePostgreSQL(StorageFactory & factory) auto pool = std::make_shared(configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); return std::make_shared( args.table_id, diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 8d94988cd65..508f85df6a3 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -80,8 +80,9 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex *configuration, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, - POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - settings.postgresql_connection_pool_auto_close_connection); + settings.postgresql_connection_pool_retries, + settings.postgresql_connection_pool_auto_close_connection, + settings.postgresql_connection_attempt_timeout); } } From eeac41fa455cf1886f535259f485da17f70276f1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 8 Jul 2024 16:17:22 +0200 Subject: [PATCH 164/299] Add a documentation --- docs/en/operations/settings/settings.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ed0b29aa851..c3f697c3bdc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1358,12 +1358,25 @@ Connection pool size for PostgreSQL table engine and database engine. Default value: 16 +## postgresql_connection_attempt_timeout {#postgresql-connection-attempt-timeout} + +Connection timeout in seconds of a single attempt to connect PostgreSQL end-point. +The value is passed as a `connect_timeout` parameter of the connection URL. + +Default value: `2`. + ## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool. Default value: 5000 +## postgresql_connection_pool_retries {#postgresql-connection-pool-retries} + +The maximum number of retries to establish a connection with the PostgreSQL end-point. 
+ +Default value: `2`. + ## postgresql_connection_pool_auto_close_connection {#postgresql-connection-pool-auto-close-connection} Close connection before returning connection to the pool. From 0174a43d17b61cbb0d57eeeea5068eac463b31e3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Jul 2024 11:54:21 +0000 Subject: [PATCH 165/299] Fixing Not-ready set in constraint. --- .../Transforms/CheckConstraintsTransform.cpp | 6 ++++++ .../Transforms/CheckConstraintsTransform.h | 1 + .../System/StorageSystemDetachedParts.cpp | 2 +- .../System/StorageSystemPartsBase.cpp | 4 ++-- src/Storages/VirtualColumnUtils.cpp | 6 +++--- src/Storages/VirtualColumnUtils.h | 2 +- .../02841_not_ready_set_constraints.reference | 1 + .../02841_not_ready_set_constraints.sql | 19 +++++++++++++++++++ 8 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02841_not_ready_set_constraints.reference create mode 100644 tests/queries/0_stateless/02841_not_ready_set_constraints.sql diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index e43aa6028da..cdae8c23a3e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -31,6 +32,7 @@ CheckConstraintsTransform::CheckConstraintsTransform( , table_id(table_id_) , constraints_to_check(constraints_.filterConstraints(ConstraintsDescription::ConstraintType::CHECK)) , expressions(constraints_.getExpressions(context_, header.getNamesAndTypesList())) + , context(std::move(context_)) { } @@ -39,6 +41,10 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) { if (chunk.getNumRows() > 0) { + if (rows_written == 0) + for (const auto & expression : expressions) + VirtualColumnUtils::buildSetsForDAG(expression->getActionsDAG(), context); + Block block_to_calculate = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); for (size_t i = 0; i < expressions.size(); ++i) { diff --git a/src/Processors/Transforms/CheckConstraintsTransform.h b/src/Processors/Transforms/CheckConstraintsTransform.h index 09833ff396b..f92d0ab855e 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.h +++ b/src/Processors/Transforms/CheckConstraintsTransform.h @@ -35,6 +35,7 @@ private: StorageID table_id; const ASTs constraints_to_check; const ConstraintsExpressions expressions; + ContextPtr context; size_t rows_written = 0; Chunk cur_chunk; }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f48a8c67971..fbc99ab865e 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -328,7 +328,7 @@ void ReadFromSystemDetachedParts::applyFilters(ActionDAGNodes added_filter_nodes filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter) - VirtualColumnUtils::buildSetsForDAG(filter, context); + VirtualColumnUtils::buildSetsForDAG(*filter, context); } } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 175c0834bcb..f7d1c1b3eb8 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -274,7 +274,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_database = 
VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_database) - VirtualColumnUtils::buildSetsForDAG(filter_by_database, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_database, context); block.insert(ColumnWithTypeAndName({}, std::make_shared(), table_column_name)); block.insert(ColumnWithTypeAndName({}, std::make_shared(), engine_column_name)); @@ -283,7 +283,7 @@ void ReadFromSystemPartsBase::applyFilters(ActionDAGNodes added_filter_nodes) filter_by_other_columns = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &block); if (filter_by_other_columns) - VirtualColumnUtils::buildSetsForDAG(filter_by_other_columns, context); + VirtualColumnUtils::buildSetsForDAG(*filter_by_other_columns, context); } } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..27c52124e9c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -54,9 +54,9 @@ namespace DB namespace VirtualColumnUtils { -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context) { - for (const auto & node : dag->getNodes()) + for (const auto & node : dag.getNodes()) { if (node.type == ActionsDAG::ActionType::COLUMN) { @@ -79,7 +79,7 @@ void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context) void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) { - buildSetsForDAG(dag, context); + buildSetsForDAG(*dag, context); auto actions = std::make_shared(dag); Block block_with_filter = block; actions->execute(block_with_filter, /*dry_run=*/ false, /*allow_duplicates_in_input=*/ true); diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..9045a2f5481 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -26,7 +26,7 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); /// Builds sets used by ActionsDAG inplace. -void buildSetsForDAG(const ActionsDAGPtr & dag, const ContextPtr & context); +void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); /// Recursively checks if all functions used in DAG are deterministic in scope of query. 
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference new file mode 100644 index 00000000000..d81cc0710eb --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -0,0 +1 @@ +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql new file mode 100644 index 00000000000..ecdf4d50635 --- /dev/null +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); + +select * from t2; From 6baa52d10176369fefc6249dbb256a26eb7b1bdc Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 14:01:28 +0200 Subject: [PATCH 166/299] Fix null insertion into dynamic column --- src/Formats/JSONExtractTree.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 8fe472930d3..9efb1392583 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1265,9 +1265,16 @@ public: bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override { auto & column_dynamic = assert_cast(column); + /// First, check if element is NULL. + if (element.isNull()) + { + column_dynamic.insertDefault(); + return true; + } + auto & variant_column = column_dynamic.getVariantColumn(); auto variant_info = column_dynamic.getVariantInfo(); - /// First, infer ClickHouse type for this element and add it as a new variant. + /// Second, infer ClickHouse type for this element and add it as a new variant. auto element_type = elementToDataType(element, format_settings); if (column_dynamic.addNewVariant(element_type)) { From 61f827b5698754af84e7d75a2b83bd0191139820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 9 Jul 2024 15:26:33 +0200 Subject: [PATCH 167/299] Update src/Databases/DatabaseAtomic.cpp --- src/Databases/DatabaseAtomic.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index a48eb2abce6..5b816e4f282 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -106,8 +106,8 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name) { - // it is important to call destructures not_in_use without - // blocking mutex for avoid potential deadlock. + // it is important to call the destructors of not_in_use without + // locked mutex to avoid potential deadlock. 
DetachedTables not_in_use; StoragePtr table; { From 830653b1f6fc0fdac9f0a143b99cdba94009c1aa Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 9 Jul 2024 15:40:44 +0200 Subject: [PATCH 168/299] CI: do not finalize CI running status unless all success --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 4764e6d3c1a..259e6d41110 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,7 +172,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !cancelled() }} + if: ${{ !failure() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: From 61e50a346aecd33eed510580f2ab52eb2f816dc1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 5 Jul 2024 20:23:45 +0200 Subject: [PATCH 169/299] Use peak_threads_usage instead of arrayUniq(thread_ids) in tests Should fix flakiness like [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/66098/93afc8e6133365007488c4d8340f434f6e8a876f/stateless_tests__aarch64_.html Signed-off-by: Azat Khuzhin --- .../0_stateless/01275_parallel_mv.reference | 16 ++++++++-------- .../queries/0_stateless/01275_parallel_mv.sql.j2 | 2 +- .../0_stateless/01323_too_many_threads_bug.sql | 4 ++-- .../02350_views_max_insert_threads.sql | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..f5f31c4a563 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -10,7 +10,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -34,7 +34,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -58,7 +58,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -82,7 +82,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ 
-106,7 +106,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -130,7 +130,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=0, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -154,7 +154,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -178,7 +178,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select=1, max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 9d74474c1a4..5918035e9c3 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -28,7 +28,7 @@ insert into testX select number from numbers(10) settings optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index c377e2c7570..5bf282808c3 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -14,11 +14,11 @@ set log_queries = 1; select x from table_01323_many_parts limit 10 format Null; system flush logs; -select arrayUniq(thread_ids) <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; select x from table_01323_many_parts order by x limit 10 format Null; system flush logs; -select 
arrayUniq(thread_ids) <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; +select peak_threads_usage <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1; drop table if exists table_01323_many_parts; diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.sql b/tests/queries/0_stateless/02350_views_max_insert_threads.sql index 25e0fdeadba..a4f7e2546ed 100644 --- a/tests/queries/0_stateless/02350_views_max_insert_threads.sql +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.sql @@ -10,7 +10,7 @@ create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16, max_block_size=100000; system flush logs; -select arrayUniq(thread_ids)>=16 from system.query_log where +select peak_threads_usage>=16 from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and type = 'QueryFinish' and From 8a202d91ad745089adaff4ebf2cde5e6754503ce Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 9 Jul 2024 16:24:35 +0200 Subject: [PATCH 170/299] Properly read schema and partition columns from checkpoint file --- .../DataLakes/DeltaLakeMetadata.cpp | 169 ++++++++++++------ .../DataLakes/IStorageDataLake.h | 10 +- .../StorageObjectStorageSource.cpp | 28 +-- tests/integration/test_storage_delta/test.py | 129 +++++++++++-- 4 files changed, 255 insertions(+), 81 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 12341c877e2..d37bffc42c4 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,7 @@ #include #include #include +#include #include #include @@ -111,7 +113,7 @@ struct DeltaLakeMetadataImpl std::set result_files; NamesAndTypesList current_schema; DataLakePartitionColumns current_partition_columns; - const auto checkpoint_version = getCheckpointIfExists(result_files); + const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns); if (checkpoint_version) { @@ -205,9 +207,9 @@ struct DeltaLakeMetadataImpl Poco::Dynamic::Var json = parser.parse(json_str); Poco::JSON::Object::Ptr object = json.extract(); - // std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - // object->stringify(oss); - // LOG_TEST(log, "Metadata: {}", oss.str()); + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + object->stringify(oss); + LOG_TEST(log, "Metadata: {}", oss.str()); if (object->has("metaData")) { @@ -216,30 +218,9 @@ struct DeltaLakeMetadataImpl Poco::JSON::Parser p; Poco::Dynamic::Var fields_json = parser.parse(schema_object); - Poco::JSON::Object::Ptr fields_object = fields_json.extract(); - - const auto fields = fields_object->get("fields").extract(); - NamesAndTypesList current_schema; - for (size_t i = 0; i < fields->size(); ++i) - { - const auto field = 
fields->getObject(static_cast(i)); - auto column_name = field->getValue("name"); - auto type = field->getValue("type"); - auto is_nullable = field->getValue("nullable"); - - std::string physical_name; - auto schema_metadata_object = field->get("metadata").extract(); - if (schema_metadata_object->has("delta.columnMapping.physicalName")) - physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); - else - physical_name = column_name; - - LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", - column_name, type, is_nullable, physical_name); - - current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); - } + const Poco::JSON::Object::Ptr & fields_object = fields_json.extract(); + auto current_schema = parseMetadata(fields_object); if (file_schema.empty()) { file_schema = current_schema; @@ -274,7 +255,12 @@ struct DeltaLakeMetadataImpl const auto value = partition_values->getValue(partition_name); auto name_and_type = file_schema.tryGetByName(partition_name); if (!name_and_type) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toNamesAndTypesDescription()); + } auto field = getFieldValue(value, name_and_type->type); current_partition_columns.emplace_back(*name_and_type, field); @@ -293,6 +279,32 @@ struct DeltaLakeMetadataImpl } } + NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json) + { + NamesAndTypesList schema; + const auto fields = metadata_json->get("fields").extract(); + for (size_t i = 0; i < fields->size(); ++i) + { + const auto field = fields->getObject(static_cast(i)); + auto column_name = field->getValue("name"); + auto type = field->getValue("type"); + auto is_nullable = field->getValue("nullable"); + + std::string physical_name; + auto schema_metadata_object = field->get("metadata").extract(); + if (schema_metadata_object->has("delta.columnMapping.physicalName")) + physical_name = schema_metadata_object->getValue("delta.columnMapping.physicalName"); + else + physical_name = column_name; + + LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}", + column_name, type, is_nullable, physical_name); + + schema.push_back({physical_name, getFieldType(field, "type", is_nullable)}); + } + return schema; + } + DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable) { if (field->isObject(type_key)) @@ -506,7 +518,10 @@ struct DeltaLakeMetadataImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result) + size_t getCheckpointIfExists( + std::set & result, + NamesAndTypesList & file_schema, + DataLakePartitionColumns & file_partition_columns) { const auto version = readLastCheckpointIfExists(); if (!version) @@ -527,7 +542,8 @@ struct DeltaLakeMetadataImpl auto columns = ParquetSchemaReader(*buf, format_settings).readSchema(); /// Read only columns that we need. 
- columns.filterColumns(NameSet{"add", "remove"}); + auto filter_column_names = NameSet{"add", "metaData"}; + columns.filterColumns(filter_column_names); Block header; for (const auto & column : columns) header.insert({column.type->createColumn(), column.type, column.name}); @@ -541,9 +557,6 @@ struct DeltaLakeMetadataImpl ArrowMemoryPool::instance(), &reader)); - std::shared_ptr file_schema; - THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema)); - ArrowColumnToCHColumn column_reader( header, "Parquet", format_settings.parquet.allow_missing_columns, @@ -554,29 +567,85 @@ struct DeltaLakeMetadataImpl std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); - const auto & res_columns = res.getColumns(); + Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); + auto res_block = header.cloneWithColumns(chunk.detachColumns()); + res_block = Nested::flatten(res_block); - if (res_columns.size() != 2) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Unexpected number of columns: {} (having: {}, expected: {})", - res_columns.size(), res.dumpStructure(), header.dumpStructure()); - } + const auto * nullable_path_column = assert_cast(res_block.getByName("add.path").column.get()); + const auto & path_column = assert_cast(nullable_path_column->getNestedColumn()); + + const auto * nullable_schema_column = assert_cast(res_block.getByName("metaData.schemaString").column.get()); + const auto & schema_column = assert_cast(nullable_schema_column->getNestedColumn()); + + auto partition_values_column_raw = res_block.getByName("add.partitionValues").column; + const auto & partition_values_column = assert_cast(*partition_values_column_raw); - const auto * tuple_column = assert_cast(res_columns[0].get()); - const auto & nullable_column = assert_cast(tuple_column->getColumn(0)); - const auto & path_column = assert_cast(nullable_column.getNestedColumn()); for (size_t i = 0; i < path_column.size(); ++i) { - const auto filename = String(path_column.getDataAt(i)); - if (filename.empty()) + const auto metadata = String(schema_column.getDataAt(i)); + if (!metadata.empty()) + { + Poco::JSON::Parser parser; + Poco::Dynamic::Var json = parser.parse(metadata); + const Poco::JSON::Object::Ptr & object = json.extract(); + + auto current_schema = parseMetadata(object); + if (file_schema.empty()) + { + file_schema = current_schema; + LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString()); + } + else if (file_schema != current_schema) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from files with different schema is not possible " + "({} is different from {})", + file_schema.toString(), current_schema.toString()); + } + } + } + + for (size_t i = 0; i < path_column.size(); ++i) + { + const auto path = String(path_column.getDataAt(i)); + if (path.empty()) continue; - LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); + + auto filename = fs::path(path).filename().string(); + auto it = file_partition_columns.find(filename); + if (it == file_partition_columns.end()) + { + Field map; + partition_values_column.get(i, map); + auto partition_values_map = map.safeGet(); + if (!partition_values_map.empty()) + { + auto & current_partition_columns = file_partition_columns[filename]; + for (const auto & map_value : partition_values_map) + { + 
const auto tuple = map_value.safeGet(); + const auto partition_name = tuple[0].safeGet(); + auto name_and_type = file_schema.tryGetByName(partition_name); + if (!name_and_type) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "No such column in schema: {} (schema: {})", + partition_name, file_schema.toString()); + } + const auto value = tuple[1].safeGet(); + auto field = getFieldValue(value, name_and_type->type); + current_partition_columns.emplace_back(*name_and_type, field); + + LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); + } + } + } + + LOG_TEST(log, "Adding {}", path); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / path); if (!inserted) - throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); + throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", path); } return version; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index f1217bc9729..d6935c706d9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -17,6 +17,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support @@ -41,6 +45,7 @@ public: auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + const bool use_schema_from_metadata = columns_.empty(); if (base_configuration->format == "auto") base_configuration->format = "Parquet"; @@ -50,8 +55,9 @@ public: try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); - schema_from_metadata = metadata->getTableSchema(); configuration->setPaths(metadata->getDataFiles()); + if (use_schema_from_metadata) + schema_from_metadata = metadata->getTableSchema(); } catch (...) { @@ -66,7 +72,7 @@ public: return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, context, table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + use_schema_from_metadata ? 
ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 6940f10cb91..a9a7e062076 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -206,23 +206,25 @@ Chunk StorageObjectStorageSource::generate() if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) { auto partition_values = partition_columns.find(filename); - - for (const auto & [name_and_type, value] : partition_values->second) + if (partition_values != partition_columns.end()) { - if (!read_from_format_info.source_header.has(name_and_type.name)) - continue; + for (const auto & [name_and_type, value] : partition_values->second) + { + if (!read_from_format_info.source_header.has(name_and_type.name)) + continue; - const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); - auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); + const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name); + auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst(); - /// This column is filled with default value now, remove it. - chunk.erase(column_pos); + /// This column is filled with default value now, remove it. + chunk.erase(column_pos); - /// Add correct values. - if (chunk.hasColumns()) - chunk.addColumn(column_pos, std::move(partition_column)); - else - chunk.addColumn(std::move(partition_column)); + /// Add correct values. + if (column_pos < chunk.getNumColumns()) + chunk.addColumn(column_pos, std::move(partition_column)); + else + chunk.addColumn(std::move(partition_column)); + } } } return chunk; diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 4cb71895881..d3dd7cfe52a 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -596,19 +596,116 @@ def test_partition_columns(started_cluster): ) assert result == 1 - # instance.query( - # f""" - # DROP TABLE IF EXISTS {TABLE_NAME}; - # CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime) - # ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" - # ) - # assert ( - # int( - # instance.query( - # f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')" - # ) - # ) - # == num_rows - 1 - # ) - # instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}") - # assert False + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == 
toDateTime('2000/01/05')" + ) + ) + == 1 + ) + + # Subset of columns should work. + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32)) + ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')""" + ) + assert ( + """test1 2000-01-01 1 +test2 2000-01-02 2 +test3 2000-01-03 3 +test4 2000-01-04 4 +test5 2000-01-05 5 +test6 2000-01-06 6 +test7 2000-01-07 7 +test8 2000-01-08 8 +test9 2000-01-09 9""" + == instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip() + ) + + for i in range(num_rows + 1, 2 * num_rows + 1): + data = [ + ( + i, + "test" + str(i), + datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"), + i, + False, + ) + ] + df = spark.createDataFrame(data=data, schema=schema) + df.printSchema() + df.write.mode("append").format("delta").partitionBy(partition_columns).save( + f"/{TABLE_NAME}" + ) + + files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + ok = False + for file in files: + if file.endswith("last_checkpoint"): + ok = True + assert ok + + result = int( + instance.query( + f"""SELECT count() + FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') + """ + ) + ) + assert result == num_rows * 2 + + assert ( + """1 test1 2000-01-01 1 false +2 test2 2000-01-02 2 false +3 test3 2000-01-03 3 false +4 test4 2000-01-04 4 false +5 test5 2000-01-05 5 false +6 test6 2000-01-06 6 false +7 test7 2000-01-07 7 false +8 test8 2000-01-08 8 false +9 test9 2000-01-09 9 false +10 test10 2000-01-10 10 false +11 test11 2000-01-11 11 false +12 test12 2000-01-12 12 false +13 test13 2000-01-13 13 false +14 test14 2000-01-14 14 false +15 test15 2000-01-15 15 false +16 test16 2000-01-16 16 false +17 test17 2000-01-17 17 false +18 test18 2000-01-18 18 false""" + == instance.query( + f""" +SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c + """ + ).strip() + ) + assert ( + int( + instance.query( + f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')" + ) + ) + == 1 + ) From 0821d72f1cef93e437f5b38477f2a2d9d9611f8e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 9 Jul 2024 17:06:37 +0200 Subject: [PATCH 171/299] Collect core dumps in more tests --- docker/test/stateful/run.sh | 5 +++++ docker/test/stateless/run.sh | 2 ++ docker/test/stateless/stress_tests.lib | 11 ----------- docker/test/stateless/utils.lib | 11 +++++++++++ docker/test/stress/run.sh | 3 +++ 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 2215ac2b37c..80e5e81a4b1 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -16,6 +16,9 @@ dpkg -i package_folder/clickhouse-client_*.deb ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +# shellcheck disable=SC1091 +source /utils.lib + # install test configs /usr/share/clickhouse-test/config/install.sh @@ -272,3 +275,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 43d3c698d8a..fe551f5d6d6 100755 --- a/docker/test/stateless/run.sh +++ 
b/docker/test/stateless/run.sh @@ -399,3 +399,5 @@ if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: fi + +collect_core_dumps diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index c069ccbdd8d..682da1df837 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -1,8 +1,5 @@ #!/bin/bash -# core.COMM.PID-TID -sysctl kernel.core_pattern='core.%e.%p-%P' - OK="\tOK\t\\N\t" FAIL="\tFAIL\t\\N\t" @@ -315,12 +312,4 @@ function collect_query_and_trace_logs() done } -function collect_core_dumps() -{ - find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do - zstd --threads=0 "$core" - mv "$core.zst" /test_output/ - done -} - # vi: ft=bash diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 833e1a05384..90efb5b1a8e 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -1,5 +1,8 @@ #!/bin/bash +# core.COMM.PID-TID +sysctl kernel.core_pattern='core.%e.%p-%P' + function run_with_retry() { if [[ $- =~ e ]]; then @@ -48,4 +51,12 @@ function timeout_with_logging() { return $exit_code } +function collect_core_dumps() +{ + find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do + zstd --threads=0 "$core" + mv "$core.zst" /test_output/ + done +} + # vi: ft=bash diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 323944591b1..86467394513 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -21,6 +21,9 @@ source /attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib source /stress_tests.lib +# shellcheck disable=SC1091 +source /utils.lib + install_packages package_folder # Thread Fuzzer allows to check more permutations of possible thread scheduling From b539d999cddf263e0b9fa6f9aa9381dea62a5ed3 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 9 Jul 2024 17:12:55 +0200 Subject: [PATCH 172/299] limit the logs about rescheduling --- src/Interpreters/DatabaseCatalog.cpp | 3 ++- src/Interpreters/DatabaseCatalog.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 2abda981a8c..e34aef125fb 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -837,6 +837,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(getLogger("DatabaseCatalog")) + , limitedLog(std::make_shared(log, 1, 5)) , first_async_drop_in_queue(tables_marked_dropped.end()) { } @@ -1273,7 +1274,7 @@ void DatabaseCatalog::rescheduleDropTableTask() if (first_async_drop_in_queue != tables_marked_dropped.begin()) { LOG_TRACE( - log, + limitedLog, "Have {} tables in queue to drop. Some of them are being dropped in sync mode. 
Schedule background task ASAP", tables_marked_dropped.size()); (*drop_task)->scheduleAfter(0); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 23e38a6445e..73e330dedbc 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -325,6 +326,7 @@ private: TablesDependencyGraph view_dependencies TSA_GUARDED_BY(databases_mutex); LoggerPtr log; + LogSeriesLimiterPtr limitedLog; std::atomic_bool is_shutting_down = false; From 19a508747bfc8ece6eaf9f0d245003e1ac1abeb1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jul 2024 18:03:56 +0200 Subject: [PATCH 173/299] Update opentelemetry.md --- docs/en/operations/opentelemetry.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 70f64d08ba3..fe60ceedc0b 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -2,15 +2,11 @@ slug: /en/operations/opentelemetry sidebar_position: 62 sidebar_label: Tracing ClickHouse with OpenTelemetry -title: "[experimental] Tracing ClickHouse with OpenTelemetry" +title: "Tracing ClickHouse with OpenTelemetry" --- [OpenTelemetry](https://opentelemetry.io/) is an open standard for collecting traces and metrics from the distributed application. ClickHouse has some support for OpenTelemetry. -:::note -This is an experimental feature that will change in backwards-incompatible ways in future releases. -::: - ## Supplying Trace Context to ClickHouse ClickHouse accepts trace context HTTP headers, as described by the [W3C recommendation](https://www.w3.org/TR/trace-context/). It also accepts trace context over a native protocol that is used for communication between ClickHouse servers or between the client and server. For manual testing, trace context headers conforming to the Trace Context recommendation can be supplied to `clickhouse-client` using `--opentelemetry-traceparent` and `--opentelemetry-tracestate` flags. From 55468caeaee9c7c5074897d50f5aa2c4fe4d584e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 9 Jul 2024 16:38:22 +0000 Subject: [PATCH 174/299] Fix ARM build --- contrib/rocksdb-cmake/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index 96558b40174..3a14407166c 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -51,6 +51,14 @@ if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ) add_definitions(-DHAVE_PCLMUL) endif() +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64") + set (HAS_ARMV8_CRC 1) + # the original build descriptions set specific flags for ARM. 
These flags are already subsumed by ClickHouse's general + # ARM flags, see cmake/cpu_features.cmake + # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") +endif() + set (HAVE_THREAD_LOCAL 1) if(HAVE_THREAD_LOCAL) add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) From 004d8e0d49d77879e63bcb16456af84db784200f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 9 Jul 2024 18:39:21 +0200 Subject: [PATCH 175/299] Run privileged --- tests/ci/functional_test_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index d8e5a7fa27f..4440d0d332c 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -104,6 +104,8 @@ def get_run_command( return ( f"docker run --volume={builds_path}:/package_folder " + # For dmesg and sysctl + "--privileged " f"{ci_logs_args}" f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"{volume_with_broken_test}" From b4f59b96c274fcde50050e172a91d455eddcb17f Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 19:30:15 +0200 Subject: [PATCH 176/299] Update IStorageDataLake.h --- src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index d6935c706d9..c8603fccb86 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -17,10 +17,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support From 9fc557ad65ab0a306e417d01ea0b4636a0569824 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 9 Jul 2024 17:36:09 +0000 Subject: [PATCH 177/299] Ignore ON CLUSTER clause in queries for management of replicated named collections --- .../NamedCollectionsFactory.cpp | 9 +++++- .../NamedCollectionsFactory.h | 2 ++ .../NamedCollectionsMetadataStorage.cpp | 12 ++++---- .../NamedCollectionsMetadataStorage.h | 2 +- src/Core/Settings.h | 1 + .../InterpreterAlterNamedCollectionQuery.cpp | 7 +++-- .../InterpreterCreateNamedCollectionQuery.cpp | 7 +++-- .../InterpreterDropNamedCollectionQuery.cpp | 7 +++-- .../removeOnClusterClauseIfNeeded.cpp | 16 ++++++++++- .../named_collections_with_zookeeper.xml | 17 +++++++++++ .../configs/users.d/users.xml | 5 ++++ .../test_named_collections/test.py | 28 +++++++++++++++++++ 12 files changed, 98 insertions(+), 15 deletions(-) diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.cpp b/src/Common/NamedCollections/NamedCollectionsFactory.cpp index 14105a8651d..2faea1957ba 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.cpp +++ b/src/Common/NamedCollections/NamedCollectionsFactory.cpp @@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard & lock) loadFromConfig(context->getConfigRef(), lock); loadFromSQL(lock); - if (metadata_storage->supportsPeriodicUpdate()) + if (metadata_storage->isReplicated()) { update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); }); update_task->activate(); @@ -357,6 
+357,13 @@ void NamedCollectionFactory::reloadFromSQL() add(std::move(collections), lock); } +bool NamedCollectionFactory::usesReplicatedStorage() +{ + std::lock_guard lock(mutex); + loadIfNot(lock); + return metadata_storage->isReplicated(); +} + void NamedCollectionFactory::updateFunc() { LOG_TRACE(log, "Named collections background updating thread started"); diff --git a/src/Common/NamedCollections/NamedCollectionsFactory.h b/src/Common/NamedCollections/NamedCollectionsFactory.h index 6ee5940e686..a0721ad8a50 100644 --- a/src/Common/NamedCollections/NamedCollectionsFactory.h +++ b/src/Common/NamedCollections/NamedCollectionsFactory.h @@ -34,6 +34,8 @@ public: void updateFromSQL(const ASTAlterNamedCollectionQuery & query); + bool usesReplicatedStorage(); + void loadIfNot(); void shutdown(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 32fdb25abd3..b3671350f92 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -67,7 +67,7 @@ public: virtual bool removeIfExists(const std::string & path) = 0; - virtual bool supportsPeriodicUpdate() const = 0; + virtual bool isReplicated() const = 0; virtual bool waitUpdate(size_t /* timeout */) { return false; } }; @@ -89,7 +89,7 @@ public: ~LocalStorage() override = default; - bool supportsPeriodicUpdate() const override { return false; } + bool isReplicated() const override { return false; } std::vector list() const override { @@ -221,7 +221,7 @@ public: ~ZooKeeperStorage() override = default; - bool supportsPeriodicUpdate() const override { return true; } + bool isReplicated() const override { return true; } /// Return true if children changed. 
bool waitUpdate(size_t timeout) override @@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace); } -bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const +bool NamedCollectionsMetadataStorage::isReplicated() const { - return storage->supportsPeriodicUpdate(); + return storage->isReplicated(); } bool NamedCollectionsMetadataStorage::waitUpdate() { - if (!storage->supportsPeriodicUpdate()) + if (!storage->isReplicated()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported"); const auto & config = Context::getGlobalContextInstance()->getConfigRef(); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index 3c089fe2fa2..c3468fbc468 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -30,7 +30,7 @@ public: /// Return true if update was made bool waitUpdate(); - bool supportsPeriodicUpdate() const; + bool isReplicated() const; private: class INamedCollectionsStorage; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d84e5b149f6..6c53837138b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -364,6 +364,7 @@ class IColumn; \ M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \ M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \ + M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \ /** Settings for testing hedged requests */ \ M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \ M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \ diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index 79a17fd1844..0e83e2039f6 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterAlterNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().updateFromSQL(query); diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c71441daa8c..b4920b1729f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO 
InterpreterCreateNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().createFromSQL(query); diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index 2edaef1b2f2..6233d21b439 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -13,14 +14,16 @@ namespace DB BlockIO InterpreterDropNamedCollectionQuery::execute() { auto current_context = getContext(); - const auto & query = query_ptr->as(); + + const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext()); + const auto & query = updated_query->as(); current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name); if (!query.cluster.empty()) { DDLQueryOnClusterParams params; - return executeDDLQueryOnCluster(query_ptr, current_context, params); + return executeDDLQueryOnCluster(updated_query, current_context, params); } NamedCollectionFactory::instance().removeFromSQL(query); diff --git a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp index 44167fe7242..dd20164925c 100644 --- a/src/Interpreters/removeOnClusterClauseIfNeeded.cpp +++ b/src/Interpreters/removeOnClusterClauseIfNeeded.cpp @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -38,6 +42,13 @@ static bool isAccessControlQuery(const ASTPtr & query) || query->as(); } +static bool isNamedCollectionQuery(const ASTPtr & query) +{ + return query->as() + || query->as() + || query->as(); +} + ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params) { auto * query_on_cluster = dynamic_cast(query.get()); @@ -50,7 +61,10 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c && context->getUserDefinedSQLObjectsStorage().isReplicated()) || (isAccessControlQuery(query) && context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries - && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))) + && context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)) + || (isNamedCollectionQuery(query) + && context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries + && NamedCollectionFactory::instance().usesReplicatedStorage())) { LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID()); return query_on_cluster->getRewrittenASTWithoutOnCluster(params); diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml index 2d7946d1587..43d80ee6f69 100644 --- 
a/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml +++ b/tests/integration/test_named_collections/configs/config.d/named_collections_with_zookeeper.xml @@ -9,4 +9,21 @@ value1 + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml index 15da914f666..7d4f0543ff1 100644 --- a/tests/integration/test_named_collections/configs/users.d/users.xml +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -1,4 +1,9 @@ + + + 0 + + diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index dbc502236c0..5d38047e885 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -3,6 +3,8 @@ import pytest import os import time from helpers.cluster import ClickHouseCluster +from contextlib import nullcontext as does_not_raise +from helpers.client import QueryRuntimeException SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NAMED_COLLECTIONS_CONFIG = os.path.join( @@ -761,3 +763,29 @@ def test_keeper_storage(cluster): check_dropped(node1) check_dropped(node2) + + +@pytest.mark.parametrize( + "ignore, expected_raise", + [(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))], +) +def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): + node = cluster.instances["node_with_keeper"] + + replace_in_users_config( + node, + "ignore_on_cluster_for_replicated_named_collections_queries>.", + f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}", + ) + node.query("SYSTEM RELOAD CONFIG") + + with expected_raise: + node.query( + f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" + ) + node.query( + f"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3" + ) + node.query( + f"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) From eeb3561c37f0554c021e100039ffb197b1058d62 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 9 Jul 2024 18:16:38 +0200 Subject: [PATCH 178/299] Stateless tests: run sequential tests in parallel to other tests --- docker/test/fasttest/run.sh | 5 + docker/test/stateless/run.sh | 112 ++++++++++-- tests/ci/ci_config.py | 20 +-- tests/clickhouse-test | 169 +++++++++++++++--- tests/config/install.sh | 1 - ..._log_and_exception_messages_formatting.sql | 2 +- tests/queries/shell_config.sh | 8 + 7 files changed, 265 insertions(+), 52 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index c015d3a3542..0d975d64010 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -284,6 +284,11 @@ function run_tests NPROC=1 fi + export CLICKHOUSE_CONFIG_DIR=$FASTTEST_DATA + export CLICKHOUSE_CONFIG="$FASTTEST_DATA/config.xml" + export CLICKHOUSE_USER_FILES="$FASTTEST_DATA/user_files" + export CLICKHOUSE_SCHEMA_FILES="$FASTTEST_DATA/format_schemas" + local test_opts=( --hung-check --fast-tests-only diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 43d3c698d8a..30079073ea2 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -9,6 +9,16 @@ set -e -x -a MAX_RUN_TIME=${MAX_RUN_TIME:-10800} MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 
10800 : MAX_RUN_TIME)) +USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} +USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} + +# disable for now +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 +fi + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well @@ -89,10 +99,57 @@ if [ "$NUM_TRIES" -gt "1" ]; then mkdir -p /var/run/clickhouse-server fi +# Run a CH instance to execute sequential tests on it in parallel with all other tests. +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + mkdir -p /var/run/clickhouse-server3 /etc/clickhouse-server3 /var/lib/clickhouse3 + cp -r -L /etc/clickhouse-server/* /etc/clickhouse-server3/ + + sudo chown clickhouse:clickhouse /var/run/clickhouse-server3 /var/lib/clickhouse3 /etc/clickhouse-server3/ + sudo chown -R clickhouse:clickhouse /etc/clickhouse-server3/* + + function replace(){ + sudo find /etc/clickhouse-server3/ -type f -name '*.xml' -exec sed -i "$1" {} \; + } + + replace "s|9000|19000|g" + replace "s|9440|19440|g" + replace "s|9988|19988|g" + replace "s|9234|19234|g" + replace "s|9181|19181|g" + replace "s|8443|18443|g" + replace "s|9000|19000|g" + replace "s|9181|19181|g" + replace "s|9440|19440|g" + replace "s|9010|19010|g" + replace "s|9004|19004|g" + replace "s|9005|19005|g" + replace "s|9009|19009|g" + replace "s|8123|18123|g" + replace "s|/var/lib/clickhouse/|/var/lib/clickhouse3/|g" + replace "s|/etc/clickhouse-server/|/etc/clickhouse-server3/|g" + # distributed cache + replace "s|10001|10003|g" + replace "s|10002|10004|g" + + sudo -E -u clickhouse /usr/bin/clickhouse server --daemon --config /etc/clickhouse-server3/config.xml \ + --pid-file /var/run/clickhouse-server3/clickhouse-server.pid \ + -- --path /var/lib/clickhouse3/ --logger.stderr /var/log/clickhouse-server/stderr3.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server3.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server3.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --prometheus.port 19988 --keeper_server.raft_configuration.server.port 19234 --keeper_server.tcp_port 19181 \ + --mysql_port 19004 --postgresql_port 19005 + + for _ in {1..100} + do + clickhouse-client --port 19000 --query "SELECT 1" && break + sleep 1 + done +fi + # simplest way to forward env variables to server sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_2/|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml @@ -129,7 +186,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? 
MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp @@ -209,15 +266,15 @@ function run_tests() ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--shared-catalog') fi - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--replicated-database') # Too many tests fail for DatabaseReplicated in parallel. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('2') + ADDITIONAL_OPTIONS+=('3') elif [[ 1 == $(clickhouse-client --query "SELECT value LIKE '%SANITIZE_COVERAGE%' FROM system.build_options WHERE name = 'CXX_FLAGS'") ]]; then # Coverage on a per-test basis could only be collected sequentially. # Do not set the --jobs parameter. @@ -225,7 +282,11 @@ function run_tests() else # All other configurations are OK. ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('8') + ADDITIONAL_OPTIONS+=('5') + fi + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--run-sequential-tests-in-parallel') fi if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then @@ -289,7 +350,7 @@ do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -298,7 +359,7 @@ do [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 @@ -309,12 +370,17 @@ done # Why do we read data with clickhouse-local? # Because it's the simplest way to read it when server has crashed. 
sudo clickhouse stop ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server3 ||: +fi + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: fi @@ -322,6 +388,12 @@ rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server3/clickhouse-server.log ||: + rg -A50 -Fa "============" /var/log/clickhouse-server3/stderr.log ||: + zstd --threads=0 < /var/log/clickhouse-server3/clickhouse-server.log > /test_output/clickhouse-server3.log.zst & +fi + data_path_config="--path=/var/lib/clickhouse/" if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) @@ -341,12 +413,17 @@ if [ $failed_to_save_logs -ne 0 ]; then for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + + if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse3/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.3.tsv.zst ||: + fi + + if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi - if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: fi done @@ -382,7 +459,14 @@ rm -rf /var/lib/clickhouse/data/system/*/ tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then +if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then + rm -rf /var/lib/clickhouse3/data/system/*/ + tar -chf /test_output/store.tar /var/lib/clickhouse3/store ||: + tar -chf /test_output/metadata.tar /var/lib/clickhouse3/metadata/*.sql ||: +fi + + +if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "" 
/var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: @@ -393,7 +477,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi -if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then +if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bef43083a35..8eda6e6b96f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -311,42 +311,42 @@ class CI: random_bucket="parrepl_with_sanitizer", ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 ), JobNames.STATELESS_TEST_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_TSAN], num_batches=5 + required_builds=[BuildNames.PACKAGE_TSAN], num_batches=2 ), JobNames.STATELESS_TEST_MSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_MSAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_MSAN], num_batches=3 ), JobNames.STATELESS_TEST_UBSAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=2 + required_builds=[BuildNames.PACKAGE_UBSAN], num_batches=1 ), JobNames.STATELESS_TEST_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=5 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=6 + required_builds=[BuildNames.PACKAGE_RELEASE_COVERAGE], num_batches=5 ), JobNames.STATELESS_TEST_AARCH64: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=3 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=6 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4, release_only=True + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2, release_only=True ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=5, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], diff --git a/tests/clickhouse-test 
b/tests/clickhouse-test index 8e7002af889..8dea6297a61 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -709,9 +709,9 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: ( - 0 if random.random() < 0.5 else random.randint(1, 16) - ), + "max_insert_threads": lambda: 32 + if random.random() < 0.03 + else random.randint(1, 3), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), "group_by_two_level_threshold_bytes": threshold_generator( 0.2, 0.2, 1, 50000000 @@ -727,7 +727,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_joined_block_size_rows": lambda: random.randint(8000, 100000), - "max_threads": lambda: random.randint(1, 64), + "max_threads": lambda: 64 if random.random() < 0.03 else random.randint(1, 3), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1), @@ -1217,6 +1217,11 @@ class TestCase: ): return FailureReason.OBJECT_STORAGE + elif "no-batch" in tags and ( + args.run_by_hash_num is not None or args.run_by_hash_total is not None + ): + return FailureReason.SKIP + elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -1447,8 +1452,7 @@ class TestCase: description_full = messages[result.status] description_full += self.print_test_time(result.total_time) if result.reason is not None: - description_full += " - " - description_full += result.reason.value + description_full += f"\nReason: {result.reason.value} " description_full += result.description @@ -1575,10 +1579,11 @@ class TestCase: # pylint:disable-next=consider-using-with; TODO: fix proc = Popen(command, shell=True, env=os.environ, start_new_session=True) - while ( - datetime.now() - start_time - ).total_seconds() < args.timeout and proc.poll() is None: - sleep(0.01) + try: + proc.wait(args.timeout) + except subprocess.TimeoutExpired: + # Whether the test timed out will be decided later + pass debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): @@ -1600,6 +1605,44 @@ class TestCase: # Normalize hostname in stdout file. 
replace_in_file(self.stdout_file, socket.gethostname(), "localhost") + if os.environ.get("CLICKHOUSE_PORT_TCP"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP']}", + "PORT 9000", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP']}", + "localhost 9000", + ) + + if os.environ.get("CLICKHOUSE_PORT_TCP_SECURE"): + replace_in_file( + self.stdout_file, + f"PORT {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "PORT 9440", + ) + replace_in_file( + self.stdout_file, + f"localhost {os.environ['CLICKHOUSE_PORT_TCP_SECURE']}", + "localhost 9440", + ) + + if os.environ.get("CLICKHOUSE_PATH"): + replace_in_file( + self.stdout_file, + os.environ["CLICKHOUSE_PATH"], + "/var/lib/clickhouse", + ) + + if os.environ.get("CLICKHOUSE_PORT_HTTPS"): + replace_in_file( + self.stdout_file, + f"https://localhost:{os.environ['CLICKHOUSE_PORT_HTTPS']}/", + "https://localhost:8443/", + ) + stdout = "" if os.path.exists(self.stdout_file): with open(self.stdout_file, "rb") as stdfd: @@ -2056,8 +2099,13 @@ class GlobalTimeout(Exception): pass -def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): - all_tests, num_tests, test_suite = all_tests_with_params +def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool]): + ( + all_tests, + num_tests, + test_suite, + is_concurrent, + ) = all_tests_with_params global stop_time global exit_code global server_died @@ -2100,14 +2148,12 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): failures_chain = 0 start_time = datetime.now() - is_concurrent = multiprocessing.current_process().name != "MainProcess" - client_options = get_additional_client_options(args) if num_tests > 0: about = "about " if is_concurrent else "" proc_name = multiprocessing.current_process().name - print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") + print(f"Running {about}{num_tests} {test_suite.suite} tests ({proc_name}).") while True: if all_tests: @@ -2128,16 +2174,17 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): try: description = "" - test_cace_name = removesuffix(test_case.name, ".gen", ".sql") + ": " - if not is_concurrent: + test_case_name = removesuffix(test_case.name, ".gen", ".sql") + ": " + + if is_concurrent or args.run_sequential_tests_in_parallel: + description = f"{test_case_name:72}" + else: sys.stdout.flush() - sys.stdout.write(f"{test_cace_name:72}") + sys.stdout.write(f"{test_case_name:72}") # This flush is needed so you can see the test name of the long # running test before it will finish. But don't do it in parallel # mode, so that the lines don't mix. 
sys.stdout.flush() - else: - description = f"{test_cace_name:72}" while True: test_result = test_case.run( @@ -2372,6 +2419,35 @@ def extract_key(key: str) -> str: )[1] +def override_envs(*args_, **kwargs): + global args + args.client += " --port 19000" + args.http_port = 18123 + args.https_port = 18443 + + updated_env = { + "CLICKHOUSE_CONFIG": "/etc/clickhouse-server3/config.xml", + "CLICKHOUSE_CONFIG_DIR": "/etc/clickhouse-server3", + "CLICKHOUSE_CONFIG_GREP": "/etc/clickhouse-server3/preprocessed/config.xml", + "CLICKHOUSE_USER_FILES": "/var/lib/clickhouse3/user_files", + "CLICKHOUSE_SCHEMA_FILES": "/var/lib/clickhouse3/format_schemas", + "CLICKHOUSE_PATH": "/var/lib/clickhouse3", + "CLICKHOUSE_PORT_TCP": "19000", + "CLICKHOUSE_PORT_TCP_SECURE": "19440", + "CLICKHOUSE_PORT_TCP_WITH_PROXY": "19010", + "CLICKHOUSE_PORT_HTTP": "18123", + "CLICKHOUSE_PORT_HTTPS": "18443", + "CLICKHOUSE_PORT_INTERSERVER": "19009", + "CLICKHOUSE_PORT_KEEPER": "19181", + "CLICKHOUSE_PORT_PROMTHEUS_PORT": "19988", + "CLICKHOUSE_PORT_MYSQL": "19004", + "CLICKHOUSE_PORT_POSTGRESQL": "19005", + } + os.environ.update(updated_env) + + run_tests_array(*args_, **kwargs) + + def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( @@ -2400,24 +2476,55 @@ def do_run_tests(jobs, test_suite: TestSuite): for job in range(jobs): range_ = job * batch_size, job * batch_size + batch_size batch = test_suite.parallel_tests[range_[0] : range_[1]] - parallel_tests_array.append((batch, batch_size, test_suite)) + parallel_tests_array.append((batch, batch_size, test_suite, True)) try: - with multiprocessing.Pool(processes=jobs) as pool: + with multiprocessing.Pool(processes=jobs + 1) as pool: future = pool.map_async(run_tests_array, parallel_tests_array) + + if args.run_sequential_tests_in_parallel: + # Run parallel tests and sequential tests at the same time + # Sequential tests will use different ClickHouse instance + # In this process we can safely override values in `args` and `os.environ` + future_seq = pool.map_async( + override_envs, + [ + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ) + ], + ) + future_seq.wait() + future.wait() finally: pool.terminate() pool.close() pool.join() - run_tests_array( - (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite) - ) + if not args.run_sequential_tests_in_parallel: + run_tests_array( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ) + ) return len(test_suite.sequential_tests) + len(test_suite.parallel_tests) else: num_tests = len(test_suite.all_tests) - run_tests_array((test_suite.all_tests, num_tests, test_suite)) + run_tests_array( + ( + test_suite.all_tests, + num_tests, + test_suite, + False, + ) + ) return num_tests @@ -2722,6 +2829,7 @@ def main(args): f"{get_db_engine(args, db_name)}", settings=get_create_database_settings(args, None), ) + break except HTTPError as e: total_time = (datetime.now() - start_time).total_seconds() if not need_retry(args, e.message, e.message, total_time): @@ -3234,6 +3342,15 @@ def parse_args(): help="Replace ordinary MergeTree engine with SharedMergeTree", ) + parser.add_argument( + "--run-sequential-tests-in-parallel", + action="store_true", + default=False, + help="If `true`, tests with the tag `no-parallel` will run on a " + "separate ClickHouse instance in parallel with other tests. 
" + "This is used in CI to make test jobs run faster.", + ) + return parser.parse_args() diff --git a/tests/config/install.sh b/tests/config/install.sh index 08ee11a7407..8b58a519bc9 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -57,7 +57,6 @@ ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 0ca7df8ecd3..07c42d6d039 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-ubsan, no-batch -- no-parallel because we want to run this test when most of the other tests already passed -- If this test fails, see the "Top patterns of log messages" diagnostics in the end of run.log diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 614bfcece8f..ef2d89f0218 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -54,9 +54,17 @@ export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfu export CLICKHOUSE_COMPRESSOR=${CLICKHOUSE_COMPRESSOR:="${CLICKHOUSE_BINARY}-compressor"} export CLICKHOUSE_GIT_IMPORT=${CLICKHOUSE_GIT_IMPORT="${CLICKHOUSE_BINARY}-git-import"} +export CLICKHOUSE_CONFIG_DIR=${CLICKHOUSE_CONFIG_DIR:="/etc/clickhouse-server"} export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"} export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"} +export CLICKHOUSE_USER_FILES=${CLICKHOUSE_USER_FILES:="/var/lib/clickhouse/user_files"} +export CLICKHOUSE_USER_FILES_UNIQUE=${CLICKHOUSE_USER_FILES_UNIQUE:="${CLICKHOUSE_USER_FILES}/${CLICKHOUSE_TEST_UNIQUE_NAME}"} +# synonym +export USER_FILES_PATH=$CLICKHOUSE_USER_FILES + +export CLICKHOUSE_SCHEMA_FILES=${CLICKHOUSE_SCHEMA_FILES:="/var/lib/clickhouse/format_schemas"} + [ -x "${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY extract-from-config --config=$CLICKHOUSE_CONFIG"} export CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="$CLICKHOUSE_BINARY-extract-from-config --config=$CLICKHOUSE_CONFIG"} From 1e72335030a4de446275ece652876887910963e2 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 9 Jul 2024 17:53:34 +0000 Subject: [PATCH 179/299] Failed to connect to replica log message --- src/QueryPipeline/RemoteQueryExecutor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 
bde8ce78f55..61a6bf82270 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -104,6 +104,10 @@ RemoteQueryExecutor::RemoteQueryExecutor( connection_entries.emplace_back(std::move(result.entry)); } + else + { + LOG_DEBUG(log, "Failed to connect to replica {}. {}", pool->getAddress(), fail_message); + } auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension_ && extension_->replica_info) From 5a12659f43f74aa501610404c4b2ee6b1b4a02c9 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:18:03 +0200 Subject: [PATCH 180/299] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 43d3c698d8a..637d277e6f8 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s KILL --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From beca3b98341e9cf52a9a9840b3f216a67b454e29 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 9 Jul 2024 19:31:33 +0000 Subject: [PATCH 181/299] Proper placing for DistributedConnectionFailTry + better logs --- src/Client/ConnectionEstablisher.cpp | 3 +++ src/Client/HedgedConnectionsFactory.cpp | 2 -- src/Common/PoolWithFailoverBase.h | 2 -- src/QueryPipeline/RemoteQueryExecutor.cpp | 6 +++++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Client/ConnectionEstablisher.cpp b/src/Client/ConnectionEstablisher.cpp index 303105751ad..05839b44452 100644 --- a/src/Client/ConnectionEstablisher.cpp +++ b/src/Client/ConnectionEstablisher.cpp @@ -8,6 +8,7 @@ namespace ProfileEvents extern const Event DistributedConnectionUsable; extern const Event DistributedConnectionMissingTable; extern const Event DistributedConnectionStaleReplica; + extern const Event DistributedConnectionFailTry; } namespace DB @@ -97,6 +98,8 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std:: } catch (const Exception & e) { + ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); + if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT && e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && e.code() != ErrorCodes::DNS_ERROR) throw; diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 0fa2bc12924..be7397b0fad 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -7,7 +7,6 @@ namespace ProfileEvents { extern const Event HedgedRequestsChangeReplica; - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; } @@ -327,7 +326,6 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect { ShuffledPool & shuffled_pool = shuffled_pools[index]; LOG_INFO(log, "Connection failed at try №{}, reason: {}", 
(shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); shuffled_pool.slowdown_count = 0; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 2359137012c..3d4de773a36 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -28,7 +28,6 @@ namespace ErrorCodes namespace ProfileEvents { - extern const Event DistributedConnectionFailTry; extern const Event DistributedConnectionFailAtAll; extern const Event DistributedConnectionSkipReadOnlyReplica; } @@ -285,7 +284,6 @@ PoolWithFailoverBase::getMany( else { LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1); diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 61a6bf82270..14457d2df43 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -106,7 +106,11 @@ RemoteQueryExecutor::RemoteQueryExecutor( } else { - LOG_DEBUG(log, "Failed to connect to replica {}. {}", pool->getAddress(), fail_message); + chassert(!fail_message.empty()); + if (result.entry.isNull()) + LOG_DEBUG(log, "Failed to connect to replica {}. {}", pool->getAddress(), fail_message); + else + LOG_DEBUG(log, "Replica is not usable for remote query execution: {}. {}", pool->getAddress(), fail_message); } auto res = std::make_unique(std::move(connection_entries), context, throttler); From 0d54151cb81421b8eaa99df0c8abb224b776570b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Jul 2024 19:55:37 +0000 Subject: [PATCH 182/299] Make the pocketfft to point to the upstream/master branch --- contrib/pocketfft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/pocketfft b/contrib/pocketfft index 9efd4da52cf..f4c1aa8aa9c 160000 --- a/contrib/pocketfft +++ b/contrib/pocketfft @@ -1 +1 @@ -Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546 +Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3 From 2b091983e8df97a5a103be8aa03ad2c0a836ff46 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Jul 2024 19:59:49 +0000 Subject: [PATCH 183/299] Bump Azure to https://github.com/ClickHouse/azure-sdk-for-cpp/commit/ea3e19a7be08519134c643177d56c7484dfec884 --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index 92c94d7f37a..ea3e19a7be0 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf +Subproject commit ea3e19a7be08519134c643177d56c7484dfec884 From 7ff447de686d687fab0e08a3094eb173d8d128ff Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 8 Jul 2024 16:13:03 +0000 Subject: [PATCH 184/299] Modify the code for build with new libcxx (cherry picked from commit c896f0bf677bcc948d988cf83b2108e9f7cd761d) --- base/base/demangle.h | 1 + base/base/extended_types.h | 16 ++++++++ base/base/isSharedPtrUnique.h | 9 +++++ base/poco/Foundation/include/Poco/Format.h | 2 +- .../include/Poco/RefCountedObject.h | 2 + base/poco/Foundation/src/Format.cpp | 40 +++++++++---------- base/poco/MongoDB/src/ObjectId.cpp | 2 +- base/poco/MongoDB/src/OpMsgCursor.cpp | 8 ++-- 
base/poco/Net/src/HTTPMessage.cpp | 12 +++--- src/Backups/BackupOperationInfo.h | 2 + src/Common/AtomicLogger.h | 2 + src/Common/ConcurrencyControl.h | 1 + src/Common/RemoteHostFilter.h | 1 + src/Coordination/Changelog.h | 1 + src/Coordination/FourLetterCommand.h | 2 + src/Databases/DatabaseAtomic.cpp | 5 ++- src/Databases/DatabaseLazy.cpp | 3 +- src/Databases/MySQL/DatabaseMySQL.cpp | 3 +- src/Functions/FunctionsBitToArray.cpp | 9 ++++- src/Interpreters/ActionsVisitor.cpp | 9 +++-- src/Interpreters/Cache/Metadata.h | 4 +- src/Interpreters/DatabaseCatalog.cpp | 6 ++- src/Interpreters/ExternalLoader.h | 1 + src/Interpreters/JIT/CHJIT.cpp | 6 +-- src/Interpreters/Session.cpp | 5 ++- src/Storages/MergeTree/MergeTreeData.cpp | 11 ++--- 26 files changed, 108 insertions(+), 55 deletions(-) create mode 100644 base/base/isSharedPtrUnique.h diff --git a/base/base/demangle.h b/base/base/demangle.h index ddca264ecab..af9ccad16c1 100644 --- a/base/base/demangle.h +++ b/base/base/demangle.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include diff --git a/base/base/extended_types.h b/base/base/extended_types.h index 796167ab45d..3bf3f4ed31d 100644 --- a/base/base/extended_types.h +++ b/base/base/extended_types.h @@ -108,6 +108,14 @@ struct make_unsigned // NOLINT(readability-identifier-naming) using type = std::make_unsigned_t; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt8; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt16; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt32; }; +template <> struct make_unsigned { using type = UInt64; }; +template <> struct make_unsigned { using type = UInt64; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt128; }; template <> struct make_unsigned { using type = UInt256; }; @@ -121,6 +129,14 @@ struct make_signed // NOLINT(readability-identifier-naming) using type = std::make_signed_t; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int8; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int16; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int32; }; +template <> struct make_signed { using type = Int64; }; +template <> struct make_signed { using type = Int64; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int128; }; template <> struct make_signed { using type = Int256; }; diff --git a/base/base/isSharedPtrUnique.h b/base/base/isSharedPtrUnique.h new file mode 100644 index 00000000000..c153605ecb1 --- /dev/null +++ b/base/base/isSharedPtrUnique.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +template +bool isSharedPtrUnique(const std::shared_ptr & ptr) +{ + return ptr.use_count() == 1; +} diff --git a/base/poco/Foundation/include/Poco/Format.h b/base/poco/Foundation/include/Poco/Format.h index f84be16d3ad..4f91dd44ca5 100644 --- a/base/poco/Foundation/include/Poco/Format.h +++ b/base/poco/Foundation/include/Poco/Format.h @@ -232,7 +232,7 @@ void Foundation_API format( const Any & value10); -void Foundation_API format(std::string & result, const std::string & fmt, const std::vector & values); +void Foundation_API formatVector(std::string & result, const std::string & 
fmt, const std::vector & values); /// Supports a variable number of arguments and is used by /// all other variants of format(). diff --git a/base/poco/Foundation/include/Poco/RefCountedObject.h b/base/poco/Foundation/include/Poco/RefCountedObject.h index db966089e00..d0d964d8390 100644 --- a/base/poco/Foundation/include/Poco/RefCountedObject.h +++ b/base/poco/Foundation/include/Poco/RefCountedObject.h @@ -21,6 +21,8 @@ #include "Poco/AtomicCounter.h" #include "Poco/Foundation.h" +#include + namespace Poco { diff --git a/base/poco/Foundation/src/Format.cpp b/base/poco/Foundation/src/Format.cpp index 9872ddff042..94ab124510d 100644 --- a/base/poco/Foundation/src/Format.cpp +++ b/base/poco/Foundation/src/Format.cpp @@ -51,8 +51,8 @@ namespace } if (width != 0) str.width(width); } - - + + void parsePrec(std::ostream& str, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { if (itFmt != endFmt && *itFmt == '.') @@ -67,7 +67,7 @@ namespace if (prec >= 0) str.precision(prec); } } - + char parseMod(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { char mod = 0; @@ -77,13 +77,13 @@ namespace { case 'l': case 'h': - case 'L': + case 'L': case '?': mod = *itFmt++; break; } } return mod; } - + std::size_t parseIndex(std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt) { int index = 0; @@ -110,8 +110,8 @@ namespace case 'f': str << std::fixed; break; } } - - + + void writeAnyInt(std::ostream& str, const Any& any) { if (any.type() == typeid(char)) @@ -201,7 +201,7 @@ namespace str << RefAnyCast(*itVal++); break; case 'z': - str << AnyCast(*itVal++); + str << AnyCast(*itVal++); break; case 'I': case 'D': @@ -303,7 +303,7 @@ void format(std::string& result, const std::string& fmt, const Any& value) { std::vector args; args.push_back(value); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -312,7 +312,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons std::vector args; args.push_back(value1); args.push_back(value2); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -322,7 +322,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value1); args.push_back(value2); args.push_back(value3); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -333,7 +333,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value2); args.push_back(value3); args.push_back(value4); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -345,7 +345,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value3); args.push_back(value4); args.push_back(value5); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -358,7 +358,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value4); args.push_back(value5); args.push_back(value6); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -372,7 +372,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value5); args.push_back(value6); args.push_back(value7); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -387,7 +387,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value6); args.push_back(value7); args.push_back(value8); - format(result, fmt, args); + 
formatVector(result, fmt, args); } @@ -403,7 +403,7 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value7); args.push_back(value8); args.push_back(value9); - format(result, fmt, args); + formatVector(result, fmt, args); } @@ -420,16 +420,16 @@ void format(std::string& result, const std::string& fmt, const Any& value1, cons args.push_back(value8); args.push_back(value9); args.push_back(value10); - format(result, fmt, args); + formatVector(result, fmt, args); } -void format(std::string& result, const std::string& fmt, const std::vector& values) +void formatVector(std::string& result, const std::string& fmt, const std::vector& values) { std::string::const_iterator itFmt = fmt.begin(); std::string::const_iterator endFmt = fmt.end(); std::vector::const_iterator itVal = values.begin(); - std::vector::const_iterator endVal = values.end(); + std::vector::const_iterator endVal = values.end(); while (itFmt != endFmt) { switch (*itFmt) diff --git a/base/poco/MongoDB/src/ObjectId.cpp b/base/poco/MongoDB/src/ObjectId.cpp index 0125c246c2d..e360d129843 100644 --- a/base/poco/MongoDB/src/ObjectId.cpp +++ b/base/poco/MongoDB/src/ObjectId.cpp @@ -57,7 +57,7 @@ std::string ObjectId::toString(const std::string& fmt) const for (int i = 0; i < 12; ++i) { - s += format(fmt, (unsigned int) _id[i]); + s += Poco::format(fmt, (unsigned int) _id[i]); } return s; } diff --git a/base/poco/MongoDB/src/OpMsgCursor.cpp b/base/poco/MongoDB/src/OpMsgCursor.cpp index bc95851ae33..6abd45ecf76 100644 --- a/base/poco/MongoDB/src/OpMsgCursor.cpp +++ b/base/poco/MongoDB/src/OpMsgCursor.cpp @@ -43,9 +43,9 @@ namespace Poco { namespace MongoDB { -static const std::string keyCursor {"cursor"}; -static const std::string keyFirstBatch {"firstBatch"}; -static const std::string keyNextBatch {"nextBatch"}; +[[ maybe_unused ]] static const std::string keyCursor {"cursor"}; +[[ maybe_unused ]] static const std::string keyFirstBatch {"firstBatch"}; +[[ maybe_unused ]] static const std::string keyNextBatch {"nextBatch"}; static Poco::Int64 cursorIdFromResponse(const MongoDB::Document& doc); @@ -131,7 +131,7 @@ OpMsgMessage& OpMsgCursor::next(Connection& connection) connection.readResponse(_response); } else -#endif +#endif { _response.clear(); _query.setCursor(_cursorID, _batchSize); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index c0083ec410c..b7ab5543a85 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -17,9 +17,9 @@ #include "Poco/NumberFormatter.h" #include "Poco/NumberParser.h" #include "Poco/String.h" +#include #include - using Poco::NumberFormatter; using Poco::NumberParser; using Poco::icompare; @@ -75,7 +75,7 @@ void HTTPMessage::setContentLength(std::streamsize length) erase(CONTENT_LENGTH); } - + std::streamsize HTTPMessage::getContentLength() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -98,7 +98,7 @@ void HTTPMessage::setContentLength64(Poco::Int64 length) erase(CONTENT_LENGTH); } - + Poco::Int64 HTTPMessage::getContentLength64() const { const std::string& contentLength = get(CONTENT_LENGTH, EMPTY); @@ -133,13 +133,13 @@ void HTTPMessage::setChunkedTransferEncoding(bool flag) setTransferEncoding(IDENTITY_TRANSFER_ENCODING); } - + bool HTTPMessage::getChunkedTransferEncoding() const { return icompare(getTransferEncoding(), CHUNKED_TRANSFER_ENCODING) == 0; } - + void HTTPMessage::setContentType(const std::string& mediaType) { if (mediaType.empty()) @@ -154,7 +154,7 @@ 
void HTTPMessage::setContentType(const MediaType& mediaType) setContentType(mediaType.toString()); } - + const std::string& HTTPMessage::getContentType() const { return get(CONTENT_TYPE, UNKNOWN_CONTENT_TYPE); diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h index 21b5284458c..71589ec3b30 100644 --- a/src/Backups/BackupOperationInfo.h +++ b/src/Backups/BackupOperationInfo.h @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { diff --git a/src/Common/AtomicLogger.h b/src/Common/AtomicLogger.h index 0ece9e8a09a..c1bbdb41866 100644 --- a/src/Common/AtomicLogger.h +++ b/src/Common/AtomicLogger.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index ba94502962c..9d35d7cb8b0 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Common/RemoteHostFilter.h b/src/Common/RemoteHostFilter.h index 2b91306f405..4c8983205fa 100644 --- a/src/Common/RemoteHostFilter.h +++ b/src/Common/RemoteHostFilter.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index c9b45d9a344..0f833c17e1b 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 2a53bade62f..e3289982b0d 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -2,9 +2,11 @@ #include "config.h" +#include #include #include #include +#include #include namespace DB diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..0ed995c6180 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -12,7 +13,7 @@ #include #include #include -#include "Common/logger_useful.h" +#include #include #include #include @@ -397,7 +398,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables() LOG_DEBUG(log, "There are {} detached tables. 
Start searching non used tables.", detached_tables.size()); while (it != detached_tables.end()) { - if (it->second.unique()) + if (isSharedPtrUnique(it->second)) { not_in_use.emplace(it->first, it->second); it = detached_tables.erase(it); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 233db07cd68..da942cebf8f 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -305,7 +306,7 @@ try String table_name = expired_tables.front().table_name; auto it = tables_cache.find(table_name); - if (!it->second.table || it->second.table.unique()) + if (!it->second.table || isSharedPtrUnique(it->second.table)) { LOG_DEBUG(log, "Drop table {} from cache.", backQuote(it->first)); it->second.table.reset(); diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 1c82131af0d..bb24373a7e1 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL # include +# include # include # include # include @@ -354,7 +355,7 @@ void DatabaseMySQL::cleanOutdatedTables() { for (auto iterator = outdated_tables.begin(); iterator != outdated_tables.end();) { - if (!iterator->unique()) + if (!isSharedPtrUnique(*iterator)) ++iterator; else { diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index adabda1a7f8..81c80ae07bf 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -284,7 +284,13 @@ public: { while (x) { - result_array_values_data.push_back(std::countr_zero(x)); + /// С++20 char8_t is not an unsigned type anymore + /// https://stackoverflow.com/questions/57402464/is-c20-char8-t-the-same-as-our-old-char + // and thus you cannot use std::countr_zero on it. 
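As an aside, an illustrative standalone snippet (not part of this patch): std::countr_zero participates in overload resolution only for unsigned integer types, and char8_t is a distinct character type that does not qualify, so the value has to be converted to a plain unsigned type first, which is exactly what the cast below does.

#include <bit>

int main()
{
    char8_t x = 0x08;
    // int bad = std::countr_zero(x);  // ill-formed: char8_t is not an unsigned integer type in C++20
    int n = std::countr_zero(static_cast<unsigned char>(x)); // OK: 0b0000'1000 has 3 trailing zeros
    return n == 3 ? 0 : 1;
}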
+ if constexpr (std::is_same_v) + result_array_values_data.push_back(std::countr_zero(static_cast(x))); + else + result_array_values_data.push_back(std::countr_zero(x)); x &= (x - 1); } } @@ -336,4 +342,3 @@ REGISTER_FUNCTION(BitToArray) } } - diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c3285d73145..9efb1d89a47 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -405,10 +405,6 @@ Block createBlockForSet( } -ScopeStack::Level::Level() = default; -ScopeStack::Level::~Level() = default; -ScopeStack::Level::Level(Level &&) noexcept = default; - FutureSetPtr makeExplicitSet( const ASTFunction * node, const ActionsDAG & actions, ContextPtr context, PreparedSets & prepared_sets) { @@ -462,6 +458,7 @@ public: for (const auto * node : index) map.emplace(node->result_name, node); } + ~Index() = default; void addNode(const ActionsDAG::Node * node) { @@ -502,6 +499,10 @@ public: } }; +ScopeStack::Level::Level() = default; +ScopeStack::Level::~Level() = default; +ScopeStack::Level::Level(Level &&) noexcept = default; + ActionsMatcher::Data::Data( ContextPtr context_, SizeLimits set_size_limit_, diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index a5c8f3c0cf4..d2158457a44 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -6,6 +6,8 @@ #include #include #include + +#include #include namespace DB @@ -30,7 +32,7 @@ struct FileSegmentMetadata : private boost::noncopyable explicit FileSegmentMetadata(FileSegmentPtr && file_segment_); - bool releasable() const { return file_segment.unique(); } + bool releasable() const { return file_segment.use_count() == 1; } size_t size() const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 841decf29c5..0cc88ac62f5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -27,6 +28,7 @@ #include #include +#include #include #include "config.h" @@ -1197,7 +1199,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// It's unsafe to create another instance while the old one exists /// We cannot wait on shared_ptr's refcount, so it's busy wait - while (!dropped_table.table.unique()) + while (!isSharedPtrUnique(dropped_table.table)) std::this_thread::sleep_for(std::chrono::milliseconds(100)); dropped_table.table.reset(); @@ -1237,7 +1239,7 @@ void DatabaseCatalog::dropTableDataTask() size_t tables_in_use_count = 0; auto it = std::find_if(tables_marked_dropped.begin(), tables_marked_dropped.end(), [&](const auto & elem) { - bool not_in_use = !elem.table || elem.table.unique(); + bool not_in_use = !elem.table || isSharedPtrUnique(elem.table); bool old_enough = elem.drop_time <= current_time; min_drop_time = std::min(min_drop_time, elem.drop_time); tables_in_use_count += !not_in_use; diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index 49b5e68d821..6356a174a01 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 21c773ee1d7..c35d2442d1f 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -18,12 +18,12 @@ #include #include #include -#include +// #include #include #include -#include +// #include #include 
-#include +// #include #include #include diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index bb8c415602f..fb80b12ee60 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -130,7 +131,7 @@ public: LOG_TRACE(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); - if (!session.unique()) + if (!isSharedPtrUnique(session)) throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id); return {session, false}; } @@ -156,7 +157,7 @@ public: return; } - if (!it->second.unique()) + if (!isSharedPtrUnique(it->second)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot close session {} with refcount {}", session_id, it->second.use_count()); sessions.erase(it); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 467a5c82141..e31f6db5409 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -87,6 +87,7 @@ #include #include +#include #include #include @@ -2464,7 +2465,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) } /// Grab only parts that are not used by anyone (SELECTs for example). - if (!part.unique()) + if (!isSharedPtrUnique(part)) { part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); @@ -4360,13 +4361,13 @@ bool MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) part.reset(); - if (!((*it)->getState() == DataPartState::Outdated && it->unique())) + if (!((*it)->getState() == DataPartState::Outdated && isSharedPtrUnique(*it))) { if ((*it)->getState() != DataPartState::Outdated) LOG_WARNING(log, "Cannot immediately remove part {} because it's not in Outdated state " "usage counter {}", part_name_with_state, it->use_count()); - if (!it->unique()) + if (!isSharedPtrUnique(*it)) LOG_WARNING(log, "Cannot immediately remove part {} because someone using it right now " "usage counter {}", part_name_with_state, it->use_count()); return false; @@ -4432,7 +4433,7 @@ size_t MergeTreeData::getNumberOfOutdatedPartsWithExpiredRemovalTime() const for (const auto & part : outdated_parts_range) { auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && part.unique()) + if (part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds() && isSharedPtrUnique(part)) ++res; } @@ -8640,7 +8641,7 @@ size_t MergeTreeData::unloadPrimaryKeysOfOutdatedParts() /// Outdated part may be hold by SELECT query and still needs the index. /// This check requires lock of index_mutex but if outdated part is unique then there is no /// contention on it, so it's relatively cheap and it's ok to check under a global parts lock. 
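A side note on the shared_ptr changes in this patch (a minimal sketch, not the actual ClickHouse sources): std::shared_ptr::unique() was deprecated in C++17 and removed in C++20, so newer libc++ no longer provides it; every such call is therefore routed through a small helper that compares use_count() with 1, and as before the answer is only meaningful while no other thread can copy the pointer concurrently.

#include <cassert>
#include <memory>

// Mirrors the helper added in base/base/isSharedPtrUnique.h earlier in this patch.
template <typename T>
bool isSharedPtrUnique(const std::shared_ptr<T> & ptr)
{
    return ptr.use_count() == 1;
}

int main()
{
    auto part = std::make_shared<int>(42);
    assert(isSharedPtrUnique(part));    // sole owner, use_count() == 1
    auto reader = part;
    assert(!isSharedPtrUnique(part));   // now shared with `reader`
}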
- if (part.unique() && part->isIndexLoaded()) + if (isSharedPtrUnique(part) && part->isIndexLoaded()) parts_to_unload_index.push_back(part); } } From ab70de1dc9f2a604f1c5cc0933133ce812a2216c Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 8 Jul 2024 16:16:54 +0000 Subject: [PATCH 185/299] Delete unneeded changes (cherry picked from commit e28e83673d975f00b6df4c0d2a61e8015b2d434b) --- src/Interpreters/Cache/Metadata.h | 5 ++++- src/Interpreters/JIT/CHJIT.cpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index d2158457a44..0e85ead3265 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -1,5 +1,8 @@ #pragma once + + #include +#include #include #include #include @@ -32,7 +35,7 @@ struct FileSegmentMetadata : private boost::noncopyable explicit FileSegmentMetadata(FileSegmentPtr && file_segment_); - bool releasable() const { return file_segment.use_count() == 1; } + bool releasable() const { return isSharedPtrUnique(file_segment); } size_t size() const; diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index c35d2442d1f..21c773ee1d7 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -18,12 +18,12 @@ #include #include #include -// #include +#include #include #include -// #include +#include #include -// #include +#include #include #include From 8bb48fa9022e55f5f37eca2ca06019fe9f4eabb7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Jul 2024 20:24:16 +0000 Subject: [PATCH 186/299] Correct comment for std::countr_zero --- src/Functions/FunctionsBitToArray.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index 81c80ae07bf..beaaccad6db 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -284,9 +284,8 @@ public: { while (x) { - /// С++20 char8_t is not an unsigned type anymore - /// https://stackoverflow.com/questions/57402464/is-c20-char8-t-the-same-as-our-old-char - // and thus you cannot use std::countr_zero on it. + /// С++20 char8_t is not an unsigned integral type anymore https://godbolt.org/z/Mqcb7qn58 + /// and thus you cannot use std::countr_zero on it. if constexpr (std::is_same_v) result_array_values_data.push_back(std::countr_zero(static_cast(x))); else From 65bdf3bccc2a2c4a033a9b49cbee04ba88aa6842 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Tue, 9 Jul 2024 22:36:36 +0200 Subject: [PATCH 187/299] Update src/Interpreters/DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e34aef125fb..7087a04e9b4 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -837,7 +837,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(getLogger("DatabaseCatalog")) - , limitedLog(std::make_shared(log, 1, 5)) + , limitedLog(std::make_shared(log, 1, 20)) , first_async_drop_in_queue(tables_marked_dropped.end()) { } From b53e58c501109c81d57a746cc4b3b8c45a6840ef Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 9 Jul 2024 22:19:47 +0200 Subject: [PATCH 188/299] Fix error reporting while copying to S3. 
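The diff below drops the per-task exception slot and keeps a single bg_exception guarded by
bg_tasks_mutex: only the first failure is remembered, and it is rethrown once every background
upload task has finished. A minimal self-contained sketch of that pattern (illustrative names,
not the ClickHouse API):

    #include <exception>
    #include <functional>
    #include <mutex>
    #include <thread>
    #include <vector>

    /// Run all tasks on background threads, remember only the first exception,
    /// and rethrow it after every task has stopped, so that cleanup (such as
    /// aborting a multipart upload) can happen once, after the last worker exits.
    void runAndRethrowFirstFailure(std::vector<std::function<void()>> tasks)
    {
        std::mutex mutex;
        std::exception_ptr first_exception;

        std::vector<std::thread> threads;
        threads.reserve(tasks.size());
        for (auto & task : tasks)
        {
            threads.emplace_back([&mutex, &first_exception, task = std::move(task)]
            {
                try
                {
                    task();
                }
                catch (...)
                {
                    std::lock_guard lock(mutex);
                    if (!first_exception)
                        first_exception = std::current_exception();
                }
            });
        }

        for (auto & thread : threads)
            thread.join();

        if (first_exception)
            std::rethrow_exception(first_exception);
    }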
--- src/IO/S3/copyS3File.cpp | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index bb654c3f5c9..0b3e5e50f3d 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -98,7 +98,6 @@ namespace size_t part_size; String tag; bool is_finished = false; - std::exception_ptr exception; }; size_t num_parts; @@ -111,6 +110,7 @@ namespace size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; size_t num_finished_parts TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -273,7 +273,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of {}", dest_key)); // Multipart upload failed because it wasn't possible to schedule all the tasks. // To avoid execution of already scheduled tasks we abort MultipartUpload. abortMultipartUpload(); @@ -385,7 +385,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, fmt::format("While writing part #{}", task->part_number)); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. + } } task_finish_notify(); }, Priority{}); @@ -435,22 +440,21 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) { - if (task.exception) - { - /// abortMultipartUpload() might be called already, see processUploadPartRequest(). - /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. - /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free - /// all storage consumed by all parts. - abortMultipartUpload(); + /// abortMultipartUpload() might be called already, see processUploadPartRequest(). + /// However if there were concurrent uploads at that time, those part uploads might or might not succeed. + /// As a result, it might be necessary to abort a given multipart upload multiple times in order to completely free + /// all storage consumed by all parts. 
+ abortMultipartUpload(); - std::rethrow_exception(task.exception); - } - - part_tags.push_back(task.tag); + std::rethrow_exception(exception); } + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) + part_tags.push_back(task.tag); } }; From ebc87d0c702e9bb26814718fec97e4c938735dec Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 9 Jul 2024 22:58:06 +0200 Subject: [PATCH 189/299] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 637d277e6f8..1c03f5107b0 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s KILL --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -s TERM --preserve-status 120m -k 60m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From 73db17b499377b3a85a7f4c651649e88e79aad2c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Jul 2024 00:19:55 +0200 Subject: [PATCH 190/299] Update parallel_skip.json --- tests/integration/parallel_skip.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 33dd85aceaf..3c3d1b6cc96 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_config_load_balancing/test.py::test_az", "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", From 88d92367b7dc69a6df1407edaaa58e17f9eacfcf Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 00:24:53 +0200 Subject: [PATCH 191/299] Stateless tests: run sequential tests in parallel to other tests --- ...ard_memory_tracker_and_exception_safety.sh | 2 +- .../00429_long_http_bufferization.sh | 2 +- .../00600_replace_running_query.sh | 12 +- .../0_stateless/00623_truncate_all_tables.sql | 81 +- .../0_stateless/00623_truncate_table.sql | 3 - .../0_stateless/00719_parallel_ddl_db.sh | 11 +- .../0_stateless/00763_lock_buffer_long.sh | 2 +- .../00816_long_concurrent_alter_column.sh | 2 +- ...ong_concurrent_select_and_drop_deadlock.sh | 4 +- .../0_stateless/00910_buffer_prewhere.sql | 13 +- .../00938_template_input_format.sh | 5 +- .../00989_parallel_parts_loading.sql | 2 - ...rrent_recreate_reattach_and_show_tables.sh | 4 +- ...ent_move_partition_from_table_zookeeper.sh | 4 +- ...rfluous_dict_reload_on_create_database.sql | 34 +- .../01037_polygon_dicts_correctness_all.ans | 2000 ++++++++--------- .../01037_polygon_dicts_correctness_all.sh | 31 +- .../01037_polygon_dicts_correctness_fast.ans | 2000 ++++++++--------- .../01037_polygon_dicts_correctness_fast.sh | 29 +- .../01038_dictionary_lifetime_min_zero_sec.sh | 28 +- 
...ionary_invalidate_query_switchover_long.sh | 37 +- ...em_reload_dictionary_reloads_completely.sh | 35 +- .../0_stateless/01053_ssd_dictionary.sh | 2 - .../0_stateless/01055_compact_parts_1.sql | 7 - tests/queries/0_stateless/01060_avro.sh | 5 +- .../0_stateless/01069_database_memory.sql | 2 - ...cache_dictionary_datarace_exception_ptr.sh | 22 +- ...arallel_alter_add_drop_column_zookeeper.sh | 4 +- ...79_parallel_alter_modify_zookeeper_long.sh | 4 +- .../01083_expressions_in_engine_arguments.sql | 4 +- .../0_stateless/01098_msgpack_format.sh | 5 +- .../01111_create_drop_replicated_db_stress.sh | 2 +- ...01113_local_dictionary_type_conversion.sql | 18 +- .../0_stateless/01114_database_atomic.sh | 2 +- .../01114_mysql_database_engine_segfault.sql | 2 +- .../01125_dict_ddl_cannot_add_column.sql | 12 +- ..._month_partitioning_consistency_select.sql | 3 - .../0_stateless/01154_move_partition_long.sh | 2 +- .../01171_mv_select_insert_isolation_long.sh | 2 +- .../01185_create_or_replace_table.sql | 2 +- .../01188_attach_table_from_path.sql | 2 +- .../01225_drop_dictionary_as_table.sql | 19 +- .../01254_dict_create_without_db.sql | 21 +- .../01254_dict_load_after_detach_attach.sql | 27 +- .../01259_dictionary_custom_settings_ddl.sql | 20 +- .../0_stateless/01269_alias_type_differs.sql | 2 - .../0_stateless/01272_suspicious_codecs.sql | 4 - .../01280_ssd_complex_key_dictionary.sh | 2 - ..._recreate_reattach_and_show_tables_long.sh | 4 +- ...1_aggregate_state_exception_memory_leak.sh | 2 +- ...2_aggregate_state_exception_memory_leak.sh | 2 +- .../01338_long_select_and_alter.sh | 2 +- .../01338_long_select_and_alter_zookeeper.sh | 2 +- .../01355_alter_column_with_order.sql | 34 +- tests/queries/0_stateless/01355_ilike.sql | 6 +- .../0_stateless/01388_clear_all_columns.sql | 2 - .../0_stateless/01391_join_on_dict_crash.sql | 17 +- .../0_stateless/01392_column_resolve.sql | 31 +- ...nactive_replica_cleanup_nodes_zookeeper.sh | 2 +- .../01412_cache_dictionary_race.sh | 4 +- .../0_stateless/01415_sticking_mutations.sh | 2 +- .../01442_merge_detach_attach_long.sh | 4 +- .../01444_create_table_drop_database_race.sh | 12 +- ...01454_storagememory_data_race_challenge.sh | 6 +- .../01543_avro_deserialization_with_lc.sh | 2 - .../01593_concurrent_alter_mutations_kill.sh | 4 +- .../01658_read_file_to_stringcolumn.sh | 27 +- .../01684_ssd_cache_dictionary_simple_key.sh | 2 - .../01685_ssd_cache_dictionary_complex_key.sh | 2 - .../01710_projection_vertical_merges.sql | 2 +- .../01747_join_view_filter_dictionary.sql | 2 +- .../01747_system_session_log_long.sh | 12 +- .../01748_dictionary_table_dot.sql | 2 +- ...1780_clickhouse_dictionary_source_loop.sql | 6 +- .../0_stateless/01825_type_json_btc.sh | 9 +- .../01825_type_json_multiple_files.sh | 17 +- .../01825_type_json_schema_inference.sh | 7 +- ...75_ssd_cache_dictionary_decimal256_type.sh | 2 - ..._row_policy_defined_using_user_function.sh | 35 +- .../0_stateless/01889_sqlite_read_write.sh | 8 +- .../01903_ssd_cache_dictionary_array_type.sh | 2 - ..._cache_dictionary_default_nullable_type.sh | 2 - .../0_stateless/01910_view_dictionary.sql | 2 +- ...nt_ttl_and_normal_merges_zookeeper_long.sh | 4 +- ...th_escape_sequence_at_the_end_of_buffer.sh | 6 +- .../queries/0_stateless/02003_compress_bz2.sh | 1 - .../queries/0_stateless/02012_compress_lz4.sh | 1 - .../02022_storage_filelog_one_file.sh | 14 +- .../0_stateless/02023_storage_filelog.sh | 43 +- .../0_stateless/02024_storage_filelog_mv.sh | 25 +- .../02025_storage_filelog_virtual_col.sh | 31 +- 
.../02026_storage_filelog_largefile.sh | 21 +- .../queries/0_stateless/02030_capnp_format.sh | 7 +- .../02051_symlinks_to_user_files.sh | 5 +- ...2103_tsv_csv_custom_null_representation.sh | 2 +- ...3_with_names_and_types_parallel_parsing.sh | 7 +- .../02104_json_strings_nullable_string.sh | 1 - ...02105_table_function_file_partiotion_by.sh | 8 +- .../02115_write_buffers_finalize.sh | 2 +- ...7_custom_separated_with_names_and_types.sh | 1 - .../02118_deserialize_whole_text.sh | 42 +- .../02125_tskv_proper_names_reading.sh | 2 - .../queries/0_stateless/02126_fix_filelog.sh | 14 +- .../0_stateless/02129_skip_quoted_fields.sh | 1 - .../0_stateless/02130_parse_quoted_null.sh | 6 +- .../02149_external_schema_inference.sh | 9 +- .../0_stateless/02149_schema_inference.sh | 8 +- ...49_schema_inference_create_table_syntax.sh | 7 +- .../02167_format_from_file_extension.sh | 30 +- .../0_stateless/02185_orc_corrupted_file.sh | 1 - .../02207_allow_plaintext_and_no_password.sh | 2 +- ...2_create_table_without_columns_metadata.sh | 2 - .../02227_test_create_empty_sqlite_db.sh | 5 +- .../02228_merge_tree_insert_memory_usage.sql | 2 +- ...w_orc_parquet_nullable_schema_inference.sh | 1 - .../02245_parquet_skip_unknown_type.sh | 1 - ...46_tsv_csv_best_effort_schema_inference.sh | 3 +- .../02247_names_order_in_json_and_tskv.sh | 1 - .../02247_read_bools_as_numbers_json.sh | 5 +- .../0_stateless/02270_errors_in_files.sh | 14 +- .../02286_mysql_dump_input_format.sh | 2 - .../0_stateless/02293_formats_json_columns.sh | 2 - .../02297_regex_parsing_file_names.sh | 36 +- ...02327_capnproto_protobuf_empty_messages.sh | 3 +- .../0_stateless/02353_compression_level.sh | 3 +- .../0_stateless/02358_file_default_value.sh | 1 - .../02360_clickhouse_local_config-option.sh | 2 +- .../0_stateless/02372_data_race_in_avro.sh | 2 +- .../02373_heap_buffer_overflow_in_avro.sh | 2 - .../02383_arrow_dict_special_cases.sh | 1 - .../02402_capnp_format_segments_overflow.sh | 3 +- ...02421_record_errors_row_by_input_format.sh | 13 +- .../02422_allow_implicit_no_password.sh | 2 +- .../02455_one_row_from_csv_memory_usage.sh | 10 +- .../0_stateless/02457_bz2_concatenated.sh | 1 - ..._glob_for_recursive_directory_traversal.sh | 32 +- .../0_stateless/02475_bson_each_row_format.sh | 2 +- ...condition_between_insert_and_droppin_mv.sh | 2 +- .../02482_capnp_list_of_structs.sh | 3 +- .../0_stateless/02483_capnp_decimals.sh | 12 +- .../02504_regexp_dictionary_ua_parser.sh | 21 +- .../02504_regexp_dictionary_yaml_source.sh | 3 - .../0_stateless/02661_read_from_archive.lib | 22 +- ...02703_keeper_map_concurrent_create_drop.sh | 4 +- .../0_stateless/02722_database_filesystem.sh | 5 +- .../02724_decompress_filename_exception.sh | 1 - .../02732_rename_after_processing.sh | 5 +- ...02771_multidirectory_globs_storage_file.sh | 29 +- .../0_stateless/02889_file_log_save_errors.sh | 18 +- .../02892_input_csv_cr_end_count_many_rows.sh | 4 +- .../0_stateless/02895_npy_output_format.sh | 99 +- .../queries/0_stateless/02931_file_cluster.sh | 2 - ...33_change_cache_setting_without_restart.sh | 2 +- ...ynamically_change_filesystem_cache_size.sh | 2 +- ...2950_dictionary_ssd_cache_short_circuit.sh | 4 +- .../02961_storage_config_volume_priority.sh | 2 +- ..._sync_replica_lightweight_from_modifier.sh | 2 +- .../02968_file_log_multiple_read.sh | 7 +- .../0_stateless/02971_analyzer_remote_id.sh | 10 +- .../02973_parse_crlf_with_tsv_files.sh | 2 - .../queries/0_stateless/02984_form_format.sh | 4 +- ...2_dynamically_resize_filesystem_cache_2.sh | 2 +- 
...r_add_drop_column_zookeeper_on_steroids.sh | 2 +- .../03153_format_regexp_usability.sh | 2 +- 164 files changed, 2594 insertions(+), 2944 deletions(-) diff --git a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh index a42fd58190a..d57efaa1f0e 100755 --- a/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh +++ b/tests/queries/0_stateless/00133_long_shard_memory_tracker_and_exception_safety.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, shard, no-parallel +# Tags: long, shard CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 98dd300e6ab..83a6a4e8043 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long set -e diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 6a682210489..7a71d17f19b 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600" -${CLICKHOUSE_CLIENT} -q "create user u_00600 settings max_execution_time=60, readonly=1" -${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600" +TEST_PREFIX=$RANDOM +${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() { @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600 --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -43,4 +43,4 @@ wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44' -${CLICKHOUSE_CLIENT} -q "drop user u_00600" +${CLICKHOUSE_CLIENT} -q "drop user u_00600${TEST_PREFIX}" diff --git a/tests/queries/0_stateless/00623_truncate_all_tables.sql b/tests/queries/0_stateless/00623_truncate_all_tables.sql index 2d5e9d48f59..2626f7ed285 100644 --- a/tests/queries/0_stateless/00623_truncate_all_tables.sql +++ b/tests/queries/0_stateless/00623_truncate_all_tables.sql @@ -1,50 +1,43 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS truncate_test; - -CREATE DATABASE IF NOT EXISTS truncate_test; -CREATE TABLE IF NOT EXISTS 
truncate_test.truncate_test_set(id UInt64) ENGINE = Set; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_log(id UInt64) ENGINE = Log; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_memory(id UInt64) ENGINE = Memory; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; -CREATE TABLE IF NOT EXISTS truncate_test.truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; +CREATE TABLE IF NOT EXISTS truncate_test_set(id UInt64) ENGINE = Set; +CREATE TABLE IF NOT EXISTS truncate_test_log(id UInt64) ENGINE = Log; +CREATE TABLE IF NOT EXISTS truncate_test_memory(id UInt64) ENGINE = Memory; +CREATE TABLE IF NOT EXISTS truncate_test_tiny_log(id UInt64) ENGINE = TinyLog; +CREATE TABLE IF NOT EXISTS truncate_test_stripe_log(id UInt64) ENGINE = StripeLog; +CREATE TABLE IF NOT EXISTS truncate_test_merge_tree(p Date, k UInt64) ENGINE = MergeTree ORDER BY p; SELECT '======Before Truncate======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Empty======'; -TRUNCATE ALL TABLES FROM IF EXISTS truncate_test; -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; +TRUNCATE ALL TABLES FROM IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; SELECT '======After Truncate And Insert Data======'; -INSERT INTO truncate_test.truncate_test_set VALUES(0); -INSERT INTO truncate_test.truncate_test_log VALUES(1); -INSERT INTO truncate_test.truncate_test_memory VALUES(1); -INSERT INTO truncate_test.truncate_test_tiny_log VALUES(1); -INSERT INTO truncate_test.truncate_test_stripe_log VALUES(1); -INSERT INTO truncate_test.truncate_test_merge_tree 
VALUES('2000-01-01', 1); -SELECT * FROM system.numbers WHERE number NOT IN truncate_test.truncate_test_set LIMIT 1; -SELECT * FROM truncate_test.truncate_test_log; -SELECT * FROM truncate_test.truncate_test_memory; -SELECT * FROM truncate_test.truncate_test_tiny_log; -SELECT * FROM truncate_test.truncate_test_stripe_log; -SELECT * FROM truncate_test.truncate_test_merge_tree; - -DROP DATABASE IF EXISTS truncate_test; +INSERT INTO truncate_test_set VALUES(0); +INSERT INTO truncate_test_log VALUES(1); +INSERT INTO truncate_test_memory VALUES(1); +INSERT INTO truncate_test_tiny_log VALUES(1); +INSERT INTO truncate_test_stripe_log VALUES(1); +INSERT INTO truncate_test_merge_tree VALUES('2000-01-01', 1); +SELECT * FROM system.numbers WHERE number NOT IN truncate_test_set LIMIT 1; +SELECT * FROM truncate_test_log; +SELECT * FROM truncate_test_memory; +SELECT * FROM truncate_test_tiny_log; +SELECT * FROM truncate_test_stripe_log; +SELECT * FROM truncate_test_merge_tree; diff --git a/tests/queries/0_stateless/00623_truncate_table.sql b/tests/queries/0_stateless/00623_truncate_table.sql index 4a67e49acda..e35803db1d9 100644 --- a/tests/queries/0_stateless/00623_truncate_table.sql +++ b/tests/queries/0_stateless/00623_truncate_table.sql @@ -1,6 +1,5 @@ set allow_deprecated_syntax_for_merge_tree=1; -DROP DATABASE IF EXISTS truncate_test; DROP TABLE IF EXISTS truncate_test_log; DROP TABLE IF EXISTS truncate_test_memory; DROP TABLE IF EXISTS truncate_test_tiny_log; @@ -9,7 +8,6 @@ DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -CREATE DATABASE truncate_test; CREATE TABLE truncate_test_set(id UInt64) ENGINE = Set; CREATE TABLE truncate_test_log(id UInt64) ENGINE = Log; CREATE TABLE truncate_test_memory(id UInt64) ENGINE = Memory; @@ -75,4 +73,3 @@ DROP TABLE IF EXISTS truncate_test_stripe_log; DROP TABLE IF EXISTS truncate_test_merge_tree; DROP TABLE IF EXISTS truncate_test_materialized_view; DROP TABLE IF EXISTS truncate_test_materialized_depend; -DROP DATABASE IF EXISTS truncate_test; diff --git a/tests/queries/0_stateless/00719_parallel_ddl_db.sh b/tests/queries/0_stateless/00719_parallel_ddl_db.sh index b7dea25c182..ceba24df7e4 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_db.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_db.sh @@ -1,13 +1,12 @@ #!/usr/bin/env bash -# Tags: no-parallel - set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +DB_SUFFIX=$RANDOM +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" function query() { @@ -16,8 +15,8 @@ function query() while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 50 ]; do it=$((it+1)) - ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl" - ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" + ${CLICKHOUSE_CLIENT} --query "CREATE DATABASE IF NOT EXISTS parallel_ddl_${DB_SUFFIX}" + ${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" done } @@ -27,4 +26,4 @@ done wait -${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl" +${CLICKHOUSE_CLIENT} --query "DROP DATABASE IF EXISTS parallel_ddl_${DB_SUFFIX}" diff --git a/tests/queries/0_stateless/00763_lock_buffer_long.sh b/tests/queries/0_stateless/00763_lock_buffer_long.sh index 2006d43cdd2..444a66767aa 100755 --- a/tests/queries/0_stateless/00763_lock_buffer_long.sh +++ b/tests/queries/0_stateless/00763_lock_buffer_long.sh @@ -21,7 +21,7 @@ function thread1() function thread2() { - seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' + seq 1 500 | sed -r -e 's/.+/SELECT count() FROM buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473' | grep -v '(query: ' } thread1 & diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index 71acc11b971..0ed9593c689 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -50,7 +50,7 @@ export -f thread2; export -f thread3; export -f thread4; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> /dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh index 238cdcea547..ae728c8d10d 100755 --- a/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh +++ b/tests/queries/0_stateless/00840_long_concurrent_select_and_drop_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-parallel, no-debug +# Tags: deadlock, no-debug # NOTE: database = $CLICKHOUSE_DATABASE is unwanted @@ -49,7 +49,7 @@ function thread_select() export -f thread_drop_create export -f thread_select -TIMEOUT=60 +TIMEOUT=30 thread_drop_create $TIMEOUT & thread_select $TIMEOUT & diff --git a/tests/queries/0_stateless/00910_buffer_prewhere.sql b/tests/queries/0_stateless/00910_buffer_prewhere.sql index deda0db85fb..e6b1cc424ad 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere.sql @@ -1,9 +1,4 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS test_buffer; -CREATE DATABASE test_buffer; -CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); -CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); -INSERT INTO 
test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); -SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); -DROP DATABASE test_buffer; +CREATE TABLE mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); +CREATE TABLE buf as mt ENGINE = Buffer({CLICKHOUSE_DATABASE:Identifier}, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); +INSERT INTO buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); +SELECT count() from buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index be75edcdb61..016e662ea3b 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash -# Tags: no-parallel - # shellcheck disable=SC2016,SC2028 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +CURDIR=$CURDIR/${CLICKHOUSE_DATABASE} +mkdir -p $CURDIR + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template2"; $CLICKHOUSE_CLIENT --query="CREATE TABLE template1 (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index a05515cf756..407e124f137 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; diff --git a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh index 0a6888a5c69..3046fcbcd73 100755 --- a/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh +++ b/tests/queries/0_stateless/01014_lazy_database_concurrent_recreate_reattach_and_show_tables.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -89,7 +89,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 06a460f3600..0d57bb25543 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest set -e @@ -61,7 +61,7 @@ export -f thread3; export -f thread4; export -f thread5; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c thread1 2> 
/dev/null & timeout $TIMEOUT bash -c thread2 2> /dev/null & diff --git a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql index 9040d7b3231..d0841124706 100644 --- a/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql +++ b/tests/queries/0_stateless/01036_no_superfluous_dict_reload_on_create_database.sql @@ -1,35 +1,25 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01036; -CREATE DATABASE dict_db_01036; - -CREATE TABLE dict_db_01036.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01036.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01036')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'SYSTEM RELOAD DICTIONARY'; -SYSTEM RELOAD DICTIONARY dict_db_01036.dict; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01036.dict', 'val', toUInt64(0)); -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; SELECT 'CREATE DATABASE'; DROP DATABASE IF EXISTS empty_db_01036; -CREATE DATABASE empty_db_01036; -SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_01036' AND name = 'dict'; - -DROP DICTIONARY dict_db_01036.dict; -DROP TABLE dict_db_01036.dict_data; -DROP DATABASE dict_db_01036; -DROP DATABASE empty_db_01036; +CREATE DATABASE IF NOT EXISTS empty_db_01036; +SELECT query_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans index 6e31edbdd40..0a3f4123eb8 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 
-dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 101 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934570 -dictGet test_01037.dict_array (29.6579,-1.07336) 101 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934570 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 934570 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 -dictGet test_01037.dict_array (29.7453,-0.567963) 101 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 101 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 101 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 101 -dictGet test_01037.dict_array (29.8277,-0.531746) 101 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 101 -dictGet test_01037.dict_array (29.8649,0.0562212) 101 -dictGet test_01037.dict_array (29.8701,-1.02045) 101 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 101 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 101 -dictGet test_01037.dict_array (29.9201,-0.420861) 101 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 101 -dictGet 
test_01037.dict_array (29.9564,-0.600163) 101 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 101 -dictGet test_01037.dict_array (29.9699,-0.392375) 101 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 101 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 101 -dictGet test_01037.dict_array (30.0472,-1.11007) 101 -dictGet test_01037.dict_array (30.0542,-0.479585) 101 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 101 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 101 -dictGet test_01037.dict_array (30.0791,-0.277435) 101 -dictGet test_01037.dict_array (30.0931,0.0327512) 101 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 101 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 101 -dictGet test_01037.dict_array (30.1336,-0.851386) 101 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 101 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 101 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 101 -dictGet test_01037.dict_array (30.2054,0.620143) 101 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 101 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 101 -dictGet test_01037.dict_array (30.2425,-0.472914) 101 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 101 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 101 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 101 -dictGet test_01037.dict_array (30.286,0.469327) 101 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 101 -dictGet test_01037.dict_array (30.3085,1.07791) 101 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 101 -dictGet 
test_01037.dict_array (30.3227,-0.587351) 101 -dictGet test_01037.dict_array (30.332,1.00174) 101 -dictGet test_01037.dict_array (30.3388,0.844148) 101 -dictGet test_01037.dict_array (30.3485,0.561902) 101 -dictGet test_01037.dict_array (30.3497,0.180362) 101 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 101 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 101 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 101 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 101 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 101 -dictGet test_01037.dict_array (30.485,1.06326) 101 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array (30.5194,-1.03943) 101 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 101 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 101 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 101 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 101 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 101 -dictGet test_01037.dict_array (30.5715,0.65755) 101 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 101 -dictGet test_01037.dict_array (30.5751,0.60204) 101 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 101 -dictGet test_01037.dict_array (30.5947,0.875193) 101 -dictGet test_01037.dict_array (30.5992,-0.403901) 101 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 101 -dictGet test_01037.dict_array (30.6054,0.927203) 101 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 101 -dictGet test_01037.dict_array (30.6338,0.179565) 101 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 101 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array 
(30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 101 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 101 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 101 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 101 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 101 -dictGet test_01037.dict_array (30.7681,0.904198) 101 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 101 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 101 -dictGet test_01037.dict_array (30.8218,0.43909) 101 -dictGet test_01037.dict_array (30.8239,0.10014) 101 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 101 -dictGet test_01037.dict_array (30.8326,0.156011) 101 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 101 -dictGet test_01037.dict_array (30.8463,0.0515355) 101 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array (30.8566,0.517876) 101 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 101 -dictGet test_01037.dict_array (30.8914,1.0072) 101 -dictGet test_01037.dict_array (30.897,0.483425) 101 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 101 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 101 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 101 -dictGet test_01037.dict_array (30.9569,2.82038) 101 -dictGet test_01037.dict_array (30.9598,-0.641011) 101 -dictGet test_01037.dict_array (30.9601,-0.254928) 101 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 101 -dictGet test_01037.dict_array (30.9766,2.81957) 101 -dictGet test_01037.dict_array (30.9775,2.69273) 101 -dictGet test_01037.dict_array (30.9821,0.587715) 101 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 101 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 101 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 101 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 101 -dictGet test_01037.dict_array (31.0463,2.41997) 101 -dictGet test_01037.dict_array (31.047,0.624184) 101 -dictGet test_01037.dict_array (31.0569,0.0706393) 5994232 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 101 -dictGet test_01037.dict_array (31.063,3.23861) 101 -dictGet test_01037.dict_array (31.068,0.695575) 101 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 101 -dictGet test_01037.dict_array (31.0766,0.828128) 101 -dictGet test_01037.dict_array (31.0833,0.0612782) 5994232 -dictGet test_01037.dict_array (31.0833,2.59748) 101 -dictGet test_01037.dict_array 
(31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 101 -dictGet test_01037.dict_array (31.0882,1.4882) 101 -dictGet test_01037.dict_array (31.0924,3.42242) 101 -dictGet test_01037.dict_array (31.0927,2.67448) 101 -dictGet test_01037.dict_array (31.0936,1.12292) 101 -dictGet test_01037.dict_array (31.0952,-0.336928) 101 -dictGet test_01037.dict_array (31.0978,3.48482) 101 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 101 -dictGet test_01037.dict_array (31.1156,1.19171) 101 -dictGet test_01037.dict_array (31.1176,0.223509) 5994232 -dictGet test_01037.dict_array (31.1249,0.946838) 101 -dictGet test_01037.dict_array (31.1267,1.48983) 101 -dictGet test_01037.dict_array (31.138,-0.289981) 101 -dictGet test_01037.dict_array (31.1382,3.02904) 101 -dictGet test_01037.dict_array (31.1475,2.6178) 101 -dictGet test_01037.dict_array (31.1491,1.37873) 101 -dictGet test_01037.dict_array (31.1525,3.72105) 101 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 101 -dictGet test_01037.dict_array (31.1539,2.78789) 101 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 101 -dictGet test_01037.dict_array (31.1613,1.49617) 101 -dictGet test_01037.dict_array (31.1653,1.03777) 101 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 101 -dictGet test_01037.dict_array (31.1672,-0.0813169) 101 -dictGet test_01037.dict_array (31.177,0.440843) 101 -dictGet test_01037.dict_array (31.1788,-0.737151) 101 -dictGet test_01037.dict_array (31.1856,-0.144396) 101 -dictGet test_01037.dict_array (31.1959,3.66813) 101 -dictGet test_01037.dict_array (31.1996,-0.353983) 101 -dictGet test_01037.dict_array (31.2019,2.86802) 101 -dictGet test_01037.dict_array (31.2087,2.31245) 101 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 101 -dictGet test_01037.dict_array (31.2137,-0.108129) 101 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 101 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 101 -dictGet test_01037.dict_array (31.2645,-0.620418) 101 -dictGet test_01037.dict_array (31.2684,2.74277) 101 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 101 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 101 -dictGet test_01037.dict_array (31.2997,3.46122) 101 -dictGet test_01037.dict_array (31.3017,3.3263) 101 -dictGet test_01037.dict_array (31.3022,3.16754) 101 -dictGet test_01037.dict_array (31.3048,0.364962) 101 -dictGet test_01037.dict_array (31.305,3.1967) 101 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 101 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 101 -dictGet test_01037.dict_array (31.3388,0.168765) 5994233 -dictGet test_01037.dict_array (31.339,0.25535) 101 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 101 -dictGet test_01037.dict_array (31.349,0.386456) 101 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 101 -dictGet test_01037.dict_array (31.3607,-0.0860507) 5994233 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 101 -dictGet test_01037.dict_array 
(31.4043,-0.348907) 101 -dictGet test_01037.dict_array (31.4081,1.47391) 101 -dictGet test_01037.dict_array (31.4176,-0.583645) 101 -dictGet test_01037.dict_array (31.4177,1.36972) 101 -dictGet test_01037.dict_array (31.4182,0.958303) 101 -dictGet test_01037.dict_array (31.4199,3.1738) 101 -dictGet test_01037.dict_array (31.4221,2.74876) 101 -dictGet test_01037.dict_array (31.4301,-0.122643) 5994233 -dictGet test_01037.dict_array (31.4344,1.00661) 101 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 101 -dictGet test_01037.dict_array (31.4379,0.54744) 101 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 101 -dictGet test_01037.dict_array (31.464,0.726129) 101 -dictGet test_01037.dict_array (31.4662,-0.299019) 5994233 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 101 -dictGet test_01037.dict_array (31.4673,-0.403676) 101 -dictGet test_01037.dict_array (31.4712,-0.237941) 5994233 -dictGet test_01037.dict_array (31.4816,0.120264) 5994233 -dictGet test_01037.dict_array (31.4875,0.323483) 101 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 101 -dictGet test_01037.dict_array (31.4932,0.517674) 101 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 101 -dictGet test_01037.dict_array (31.5122,2.92785) 101 -dictGet test_01037.dict_array (31.5151,0.166429) 101 -dictGet test_01037.dict_array (31.5174,2.94802) 101 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 101 -dictGet test_01037.dict_array (31.5271,3.07446) 101 -dictGet test_01037.dict_array (31.5393,1.58061) 101 -dictGet test_01037.dict_array (31.5421,3.13711) 101 -dictGet test_01037.dict_array (31.5479,2.39897) 101 -dictGet test_01037.dict_array (31.5519,0.99285) 101 -dictGet test_01037.dict_array (31.5685,3.47987) 101 -dictGet test_01037.dict_array (31.5959,0.437382) 101 -dictGet test_01037.dict_array (31.6003,0.194376) 101 -dictGet test_01037.dict_array (31.6026,2.15457) 101 -dictGet test_01037.dict_array (31.606,2.45365) 101 -dictGet test_01037.dict_array (31.6062,-0.453441) 101 -dictGet test_01037.dict_array (31.6107,1.35247) 101 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 101 -dictGet test_01037.dict_array (31.6231,-0.123059) 101 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 101 -dictGet test_01037.dict_array (31.6459,0.669716) 101 -dictGet test_01037.dict_array (31.6563,-0.0644741) 101 -dictGet test_01037.dict_array (31.6618,-0.551121) 101 -dictGet test_01037.dict_array (31.6725,-0.38922) 101 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 101 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 101 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 101 -dictGet test_01037.dict_array (31.7036,0.0228348) 101 -dictGet test_01037.dict_array (31.7046,3.68111) 101 -dictGet test_01037.dict_array (31.7055,2.92556) 101 -dictGet test_01037.dict_array (31.7102,1.04532) 101 -dictGet test_01037.dict_array (31.7149,-0.443302) 101 -dictGet test_01037.dict_array (31.7195,2.99311) 101 -dictGet test_01037.dict_array (31.7274,0.166719) 101 -dictGet test_01037.dict_array (31.7565,-0.565382) 101 -dictGet test_01037.dict_array (31.7615,0.771626) 101 -dictGet 
test_01037.dict_array (31.7739,1.8970099999999999) 101 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 101 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 101 -dictGet test_01037.dict_array (31.8032,-0.0590108) 101 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 101 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 101 -dictGet test_01037.dict_array (31.8169,3.31059) 101 -dictGet test_01037.dict_array (31.8202,-0.193692) 101 -dictGet test_01037.dict_array (31.8306,1.57586) 101 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 101 -dictGet test_01037.dict_array (31.8436,2.41851) 101 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 101 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 101 -dictGet test_01037.dict_array (31.8737,0.881127) 101 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 101 -dictGet test_01037.dict_array (31.9066,2.13045) 101 -dictGet test_01037.dict_array (31.9142,1.03368) 101 -dictGet test_01037.dict_array (31.9155,3.38363) 101 -dictGet test_01037.dict_array (31.9168,1.3166) 101 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 101 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 101 -dictGet test_01037.dict_array (31.9526,1.36565) 101 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 101 -dictGet test_01037.dict_array (31.9765,0.975783) 101 -dictGet test_01037.dict_array (31.9923,3.31773) 101 -dictGet test_01037.dict_array (31.9994,0.972816) 101 -dictGet test_01037.dict_array (32.001,3.47425) 101 -dictGet test_01037.dict_array (32.0127,2.13874) 101 -dictGet test_01037.dict_array (32.0244,3.2092) 101 -dictGet test_01037.dict_array (32.029,1.18039) 101 -dictGet test_01037.dict_array (32.0315,0.566073) 101 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 101 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 101 -dictGet test_01037.dict_array (32.0542,0.042328) 101 -dictGet test_01037.dict_array (32.0576,2.47001) 101 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 101 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 101 -dictGet test_01037.dict_array (32.0666,-0.0904247) 101 -dictGet test_01037.dict_array (32.0681,2.28442) 101 -dictGet test_01037.dict_array (32.0692,1.50869) 101 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 101 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 101 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 101 -dictGet test_01037.dict_array (32.1413,1.5668) 101 -dictGet test_01037.dict_array (32.143,1.35559) 101 -dictGet test_01037.dict_array (32.1538,1.32881) 101 -dictGet 
test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 101 -dictGet test_01037.dict_array (32.1634,2.94273) 101 -dictGet test_01037.dict_array (32.1644,1.85666) 101 -dictGet test_01037.dict_array (32.1745,0.435458) 101 -dictGet test_01037.dict_array (32.1765,1.65149) 101 -dictGet test_01037.dict_array (32.1893,2.08924) 101 -dictGet test_01037.dict_array (32.2024,0.222191) 101 -dictGet test_01037.dict_array (32.2107,1.34379) 101 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 101 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet test_01037.dict_array (32.2218,2.5386699999999998) 101 -dictGet test_01037.dict_array (32.2279,2.84267) 101 -dictGet test_01037.dict_array (32.2345,3.33295) 101 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 101 -dictGet test_01037.dict_array (32.2733,0.244931) 101 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 101 -dictGet test_01037.dict_array (32.2924,0.928609) 101 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 101 -dictGet test_01037.dict_array (32.3107,0.698287) 101 -dictGet test_01037.dict_array (32.3138,0.0595677) 101 -dictGet test_01037.dict_array (32.3339,0.707056) 101 -dictGet test_01037.dict_array (32.3351,0.415474) 101 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 101 -dictGet test_01037.dict_array (32.3494,2.43799) 101 -dictGet test_01037.dict_array (32.3524,3.47049) 101 -dictGet test_01037.dict_array (32.3531,2.33115) 101 -dictGet test_01037.dict_array (32.3602,0.116106) 101 -dictGet test_01037.dict_array (32.3612,1.1598) 101 -dictGet test_01037.dict_array (32.3689,3.34847) 101 -dictGet test_01037.dict_array (32.3695,0.734055) 101 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 101 -dictGet test_01037.dict_array (32.4101,2.9988) 101 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 101 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 101 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 101 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 101 -dictGet test_01037.dict_array (32.4401,3.70635) 101 -dictGet test_01037.dict_array (32.45,1.68918) 101 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 101 -dictGet test_01037.dict_array (32.4595,1.89135) 101 -dictGet test_01037.dict_array (32.4604,0.280248) 101 -dictGet test_01037.dict_array (32.4835,0.472731) 101 -dictGet test_01037.dict_array (32.4855,2.01938) 101 -dictGet test_01037.dict_array (32.4872,2.01697) 101 -dictGet test_01037.dict_array (32.4911,0.613106) 101 -dictGet 
test_01037.dict_array (32.4918,2.17834) 101 -dictGet test_01037.dict_array (32.4947,2.34595) 101 -dictGet test_01037.dict_array (32.5035,2.92234) 101 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 7652581 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 101 -dictGet test_01037.dict_array (32.5249,2.44519) 101 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 101 -dictGet test_01037.dict_array (32.5541,3.5225400000000002) 101 -dictGet test_01037.dict_array (32.5569,0.816123) 101 -dictGet test_01037.dict_array (32.5646,1.9775) 101 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 101 -dictGet test_01037.dict_array (32.5971,1.76179) 101 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 101 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 101 -dictGet test_01037.dict_array (32.6315,2.15144) 101 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 101 -dictGet test_01037.dict_array (32.6592,3.49973) 101 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 101 -dictGet test_01037.dict_array (32.6612,2.95681) 101 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 101 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 101 -dictGet test_01037.dict_array (32.6821,0.57336) 101 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 101 -dictGet test_01037.dict_array (32.7122,0.794293) 101 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 101 -dictGet test_01037.dict_array (32.7228,1.78808) 101 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 101 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 101 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 101 -dictGet test_01037.dict_array (32.753,3.44207) 101 -dictGet test_01037.dict_array (32.7686,2.11713) 101 -dictGet test_01037.dict_array (32.7694,1.47159) 101 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 101 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 101 -dictGet test_01037.dict_array (32.797,0.791753) 101 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 101 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 101 
-dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 101 -dictGet test_01037.dict_array (32.8233,1.62108) 101 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 101 -dictGet test_01037.dict_array (32.8524,0.199902) 101 -dictGet test_01037.dict_array (32.8543,3.23553) 101 -dictGet test_01037.dict_array (32.8562,1.31371) 101 -dictGet test_01037.dict_array (32.87,1.44256) 101 -dictGet test_01037.dict_array (32.8789,2.38192) 101 -dictGet test_01037.dict_array (32.8812,2.20734) 5999168 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 5999168 -dictGet test_01037.dict_array (32.8909,0.513964) 101 -dictGet test_01037.dict_array (32.9035,2.38999) 101 -dictGet test_01037.dict_array (32.9097,2.48131) 5999168 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 101 -dictGet test_01037.dict_array (32.9348,1.22606) 101 -dictGet test_01037.dict_array (32.9417,3.77998) 101 -dictGet test_01037.dict_array (32.9428,3.11936) 101 -dictGet test_01037.dict_array (32.9482,1.18092) 101 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 101 -dictGet test_01037.dict_array (32.9617,2.07711) 5999168 -dictGet test_01037.dict_array (32.966,0.693749) 101 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 101 -dictGet test_01037.dict_array (32.9782,1.73819) 101 -dictGet test_01037.dict_array (32.9805,3.71151) 101 -dictGet test_01037.dict_array (32.9821,2.97225) 101 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 101 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 101 -dictGet test_01037.dict_array (33.0455,1.84843) 101 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 101 -dictGet test_01037.dict_array (33.0554,2.4265) 101 -dictGet test_01037.dict_array (33.0741,3.61332) 101 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 101 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 101 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 101 -dictGet test_01037.dict_array (33.1237,2.63993) 101 -dictGet test_01037.dict_array (33.1238,0.753963) 101 -dictGet test_01037.dict_array (33.1257,0.495668) 101 -dictGet test_01037.dict_array (33.1258,1.78341) 101 -dictGet test_01037.dict_array (33.127,2.59646) 101 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 101 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 101 -dictGet test_01037.dict_array (33.1871,2.81171) 101 -dictGet test_01037.dict_array (33.1877,0.771297) 101 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 101 -dictGet test_01037.dict_array (33.1955,0.708907) 101 -dictGet 
test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 101 -dictGet test_01037.dict_array (33.2192,3.43586) 101 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 -dictGet test_01037.dict_array (33.238,0.635072) 101 -dictGet test_01037.dict_array (33.2398,3.02588) 101 -dictGet test_01037.dict_array (33.2469,2.35698) 101 -dictGet test_01037.dict_array (33.247,2.3327) 101 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 101 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 101 -dictGet test_01037.dict_array (33.2771,0.886536) 101 -dictGet test_01037.dict_array (33.2914,1.16026) 101 -dictGet test_01037.dict_array (33.2914,1.38882) 101 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 101 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 101 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 101 -dictGet test_01037.dict_array (33.3129,2.46048) 101 -dictGet test_01037.dict_array (33.3134,3.46863) 101 -dictGet test_01037.dict_array (33.3203,2.33139) 101 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 101 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 101 -dictGet test_01037.dict_array (33.3538,0.717896) 101 -dictGet test_01037.dict_array (33.3618,1.37899) 101 -dictGet test_01037.dict_array (33.3698,0.547744) 101 -dictGet test_01037.dict_array (33.3705,0.957619) 101 -dictGet test_01037.dict_array (33.3821,3.07258) 101 -dictGet test_01037.dict_array (33.3881,3.0626) 101 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 101 -dictGet test_01037.dict_array (33.4001,1.24186) 101 -dictGet test_01037.dict_array (33.4008,2.34911) 101 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 101 -dictGet test_01037.dict_array (33.4204,2.81887) 101 -dictGet test_01037.dict_array (33.4211,1.71128) 101 -dictGet test_01037.dict_array (33.4237,2.91761) 101 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 101 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 101 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 101 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 101 -dictGet test_01037.dict_array (33.4756,1.85836) 101 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 101 -dictGet test_01037.dict_array (33.4935,3.49794) 101 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 101 -dictGet test_01037.dict_array (33.5076,2.2272) 101 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 101 -dictGet test_01037.dict_array 
(33.5171,2.49677) 101 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 101 -dictGet test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 101 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 101 -dictGet test_01037.dict_array (33.5418,3.45757) 101 -dictGet test_01037.dict_array (33.5428,1.90712) 101 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 101 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 101 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 101 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 101 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 5994231 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 101 -dictGet test_01037.dict_array (33.632,0.823351) 101 -dictGet test_01037.dict_array (33.66,2.18998) 101 -dictGet test_01037.dict_array (33.6621,0.535395) 101 -dictGet test_01037.dict_array (33.6726,3.19367) 101 -dictGet test_01037.dict_array (33.6912,1.74522) 101 -dictGet test_01037.dict_array (33.705,0.706397) 101 -dictGet test_01037.dict_array (33.7076,0.7622) 101 -dictGet test_01037.dict_array (33.7112,1.70187) 101 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 5994231 -dictGet test_01037.dict_array (33.7332,2.82137) 5994231 -dictGet test_01037.dict_array (33.7434,0.394672) 101 -dictGet test_01037.dict_array (33.7443,1.54557) 101 -dictGet test_01037.dict_array (33.7506,1.57317) 101 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 101 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 101 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 101 -dictGet test_01037.dict_array (33.7998,3.32881) 101 -dictGet test_01037.dict_array (33.8003,2.97338) 5994231 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 5994231 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 101 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 101 -dictGet test_01037.dict_array (33.8342,0.297518) 101 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 101 -dictGet test_01037.dict_array (33.8371,1.60103) 101 -dictGet test_01037.dict_array (33.8387,1.9968) 101 -dictGet test_01037.dict_array (33.8403,3.5805) 101 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 101 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 101 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 101 -dictGet test_01037.dict_array (33.8616,2.24433) 101 -dictGet test_01037.dict_array (33.8621,3.01035) 101 -dictGet test_01037.dict_array (33.8623,1.17559) 101 -dictGet test_01037.dict_array (33.8682,2.706) 5994231 -dictGet test_01037.dict_array (33.8684,0.189231) 101 
-dictGet test_01037.dict_array (33.872,1.93574) 101 -dictGet test_01037.dict_array (33.8844,3.80404) 101 -dictGet test_01037.dict_array (33.8888,0.594884) 101 -dictGet test_01037.dict_array (33.8946,2.74161) 101 -dictGet test_01037.dict_array (33.9023,0.6239) 101 -dictGet test_01037.dict_array (33.9057,0.873222) 101 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 101 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 101 -dictGet test_01037.dict_array (33.9322,0.629385) 101 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 101 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 101 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 101 -dictGet test_01037.dict_array (33.95,2.21715) 101 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 101 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 101 -dictGet test_01037.dict_array (33.9707,3.08199) 101 -dictGet test_01037.dict_array (33.9754,1.06203) 101 -dictGet test_01037.dict_array (33.9757,3.72468) 101 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 5994231 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 101 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 101 -dictGet test_01037.dict_array (34.0173,3.39155) 101 -dictGet test_01037.dict_array (34.0317,3.9579) 101 -dictGet test_01037.dict_array (34.0369,3.83612) 101 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 101 -dictGet test_01037.dict_array (34.052,1.74832) 101 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 101 -dictGet test_01037.dict_array (34.0803,3.16763) 101 -dictGet test_01037.dict_array (34.0872,3.75555) 101 -dictGet test_01037.dict_array (34.0965,1.62038) 101 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 101 -dictGet test_01037.dict_array (34.1092,3.09599) 101 -dictGet test_01037.dict_array (34.1206,1.04637) 5940222 -dictGet test_01037.dict_array (34.1209,3.13826) 101 -dictGet test_01037.dict_array (34.1265,3.95881) 101 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 101 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 101 -dictGet test_01037.dict_array (34.1581,1.48001) 101 -dictGet test_01037.dict_array (34.1682,3.42373) 101 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 101 -dictGet test_01037.dict_array (34.2022,0.749536) 101 -dictGet test_01037.dict_array (34.2044,3.04865) 101 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 101 -dictGet test_01037.dict_array (34.2254,1.34702) 101 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 101 -dictGet test_01037.dict_array 
(34.2466,1.83785) 101 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 101 -dictGet test_01037.dict_array (34.2517,1.69642) 101 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 101 -dictGet test_01037.dict_array (34.2666,3.1503) 101 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 101 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 101 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 101 -dictGet test_01037.dict_array (34.3377,1.13527) 5940222 -dictGet test_01037.dict_array (34.3383,1.27891) 5940222 -dictGet test_01037.dict_array (34.3391,1.47945) 5940222 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 101 -dictGet test_01037.dict_array (34.3514,2.16247) 101 -dictGet test_01037.dict_array (34.3627,2.64533) 101 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 101 -dictGet test_01037.dict_array (34.3912,2.8834) 101 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 101 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 101 -dictGet test_01037.dict_array (34.416,3.50841) 101 -dictGet test_01037.dict_array (34.4212,1.24916) 5940222 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 101 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 101 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 101 -dictGet test_01037.dict_array (34.4914,2.3858) 101 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 101 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 101 -dictGet test_01037.dict_array (34.5491,2.31572) 101 -dictGet test_01037.dict_array (34.5539,2.94028) 101 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 101 -dictGet test_01037.dict_array (34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 101 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 101 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array 
(34.6089,1.36066) 101 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 101 -dictGet test_01037.dict_array (34.6175,2.43627) 101 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 101 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 101 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 101 -dictGet test_01037.dict_array (34.6513,2.80915) 101 -dictGet test_01037.dict_array (34.6632,2.30039) 101 -dictGet test_01037.dict_array (34.6691,1.86582) 101 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 101 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 101 -dictGet test_01037.dict_array (34.7771,1.46009) 101 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 101 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 101 -dictGet test_01037.dict_array (34.8437,2.20617) 101 -dictGet test_01037.dict_array (34.8469,2.38435) 101 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 101 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 101 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet 
test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 101 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 101 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934570 +dictGet dict_array (29.6579,-1.07336) 101 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934570 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 934570 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 101 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 101 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 101 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 101 +dictGet dict_array (29.8277,-0.531746) 101 +dictGet dict_array (29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 101 +dictGet dict_array (29.8649,0.0562212) 101 +dictGet dict_array (29.8701,-1.02045) 101 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 101 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 
+dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 101 +dictGet dict_array (29.9201,-0.420861) 101 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 101 +dictGet dict_array (29.9564,-0.600163) 101 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 101 +dictGet dict_array (29.9699,-0.392375) 101 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 101 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 101 +dictGet dict_array (30.0472,-1.11007) 101 +dictGet dict_array (30.0542,-0.479585) 101 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 101 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 101 +dictGet dict_array (30.0791,-0.277435) 101 +dictGet dict_array (30.0931,0.0327512) 101 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 101 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 101 +dictGet dict_array (30.1336,-0.851386) 101 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 101 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 101 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 101 +dictGet dict_array (30.2054,0.620143) 101 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 101 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 101 +dictGet dict_array (30.2425,-0.472914) 101 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 101 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 101 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 101 +dictGet dict_array (30.286,0.469327) 101 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 101 +dictGet dict_array (30.3085,1.07791) 101 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 101 +dictGet dict_array (30.3227,-0.587351) 101 +dictGet dict_array (30.332,1.00174) 101 +dictGet dict_array (30.3388,0.844148) 101 +dictGet dict_array (30.3485,0.561902) 101 +dictGet dict_array (30.3497,0.180362) 101 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 101 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array 
(30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 101 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 101 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 101 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 101 +dictGet dict_array (30.485,1.06326) 101 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 101 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 101 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 101 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 101 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 101 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 101 +dictGet dict_array (30.5715,0.65755) 101 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 101 +dictGet dict_array (30.5751,0.60204) 101 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 101 +dictGet dict_array (30.5947,0.875193) 101 +dictGet dict_array (30.5992,-0.403901) 101 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 101 +dictGet dict_array (30.6054,0.927203) 101 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 101 +dictGet dict_array (30.6338,0.179565) 101 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 101 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 101 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 101 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 101 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 101 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array (30.747,-0.384779) 101 +dictGet dict_array (30.7681,0.904198) 101 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 101 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 101 +dictGet dict_array (30.8218,0.43909) 101 +dictGet dict_array (30.8239,0.10014) 101 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 101 +dictGet dict_array (30.8326,0.156011) 101 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 101 +dictGet dict_array (30.8463,0.0515355) 101 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array 
(30.8566,0.517876) 101 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 101 +dictGet dict_array (30.8914,1.0072) 101 +dictGet dict_array (30.897,0.483425) 101 +dictGet dict_array (30.905,2.8731999999999998) 101 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 101 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 101 +dictGet dict_array (30.9569,2.82038) 101 +dictGet dict_array (30.9598,-0.641011) 101 +dictGet dict_array (30.9601,-0.254928) 101 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 101 +dictGet dict_array (30.9766,2.81957) 101 +dictGet dict_array (30.9775,2.69273) 101 +dictGet dict_array (30.9821,0.587715) 101 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 101 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 101 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 101 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 101 +dictGet dict_array (31.0463,2.41997) 101 +dictGet dict_array (31.047,0.624184) 101 +dictGet dict_array (31.0569,0.0706393) 5994232 +dictGet dict_array (31.0583,1.3244099999999999) 101 +dictGet dict_array (31.063,3.23861) 101 +dictGet dict_array (31.068,0.695575) 101 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 101 +dictGet dict_array (31.0766,0.828128) 101 +dictGet dict_array (31.0833,0.0612782) 5994232 +dictGet dict_array (31.0833,2.59748) 101 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 101 +dictGet dict_array (31.0882,1.4882) 101 +dictGet dict_array (31.0924,3.42242) 101 +dictGet dict_array (31.0927,2.67448) 101 +dictGet dict_array (31.0936,1.12292) 101 +dictGet dict_array (31.0952,-0.336928) 101 +dictGet dict_array (31.0978,3.48482) 101 +dictGet dict_array (31.1107,3.7513199999999998) 101 +dictGet dict_array (31.1156,1.19171) 101 +dictGet dict_array (31.1176,0.223509) 5994232 +dictGet dict_array (31.1249,0.946838) 101 +dictGet dict_array (31.1267,1.48983) 101 +dictGet dict_array (31.138,-0.289981) 101 +dictGet dict_array (31.1382,3.02904) 101 +dictGet dict_array (31.1475,2.6178) 101 +dictGet dict_array (31.1491,1.37873) 101 +dictGet dict_array (31.1525,3.72105) 101 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 101 +dictGet dict_array (31.1539,2.78789) 101 +dictGet dict_array (31.1548,-1.08552) 101 +dictGet dict_array (31.1567,-0.0768925) 101 +dictGet dict_array (31.1613,1.49617) 101 +dictGet dict_array (31.1653,1.03777) 101 +dictGet dict_array (31.1662,3.4214700000000002) 101 +dictGet dict_array (31.1672,-0.0813169) 101 +dictGet dict_array (31.177,0.440843) 101 +dictGet dict_array (31.1788,-0.737151) 101 +dictGet dict_array (31.1856,-0.144396) 101 +dictGet dict_array (31.1959,3.66813) 101 +dictGet dict_array (31.1996,-0.353983) 101 +dictGet dict_array (31.2019,2.86802) 101 +dictGet dict_array (31.2087,2.31245) 101 +dictGet dict_array (31.2125,3.2713200000000002) 101 +dictGet dict_array (31.2137,-0.108129) 101 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 101 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 101 +dictGet dict_array (31.2645,-0.620418) 101 +dictGet dict_array 
(31.2684,2.74277) 101 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 101 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 101 +dictGet dict_array (31.2997,3.46122) 101 +dictGet dict_array (31.3017,3.3263) 101 +dictGet dict_array (31.3022,3.16754) 101 +dictGet dict_array (31.3048,0.364962) 101 +dictGet dict_array (31.305,3.1967) 101 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 101 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 101 +dictGet dict_array (31.3388,0.168765) 5994233 +dictGet dict_array (31.339,0.25535) 101 +dictGet dict_array (31.3423,1.7036799999999999) 101 +dictGet dict_array (31.349,0.386456) 101 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 101 +dictGet dict_array (31.3607,-0.0860507) 5994233 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 101 +dictGet dict_array (31.4043,-0.348907) 101 +dictGet dict_array (31.4081,1.47391) 101 +dictGet dict_array (31.4176,-0.583645) 101 +dictGet dict_array (31.4177,1.36972) 101 +dictGet dict_array (31.4182,0.958303) 101 +dictGet dict_array (31.4199,3.1738) 101 +dictGet dict_array (31.4221,2.74876) 101 +dictGet dict_array (31.4301,-0.122643) 5994233 +dictGet dict_array (31.4344,1.00661) 101 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 101 +dictGet dict_array (31.4379,0.54744) 101 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 101 +dictGet dict_array (31.464,0.726129) 101 +dictGet dict_array (31.4662,-0.299019) 5994233 +dictGet dict_array (31.4671,1.9605299999999999) 101 +dictGet dict_array (31.4673,-0.403676) 101 +dictGet dict_array (31.4712,-0.237941) 5994233 +dictGet dict_array (31.4816,0.120264) 5994233 +dictGet dict_array (31.4875,0.323483) 101 +dictGet dict_array (31.490099999999998,-0.338163) 101 +dictGet dict_array (31.4932,0.517674) 101 +dictGet dict_array (31.5112,1.9689299999999998) 101 +dictGet dict_array (31.5122,2.92785) 101 +dictGet dict_array (31.5151,0.166429) 101 +dictGet dict_array (31.5174,2.94802) 101 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 101 +dictGet dict_array (31.5271,3.07446) 101 +dictGet dict_array (31.5393,1.58061) 101 +dictGet dict_array (31.5421,3.13711) 101 +dictGet dict_array (31.5479,2.39897) 101 +dictGet dict_array (31.5519,0.99285) 101 +dictGet dict_array (31.5685,3.47987) 101 +dictGet dict_array (31.5959,0.437382) 101 +dictGet dict_array (31.6003,0.194376) 101 +dictGet dict_array (31.6026,2.15457) 101 +dictGet dict_array (31.606,2.45365) 101 +dictGet dict_array (31.6062,-0.453441) 101 +dictGet dict_array (31.6107,1.35247) 101 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 101 +dictGet dict_array (31.6231,-0.123059) 101 +dictGet dict_array (31.6244,1.6885599999999998) 101 +dictGet dict_array (31.6459,0.669716) 101 +dictGet dict_array (31.6563,-0.0644741) 101 +dictGet dict_array (31.6618,-0.551121) 101 +dictGet dict_array (31.6725,-0.38922) 101 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 101 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 101 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 101 +dictGet dict_array (31.7036,0.0228348) 101 +dictGet dict_array (31.7046,3.68111) 101 +dictGet dict_array (31.7055,2.92556) 101 
+dictGet dict_array (31.7102,1.04532) 101 +dictGet dict_array (31.7149,-0.443302) 101 +dictGet dict_array (31.7195,2.99311) 101 +dictGet dict_array (31.7274,0.166719) 101 +dictGet dict_array (31.7565,-0.565382) 101 +dictGet dict_array (31.7615,0.771626) 101 +dictGet dict_array (31.7739,1.8970099999999999) 101 +dictGet dict_array (31.7848,1.2623199999999999) 101 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 101 +dictGet dict_array (31.8032,-0.0590108) 101 +dictGet dict_array (31.8038,1.9618799999999998) 101 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 101 +dictGet dict_array (31.8169,3.31059) 101 +dictGet dict_array (31.8202,-0.193692) 101 +dictGet dict_array (31.8306,1.57586) 101 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 101 +dictGet dict_array (31.8436,2.41851) 101 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 101 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 101 +dictGet dict_array (31.8737,0.881127) 101 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 101 +dictGet dict_array (31.9066,2.13045) 101 +dictGet dict_array (31.9142,1.03368) 101 +dictGet dict_array (31.9155,3.38363) 101 +dictGet dict_array (31.9168,1.3166) 101 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 101 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 101 +dictGet dict_array (31.9526,1.36565) 101 +dictGet dict_array (31.9629,2.5208399999999997) 101 +dictGet dict_array (31.9765,0.975783) 101 +dictGet dict_array (31.9923,3.31773) 101 +dictGet dict_array (31.9994,0.972816) 101 +dictGet dict_array (32.001,3.47425) 101 +dictGet dict_array (32.0127,2.13874) 101 +dictGet dict_array (32.0244,3.2092) 101 +dictGet dict_array (32.029,1.18039) 101 +dictGet dict_array (32.0315,0.566073) 101 +dictGet dict_array (32.0354,1.0766499999999999) 101 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 101 +dictGet dict_array (32.0542,0.042328) 101 +dictGet dict_array (32.0576,2.47001) 101 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 101 +dictGet dict_array (32.0626,1.9611399999999999) 101 +dictGet dict_array (32.0666,-0.0904247) 101 +dictGet dict_array (32.0681,2.28442) 101 +dictGet dict_array (32.0692,1.50869) 101 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 101 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 101 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 101 +dictGet dict_array (32.1413,1.5668) 101 +dictGet dict_array (32.143,1.35559) 101 +dictGet dict_array (32.1538,1.32881) 101 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 101 +dictGet dict_array (32.1634,2.94273) 101 +dictGet dict_array (32.1644,1.85666) 101 +dictGet dict_array (32.1745,0.435458) 101 +dictGet dict_array (32.1765,1.65149) 101 +dictGet dict_array (32.1893,2.08924) 101 +dictGet dict_array (32.2024,0.222191) 101 +dictGet dict_array (32.2107,1.34379) 101 +dictGet dict_array 
(32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 101 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 101 +dictGet dict_array (32.2279,2.84267) 101 +dictGet dict_array (32.2345,3.33295) 101 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 101 +dictGet dict_array (32.2733,0.244931) 101 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 101 +dictGet dict_array (32.2924,0.928609) 101 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 101 +dictGet dict_array (32.3107,0.698287) 101 +dictGet dict_array (32.3138,0.0595677) 101 +dictGet dict_array (32.3339,0.707056) 101 +dictGet dict_array (32.3351,0.415474) 101 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 101 +dictGet dict_array (32.3494,2.43799) 101 +dictGet dict_array (32.3524,3.47049) 101 +dictGet dict_array (32.3531,2.33115) 101 +dictGet dict_array (32.3602,0.116106) 101 +dictGet dict_array (32.3612,1.1598) 101 +dictGet dict_array (32.3689,3.34847) 101 +dictGet dict_array (32.3695,0.734055) 101 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 101 +dictGet dict_array (32.4101,2.9988) 101 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 101 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 101 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 101 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 101 +dictGet dict_array (32.4401,3.70635) 101 +dictGet dict_array (32.45,1.68918) 101 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 101 +dictGet dict_array (32.4595,1.89135) 101 +dictGet dict_array (32.4604,0.280248) 101 +dictGet dict_array (32.4835,0.472731) 101 +dictGet dict_array (32.4855,2.01938) 101 +dictGet dict_array (32.4872,2.01697) 101 +dictGet dict_array (32.4911,0.613106) 101 +dictGet dict_array (32.4918,2.17834) 101 +dictGet dict_array (32.4947,2.34595) 101 +dictGet dict_array (32.5035,2.92234) 101 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 7652581 +dictGet dict_array (32.5158,2.9067499999999997) 101 +dictGet dict_array (32.5249,2.44519) 101 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 101 +dictGet dict_array (32.5541,3.5225400000000002) 101 +dictGet dict_array (32.5569,0.816123) 101 +dictGet dict_array (32.5646,1.9775) 101 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 101 +dictGet dict_array (32.5971,1.76179) 101 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 101 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 101 +dictGet dict_array (32.6315,2.15144) 101 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 101 
+dictGet dict_array (32.6592,3.49973) 101 +dictGet dict_array (32.6598,2.5980600000000003) 101 +dictGet dict_array (32.6612,2.95681) 101 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 101 +dictGet dict_array (32.6811,2.9559800000000003) 101 +dictGet dict_array (32.6821,0.57336) 101 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 101 +dictGet dict_array (32.7122,0.794293) 101 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 101 +dictGet dict_array (32.7228,1.78808) 101 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 101 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 101 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 101 +dictGet dict_array (32.753,3.44207) 101 +dictGet dict_array (32.7686,2.11713) 101 +dictGet dict_array (32.7694,1.47159) 101 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 101 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 101 +dictGet dict_array (32.797,0.791753) 101 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 101 +dictGet dict_array (32.81,1.7130800000000002) 101 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 101 +dictGet dict_array (32.8233,1.62108) 101 +dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 101 +dictGet dict_array (32.8524,0.199902) 101 +dictGet dict_array (32.8543,3.23553) 101 +dictGet dict_array (32.8562,1.31371) 101 +dictGet dict_array (32.87,1.44256) 101 +dictGet dict_array (32.8789,2.38192) 101 +dictGet dict_array (32.8812,2.20734) 5999168 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 5999168 +dictGet dict_array (32.8909,0.513964) 101 +dictGet dict_array (32.9035,2.38999) 101 +dictGet dict_array (32.9097,2.48131) 5999168 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 101 +dictGet dict_array (32.9348,1.22606) 101 +dictGet dict_array (32.9417,3.77998) 101 +dictGet dict_array (32.9428,3.11936) 101 +dictGet dict_array (32.9482,1.18092) 101 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 101 +dictGet dict_array (32.9617,2.07711) 5999168 +dictGet dict_array (32.966,0.693749) 101 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 101 +dictGet dict_array (32.9782,1.73819) 101 +dictGet dict_array (32.9805,3.71151) 101 +dictGet dict_array (32.9821,2.97225) 101 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 101 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 101 +dictGet dict_array (33.0455,1.84843) 101 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 101 +dictGet dict_array (33.0554,2.4265) 101 +dictGet dict_array (33.0741,3.61332) 101 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array 
(33.087,1.46465) 101 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 101 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 101 +dictGet dict_array (33.1237,2.63993) 101 +dictGet dict_array (33.1238,0.753963) 101 +dictGet dict_array (33.1257,0.495668) 101 +dictGet dict_array (33.1258,1.78341) 101 +dictGet dict_array (33.127,2.59646) 101 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 101 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 101 +dictGet dict_array (33.1871,2.81171) 101 +dictGet dict_array (33.1877,0.771297) 101 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 101 +dictGet dict_array (33.1955,0.708907) 101 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 101 +dictGet dict_array (33.2192,3.43586) 101 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 101 +dictGet dict_array (33.2398,3.02588) 101 +dictGet dict_array (33.2469,2.35698) 101 +dictGet dict_array (33.247,2.3327) 101 +dictGet dict_array (33.2579,2.8027100000000003) 101 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 101 +dictGet dict_array (33.2771,0.886536) 101 +dictGet dict_array (33.2914,1.16026) 101 +dictGet dict_array (33.2914,1.38882) 101 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 101 +dictGet dict_array (33.3005,2.8338900000000002) 101 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 101 +dictGet dict_array (33.3129,2.46048) 101 +dictGet dict_array (33.3134,3.46863) 101 +dictGet dict_array (33.3203,2.33139) 101 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 101 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 101 +dictGet dict_array (33.3538,0.717896) 101 +dictGet dict_array (33.3618,1.37899) 101 +dictGet dict_array (33.3698,0.547744) 101 +dictGet dict_array (33.3705,0.957619) 101 +dictGet dict_array (33.3821,3.07258) 101 +dictGet dict_array (33.3881,3.0626) 101 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 101 +dictGet dict_array (33.4001,1.24186) 101 +dictGet dict_array (33.4008,2.34911) 101 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 101 +dictGet dict_array (33.4204,2.81887) 101 +dictGet dict_array (33.4211,1.71128) 101 +dictGet dict_array (33.4237,2.91761) 101 +dictGet dict_array (33.4266,1.5955599999999999) 101 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 101 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 101 +dictGet dict_array (33.4716,1.9538799999999998) 101 +dictGet dict_array (33.4756,1.85836) 101 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 101 +dictGet dict_array (33.4935,3.49794) 101 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array 
(33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 101 +dictGet dict_array (33.5076,2.2272) 101 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 101 +dictGet dict_array (33.5171,2.49677) 101 +dictGet dict_array (33.5255,2.4447200000000002) 101 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 101 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 101 +dictGet dict_array (33.5418,3.45757) 101 +dictGet dict_array (33.5428,1.90712) 101 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 101 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 101 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 101 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 101 +dictGet dict_array (33.5958,2.9619999999999997) 5994231 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 101 +dictGet dict_array (33.632,0.823351) 101 +dictGet dict_array (33.66,2.18998) 101 +dictGet dict_array (33.6621,0.535395) 101 +dictGet dict_array (33.6726,3.19367) 101 +dictGet dict_array (33.6912,1.74522) 101 +dictGet dict_array (33.705,0.706397) 101 +dictGet dict_array (33.7076,0.7622) 101 +dictGet dict_array (33.7112,1.70187) 101 +dictGet dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 5994231 +dictGet dict_array (33.7332,2.82137) 5994231 +dictGet dict_array (33.7434,0.394672) 101 +dictGet dict_array (33.7443,1.54557) 101 +dictGet dict_array (33.7506,1.57317) 101 +dictGet dict_array (33.7526,1.8578999999999999) 101 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 101 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 101 +dictGet dict_array (33.7998,3.32881) 101 +dictGet dict_array (33.8003,2.97338) 5994231 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 5994231 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 101 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 101 +dictGet dict_array (33.8342,0.297518) 101 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 101 +dictGet dict_array (33.8371,1.60103) 101 +dictGet dict_array (33.8387,1.9968) 101 +dictGet dict_array (33.8403,3.5805) 101 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 101 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 101 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 101 +dictGet dict_array (33.8616,2.24433) 101 +dictGet dict_array (33.8621,3.01035) 101 +dictGet dict_array (33.8623,1.17559) 101 +dictGet dict_array (33.8682,2.706) 5994231 +dictGet dict_array (33.8684,0.189231) 101 +dictGet dict_array (33.872,1.93574) 101 +dictGet dict_array (33.8844,3.80404) 101 +dictGet dict_array (33.8888,0.594884) 101 +dictGet dict_array (33.8946,2.74161) 101 +dictGet dict_array (33.9023,0.6239) 101 +dictGet dict_array (33.9057,0.873222) 101 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 101 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 101 +dictGet dict_array (33.9322,0.629385) 101 +dictGet dict_array 
(33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 101 +dictGet dict_array (33.9375,1.1467100000000001) 101 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 101 +dictGet dict_array (33.95,2.21715) 101 +dictGet dict_array (33.955799999999996,0.305176) 101 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 101 +dictGet dict_array (33.9707,3.08199) 101 +dictGet dict_array (33.9754,1.06203) 101 +dictGet dict_array (33.9757,3.72468) 101 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 5994231 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 101 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 101 +dictGet dict_array (34.0173,3.39155) 101 +dictGet dict_array (34.0317,3.9579) 101 +dictGet dict_array (34.0369,3.83612) 101 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 101 +dictGet dict_array (34.052,1.74832) 101 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 101 +dictGet dict_array (34.0803,3.16763) 101 +dictGet dict_array (34.0872,3.75555) 101 +dictGet dict_array (34.0965,1.62038) 101 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 101 +dictGet dict_array (34.1092,3.09599) 101 +dictGet dict_array (34.1206,1.04637) 5940222 +dictGet dict_array (34.1209,3.13826) 101 +dictGet dict_array (34.1265,3.95881) 101 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 101 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 101 +dictGet dict_array (34.1581,1.48001) 101 +dictGet dict_array (34.1682,3.42373) 101 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 101 +dictGet dict_array (34.2022,0.749536) 101 +dictGet dict_array (34.2044,3.04865) 101 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 101 +dictGet dict_array (34.2254,1.34702) 101 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 101 +dictGet dict_array (34.2466,1.83785) 101 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 101 +dictGet dict_array (34.2517,1.69642) 101 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 101 +dictGet dict_array (34.2666,3.1503) 101 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 101 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 101 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 101 +dictGet dict_array (34.3377,1.13527) 5940222 +dictGet dict_array (34.3383,1.27891) 5940222 +dictGet dict_array (34.3391,1.47945) 5940222 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 101 +dictGet dict_array (34.3514,2.16247) 101 +dictGet dict_array (34.3627,2.64533) 101 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 101 +dictGet dict_array (34.3912,2.8834) 101 +dictGet 
dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 101 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 101 +dictGet dict_array (34.416,3.50841) 101 +dictGet dict_array (34.4212,1.24916) 5940222 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 101 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 101 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 101 +dictGet dict_array (34.4914,2.3858) 101 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 101 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 101 +dictGet dict_array (34.5491,2.31572) 101 +dictGet dict_array (34.5539,2.94028) 101 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 101 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 101 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 101 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 101 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 101 +dictGet dict_array (34.6175,2.43627) 101 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 101 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 101 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 101 +dictGet dict_array (34.6513,2.80915) 101 +dictGet dict_array (34.6632,2.30039) 101 +dictGet dict_array (34.6691,1.86582) 101 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 101 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 101 +dictGet dict_array (34.7771,1.46009) 101 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 101 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 101 +dictGet dict_array (34.8437,2.20617) 101 +dictGet dict_array (34.8469,2.38435) 101 
+dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 101 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 101 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array (34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 101 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index c2a35a3ef63..fff786d6c06 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,20 +12,18 @@ declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${CURDIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +DROP TABLE IF EXISTS points; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" rm "${CURDIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,7 +32,7 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" rm "${CURDIR}"/01037_polygon_data @@ -43,27 +41,22 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF 
EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_all.ans" "$outputFile" done - -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans index 45fa7637421..297c8416096 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.ans @@ -1,1000 +1,1000 @@ -dictGet test_01037.dict_array (29.5699,2.50068) 101 -dictGet test_01037.dict_array (29.5796,1.55456) 101 -dictGet test_01037.dict_array (29.5796,2.36864) 101 -dictGet test_01037.dict_array (29.5844,1.59626) 101 -dictGet test_01037.dict_array (29.5886,4.03321) 101 -dictGet test_01037.dict_array (29.5914,3.02628) 101 -dictGet test_01037.dict_array (29.5926,-0.0965169) 101 -dictGet test_01037.dict_array (29.5968,2.37773) 101 -dictGet test_01037.dict_array (29.5984,0.755853) 101 -dictGet test_01037.dict_array (29.6066,3.47173) 101 -dictGet test_01037.dict_array (29.6085,-1.26007) 6489978 -dictGet test_01037.dict_array (29.6131,0.246565) 101 -dictGet test_01037.dict_array (29.6157,-0.266687) 101 -dictGet test_01037.dict_array (29.6164,2.94674) 101 -dictGet test_01037.dict_array (29.6195,-0.591941) 101 -dictGet test_01037.dict_array (29.6231,1.54818) 101 -dictGet test_01037.dict_array (29.6379,0.764114) 101 -dictGet test_01037.dict_array (29.6462,-0.772059) 934530 -dictGet test_01037.dict_array (29.6579,-1.07336) 6489978 -dictGet test_01037.dict_array (29.6618,-0.271842) 101 -dictGet test_01037.dict_array (29.6629,-0.303602) 101 -dictGet test_01037.dict_array (29.6659,-0.782823) 934530 -dictGet test_01037.dict_array (29.6736,-0.113832) 101 -dictGet test_01037.dict_array (29.6759,3.02905) 101 -dictGet test_01037.dict_array (29.6778,3.71898) 101 -dictGet test_01037.dict_array (29.6796,1.10433) 101 -dictGet test_01037.dict_array (29.6809,2.13677) 101 -dictGet test_01037.dict_array (29.6935,4.11894) 101 -dictGet test_01037.dict_array (29.6991,-1.4458199999999999) 101 -dictGet test_01037.dict_array (29.6997,3.17297) 101 -dictGet test_01037.dict_array (29.7043,3.6145899999999997) 101 -dictGet test_01037.dict_array (29.7065,3.24885) 101 -dictGet test_01037.dict_array (29.7126,0.28108) 101 -dictGet test_01037.dict_array (29.7192,0.174273) 101 -dictGet test_01037.dict_array (29.7217,-0.523481) 3501900 -dictGet test_01037.dict_array (29.7271,1.67967) 101 -dictGet test_01037.dict_array (29.7311,4.12444) 101 -dictGet test_01037.dict_array (29.7347,1.88378) 101 -dictGet test_01037.dict_array (29.7358,0.67944) 101 -dictGet test_01037.dict_array (29.7366,-0.2973) 101 -dictGet test_01037.dict_array (29.7446,0.646536) 101 
-dictGet test_01037.dict_array (29.7453,-0.567963) 3501900 -dictGet test_01037.dict_array (29.764,4.04217) 101 -dictGet test_01037.dict_array (29.7655,1.51372) 101 -dictGet test_01037.dict_array (29.7744,1.12435) 101 -dictGet test_01037.dict_array (29.7774,-0.0681196) 3501895 -dictGet test_01037.dict_array (29.7784,1.54864) 101 -dictGet test_01037.dict_array (29.7785,2.24139) 101 -dictGet test_01037.dict_array (29.7922,0.220808) 101 -dictGet test_01037.dict_array (29.7936,2.37709) 101 -dictGet test_01037.dict_array (29.8008,0.948536) 101 -dictGet test_01037.dict_array (29.8115,0.201227) 101 -dictGet test_01037.dict_array (29.814,0.149601) 3501895 -dictGet test_01037.dict_array (29.8193,-1.35858) 101 -dictGet test_01037.dict_array (29.8201,0.965518) 101 -dictGet test_01037.dict_array (29.8265,-0.727286) 3501900 -dictGet test_01037.dict_array (29.8277,-0.531746) 3501900 -dictGet test_01037.dict_array (29.8289,3.63009) 101 -dictGet test_01037.dict_array (29.8548,0.838047) 101 -dictGet test_01037.dict_array (29.8641,-0.845265) 3501900 -dictGet test_01037.dict_array (29.8649,0.0562212) 3501895 -dictGet test_01037.dict_array (29.8701,-1.02045) 934530 -dictGet test_01037.dict_array (29.8733,2.76654) 101 -dictGet test_01037.dict_array (29.876,0.555475) 101 -dictGet test_01037.dict_array (29.8794,-0.800108) 3501900 -dictGet test_01037.dict_array (29.8813,2.7426399999999997) 101 -dictGet test_01037.dict_array (29.897100000000002,2.66193) 101 -dictGet test_01037.dict_array (29.908,4.01339) 101 -dictGet test_01037.dict_array (29.9165,-1.08246) 3501894 -dictGet test_01037.dict_array (29.9201,-0.420861) 3498054 -dictGet test_01037.dict_array (29.9217,3.03778) 101 -dictGet test_01037.dict_array (29.9355,0.773833) 101 -dictGet test_01037.dict_array (29.947,3.76517) 101 -dictGet test_01037.dict_array (29.9518,-0.60557) 3498056 -dictGet test_01037.dict_array (29.9564,-0.600163) 3498056 -dictGet test_01037.dict_array (29.959600000000002,4.16591) 101 -dictGet test_01037.dict_array (29.9615,-1.33708) 3501894 -dictGet test_01037.dict_array (29.9699,-0.392375) 3498054 -dictGet test_01037.dict_array (29.9776,1.04552) 101 -dictGet test_01037.dict_array (29.9784,4.02756) 101 -dictGet test_01037.dict_array (29.9819,4.00597) 101 -dictGet test_01037.dict_array (29.9826,1.2816100000000001) 101 -dictGet test_01037.dict_array (30.0026,2.76257) 101 -dictGet test_01037.dict_array (30.0126,3.68255) 101 -dictGet test_01037.dict_array (30.0131,0.796576) 3501892 -dictGet test_01037.dict_array (30.018,1.16523) 101 -dictGet test_01037.dict_array (30.0261,-0.210653) 3501896 -dictGet test_01037.dict_array (30.0472,-1.11007) 3501894 -dictGet test_01037.dict_array (30.0542,-0.479585) 3498054 -dictGet test_01037.dict_array (30.0613,1.6278000000000001) 101 -dictGet test_01037.dict_array (30.0617,-0.0551152) 3501895 -dictGet test_01037.dict_array (30.0637,2.62066) 101 -dictGet test_01037.dict_array (30.0721,1.6424400000000001) 101 -dictGet test_01037.dict_array (30.0769,-0.402636) 3498054 -dictGet test_01037.dict_array (30.0791,-0.277435) 3501896 -dictGet test_01037.dict_array (30.0931,0.0327512) 3501895 -dictGet test_01037.dict_array (30.1059,3.52623) 101 -dictGet test_01037.dict_array (30.1103,0.865466) 3501892 -dictGet test_01037.dict_array (30.1115,2.95243) 101 -dictGet test_01037.dict_array (30.1144,1.71029) 101 -dictGet test_01037.dict_array (30.1311,-0.864751) 3501899 -dictGet test_01037.dict_array (30.1336,-0.851386) 3501899 -dictGet test_01037.dict_array (30.1393,3.89901) 101 -dictGet test_01037.dict_array (30.1456,-0.531898) 
3498054 -dictGet test_01037.dict_array (30.1492,2.07833) 101 -dictGet test_01037.dict_array (30.1575,2.43856) 101 -dictGet test_01037.dict_array (30.1682,1.19771) 101 -dictGet test_01037.dict_array (30.1716,3.9853300000000003) 101 -dictGet test_01037.dict_array (30.1849,2.78374) 101 -dictGet test_01037.dict_array (30.1866,0.65658) 3498021 -dictGet test_01037.dict_array (30.1885,1.56943) 101 -dictGet test_01037.dict_array (30.1959,-1.38202) 101 -dictGet test_01037.dict_array (30.1999,1.58413) 101 -dictGet test_01037.dict_array (30.2024,0.713081) 3498021 -dictGet test_01037.dict_array (30.2054,0.620143) 3498021 -dictGet test_01037.dict_array (30.2091,1.51641) 101 -dictGet test_01037.dict_array (30.2124,-0.331782) 3498031 -dictGet test_01037.dict_array (30.226,3.03527) 101 -dictGet test_01037.dict_array (30.2261,3.18486) 101 -dictGet test_01037.dict_array (30.2288,2.48407) 101 -dictGet test_01037.dict_array (30.2345,3.7462400000000002) 101 -dictGet test_01037.dict_array (30.2375,0.62046) 3498021 -dictGet test_01037.dict_array (30.2425,-0.472914) 3498054 -dictGet test_01037.dict_array (30.247,3.95863) 101 -dictGet test_01037.dict_array (30.2494,-0.305093) 3498031 -dictGet test_01037.dict_array (30.2499,2.54337) 101 -dictGet test_01037.dict_array (30.2606,2.16644) 101 -dictGet test_01037.dict_array (30.2672,3.94847) 101 -dictGet test_01037.dict_array (30.2709,-0.136264) 6088794 -dictGet test_01037.dict_array (30.2764,1.18654) 101 -dictGet test_01037.dict_array (30.2765,1.20383) 101 -dictGet test_01037.dict_array (30.2839,1.05762) 3498024 -dictGet test_01037.dict_array (30.286,0.469327) 3498021 -dictGet test_01037.dict_array (30.2927,3.1693) 101 -dictGet test_01037.dict_array (30.2935,3.49854) 101 -dictGet test_01037.dict_array (30.307,0.312338) 3498021 -dictGet test_01037.dict_array (30.3085,1.07791) 3498024 -dictGet test_01037.dict_array (30.3139,2.77248) 101 -dictGet test_01037.dict_array (30.314,0.822823) 3498024 -dictGet test_01037.dict_array (30.3227,-0.587351) 3498055 -dictGet test_01037.dict_array (30.332,1.00174) 3498024 -dictGet test_01037.dict_array (30.3388,0.844148) 3498024 -dictGet test_01037.dict_array (30.3485,0.561902) 3498021 -dictGet test_01037.dict_array (30.3497,0.180362) 6489998 -dictGet test_01037.dict_array (30.361,4.13016) 101 -dictGet test_01037.dict_array (30.3623,-0.0484027) 6489998 -dictGet test_01037.dict_array (30.3638,3.9845800000000002) 101 -dictGet test_01037.dict_array (30.3853,3.16051) 101 -dictGet test_01037.dict_array (30.3974,2.6617800000000003) 101 -dictGet test_01037.dict_array (30.4002,-1.15886) 101 -dictGet test_01037.dict_array (30.4008,-0.387015) 3498031 -dictGet test_01037.dict_array (30.4018,1.86493) 101 -dictGet test_01037.dict_array (30.4239,1.16818) 3498024 -dictGet test_01037.dict_array (30.4363,3.63938) 101 -dictGet test_01037.dict_array (30.4377,-0.81315) 3498063 -dictGet test_01037.dict_array (30.4391,3.54703) 101 -dictGet test_01037.dict_array (30.4424,-1.39435) 101 -dictGet test_01037.dict_array (30.4441,2.8463000000000003) 101 -dictGet test_01037.dict_array (30.4517,3.28117) 101 -dictGet test_01037.dict_array (30.4658,2.6928) 101 -dictGet test_01037.dict_array (30.4734,2.66161) 101 -dictGet test_01037.dict_array (30.4799,-1.07578) 101 -dictGet test_01037.dict_array (30.4837,-1.02486) 3501899 -dictGet test_01037.dict_array (30.485,1.06326) 3498024 -dictGet test_01037.dict_array (30.495,1.12306) 101 -dictGet test_01037.dict_array (30.501,2.27264) 101 -dictGet test_01037.dict_array (30.5027,1.99382) 101 -dictGet test_01037.dict_array 
(30.5194,-1.03943) 3501893 -dictGet test_01037.dict_array (30.5239,1.04328) 101 -dictGet test_01037.dict_array (30.528,3.82041) 101 -dictGet test_01037.dict_array (30.5299,-0.715248) 3498063 -dictGet test_01037.dict_array (30.5331,1.19603) 101 -dictGet test_01037.dict_array (30.535800000000002,2.71485) 101 -dictGet test_01037.dict_array (30.5405,0.804694) 3498023 -dictGet test_01037.dict_array (30.542,1.23739) 101 -dictGet test_01037.dict_array (30.5432,4.04189) 101 -dictGet test_01037.dict_array (30.5457,-0.956121) 3501893 -dictGet test_01037.dict_array (30.5506,3.07443) 101 -dictGet test_01037.dict_array (30.5539,3.87084) 101 -dictGet test_01037.dict_array (30.5578,3.78837) 101 -dictGet test_01037.dict_array (30.5588,0.966135) 3498022 -dictGet test_01037.dict_array (30.5637,2.5605) 101 -dictGet test_01037.dict_array (30.5647,-1.27328) 101 -dictGet test_01037.dict_array (30.5656,-0.0581332) 6088794 -dictGet test_01037.dict_array (30.5715,0.65755) 3498023 -dictGet test_01037.dict_array (30.5727,3.01604) 101 -dictGet test_01037.dict_array (30.5729,-0.976857) 3501893 -dictGet test_01037.dict_array (30.5751,0.60204) 3498023 -dictGet test_01037.dict_array (30.5854,3.02473) 101 -dictGet test_01037.dict_array (30.5866,0.174099) 6489998 -dictGet test_01037.dict_array (30.5947,0.875193) 3498023 -dictGet test_01037.dict_array (30.5992,-0.403901) 3498063 -dictGet test_01037.dict_array (30.6002,4.18891) 101 -dictGet test_01037.dict_array (30.6025,0.217712) 6489998 -dictGet test_01037.dict_array (30.6054,0.927203) 3498022 -dictGet test_01037.dict_array (30.6075,3.79359) 101 -dictGet test_01037.dict_array (30.6159,3.82773) 101 -dictGet test_01037.dict_array (30.627,3.84039) 101 -dictGet test_01037.dict_array (30.6308,0.77517) 3498023 -dictGet test_01037.dict_array (30.6338,0.179565) 6489998 -dictGet test_01037.dict_array (30.6461,1.3293599999999999) 101 -dictGet test_01037.dict_array (30.6674,-0.424547) 3498063 -dictGet test_01037.dict_array (30.669,1.76539) 101 -dictGet test_01037.dict_array (30.6788,4.01239) 101 -dictGet test_01037.dict_array (30.6864,3.59158) 101 -dictGet test_01037.dict_array (30.7049,-0.875413) 3501893 -dictGet test_01037.dict_array (30.705,1.3307) 101 -dictGet test_01037.dict_array (30.7063,-0.473192) 3498063 -dictGet test_01037.dict_array (30.7075,-1.1958199999999999) 101 -dictGet test_01037.dict_array (30.7101,-0.367562) 3498012 -dictGet test_01037.dict_array (30.7203,2.98725) 101 -dictGet test_01037.dict_array (30.7213,2.2745699999999998) 101 -dictGet test_01037.dict_array (30.7446,-0.334144) 3498012 -dictGet test_01037.dict_array (30.7468,3.82967) 101 -dictGet test_01037.dict_array (30.747,-0.384779) 3498012 -dictGet test_01037.dict_array (30.7681,0.904198) 3498022 -dictGet test_01037.dict_array (30.7757,1.78743) 101 -dictGet test_01037.dict_array (30.8021,-0.479212) 3498012 -dictGet test_01037.dict_array (30.8079,-1.40869) 101 -dictGet test_01037.dict_array (30.8206,-0.0608489) 3498012 -dictGet test_01037.dict_array (30.8218,0.43909) 3498023 -dictGet test_01037.dict_array (30.8239,0.10014) 3498012 -dictGet test_01037.dict_array (30.8282,4.15409) 101 -dictGet test_01037.dict_array (30.8288,-0.709528) 3501893 -dictGet test_01037.dict_array (30.8326,0.156011) 3498012 -dictGet test_01037.dict_array (30.8328,-1.03704) 101 -dictGet test_01037.dict_array (30.839,2.15528) 101 -dictGet test_01037.dict_array (30.8452,0.219377) 3498013 -dictGet test_01037.dict_array (30.8463,0.0515355) 3498012 -dictGet test_01037.dict_array (30.8526,2.06614) 101 -dictGet test_01037.dict_array 
(30.8566,0.517876) 3498023 -dictGet test_01037.dict_array (30.8588,-1.31738) 101 -dictGet test_01037.dict_array (30.8681,0.44207) 3498013 -dictGet test_01037.dict_array (30.8914,1.0072) 3498022 -dictGet test_01037.dict_array (30.897,0.483425) 3498013 -dictGet test_01037.dict_array (30.905,2.8731999999999998) 3501793 -dictGet test_01037.dict_array (30.9051,2.21956) 101 -dictGet test_01037.dict_array (30.9115,4.00663) 101 -dictGet test_01037.dict_array (30.9167,-0.834462) 3501893 -dictGet test_01037.dict_array (30.9252,-1.3289900000000001) 101 -dictGet test_01037.dict_array (30.9314,1.85384) 101 -dictGet test_01037.dict_array (30.9392,2.53236) 3501827 -dictGet test_01037.dict_array (30.9569,2.82038) 3501793 -dictGet test_01037.dict_array (30.9598,-0.641011) 3498012 -dictGet test_01037.dict_array (30.9601,-0.254928) 3498012 -dictGet test_01037.dict_array (30.9623,-1.3886) 101 -dictGet test_01037.dict_array (30.9707,0.888854) 3498022 -dictGet test_01037.dict_array (30.9766,2.81957) 3501793 -dictGet test_01037.dict_array (30.9775,2.69273) 3501793 -dictGet test_01037.dict_array (30.9821,0.587715) 3498013 -dictGet test_01037.dict_array (30.9887,4.0233) 101 -dictGet test_01037.dict_array (30.9914,0.259542) 3498013 -dictGet test_01037.dict_array (30.9986,-1.36832) 101 -dictGet test_01037.dict_array (31.008,0.628999) 3498013 -dictGet test_01037.dict_array (31.0168,-1.17462) 101 -dictGet test_01037.dict_array (31.0237,3.52547) 3501821 -dictGet test_01037.dict_array (31.0306,3.78522) 101 -dictGet test_01037.dict_array (31.0308,-0.72453) 3501893 -dictGet test_01037.dict_array (31.0463,2.41997) 3501825 -dictGet test_01037.dict_array (31.047,0.624184) 3498013 -dictGet test_01037.dict_array (31.0569,0.0706393) 3498015 -dictGet test_01037.dict_array (31.0583,1.3244099999999999) 3501926 -dictGet test_01037.dict_array (31.063,3.23861) 3501793 -dictGet test_01037.dict_array (31.068,0.695575) 3498022 -dictGet test_01037.dict_array (31.0687,1.85675) 101 -dictGet test_01037.dict_array (31.0692,0.254793) 3498014 -dictGet test_01037.dict_array (31.0766,0.828128) 3498022 -dictGet test_01037.dict_array (31.0833,0.0612782) 3498015 -dictGet test_01037.dict_array (31.0833,2.59748) 3501793 -dictGet test_01037.dict_array (31.0861,-1.3778299999999999) 101 -dictGet test_01037.dict_array (31.0874,3.07258) 3501793 -dictGet test_01037.dict_array (31.0882,1.4882) 3501926 -dictGet test_01037.dict_array (31.0924,3.42242) 3501821 -dictGet test_01037.dict_array (31.0927,2.67448) 3501793 -dictGet test_01037.dict_array (31.0936,1.12292) 3498022 -dictGet test_01037.dict_array (31.0952,-0.336928) 3498012 -dictGet test_01037.dict_array (31.0978,3.48482) 3501826 -dictGet test_01037.dict_array (31.1107,3.7513199999999998) 3501826 -dictGet test_01037.dict_array (31.1156,1.19171) 3501926 -dictGet test_01037.dict_array (31.1176,0.223509) 3498015 -dictGet test_01037.dict_array (31.1249,0.946838) 3498022 -dictGet test_01037.dict_array (31.1267,1.48983) 3501926 -dictGet test_01037.dict_array (31.138,-0.289981) 3501898 -dictGet test_01037.dict_array (31.1382,3.02904) 3501793 -dictGet test_01037.dict_array (31.1475,2.6178) 3501793 -dictGet test_01037.dict_array (31.1491,1.37873) 3501926 -dictGet test_01037.dict_array (31.1525,3.72105) 3501826 -dictGet test_01037.dict_array (31.1526,-1.4129800000000001) 101 -dictGet test_01037.dict_array (31.1526,-0.186457) 3501898 -dictGet test_01037.dict_array (31.1539,2.78789) 3501793 -dictGet test_01037.dict_array (31.1548,-1.08552) 101 -dictGet test_01037.dict_array (31.1567,-0.0768925) 3501898 -dictGet 
test_01037.dict_array (31.1613,1.49617) 3501926 -dictGet test_01037.dict_array (31.1653,1.03777) 3498022 -dictGet test_01037.dict_array (31.1662,3.4214700000000002) 3501826 -dictGet test_01037.dict_array (31.1672,-0.0813169) 3501898 -dictGet test_01037.dict_array (31.177,0.440843) 3498014 -dictGet test_01037.dict_array (31.1788,-0.737151) 3501893 -dictGet test_01037.dict_array (31.1856,-0.144396) 3501898 -dictGet test_01037.dict_array (31.1959,3.66813) 3501826 -dictGet test_01037.dict_array (31.1996,-0.353983) 3501898 -dictGet test_01037.dict_array (31.2019,2.86802) 3501793 -dictGet test_01037.dict_array (31.2087,2.31245) 3501825 -dictGet test_01037.dict_array (31.2125,3.2713200000000002) 3501793 -dictGet test_01037.dict_array (31.2137,-0.108129) 3501898 -dictGet test_01037.dict_array (31.216,3.9156) 101 -dictGet test_01037.dict_array (31.2201,-0.202141) 3501898 -dictGet test_01037.dict_array (31.2285,2.09058) 101 -dictGet test_01037.dict_array (31.2502,4.01526) 101 -dictGet test_01037.dict_array (31.2585,3.11524) 3501793 -dictGet test_01037.dict_array (31.2645,-0.620418) 3501890 -dictGet test_01037.dict_array (31.2684,2.74277) 3501793 -dictGet test_01037.dict_array (31.2821,-1.12772) 101 -dictGet test_01037.dict_array (31.2821,2.46769) 3501825 -dictGet test_01037.dict_array (31.2887,3.91396) 101 -dictGet test_01037.dict_array (31.295,1.49942) 3501926 -dictGet test_01037.dict_array (31.2997,3.46122) 3501826 -dictGet test_01037.dict_array (31.3017,3.3263) 3501826 -dictGet test_01037.dict_array (31.3022,3.16754) 3501793 -dictGet test_01037.dict_array (31.3048,0.364962) 3498014 -dictGet test_01037.dict_array (31.305,3.1967) 3501793 -dictGet test_01037.dict_array (31.3061,1.84303) 101 -dictGet test_01037.dict_array (31.3082,-0.173851) 3501898 -dictGet test_01037.dict_array (31.3315,3.90932) 101 -dictGet test_01037.dict_array (31.3351,2.80164) 3501793 -dictGet test_01037.dict_array (31.3388,0.168765) 3498015 -dictGet test_01037.dict_array (31.339,0.25535) 3498094 -dictGet test_01037.dict_array (31.3423,1.7036799999999999) 3501926 -dictGet test_01037.dict_array (31.349,0.386456) 3498014 -dictGet test_01037.dict_array (31.3558,-1.04336) 101 -dictGet test_01037.dict_array (31.3564,0.478876) 3498014 -dictGet test_01037.dict_array (31.3607,-0.0860507) 3498015 -dictGet test_01037.dict_array (31.3831,3.84469) 101 -dictGet test_01037.dict_array (31.3886,-0.731137) 3501890 -dictGet test_01037.dict_array (31.4043,-0.348907) 5457271 -dictGet test_01037.dict_array (31.4081,1.47391) 3501926 -dictGet test_01037.dict_array (31.4176,-0.583645) 5457271 -dictGet test_01037.dict_array (31.4177,1.36972) 3501926 -dictGet test_01037.dict_array (31.4182,0.958303) 3498022 -dictGet test_01037.dict_array (31.4199,3.1738) 3501793 -dictGet test_01037.dict_array (31.4221,2.74876) 3501825 -dictGet test_01037.dict_array (31.4301,-0.122643) 3498015 -dictGet test_01037.dict_array (31.4344,1.00661) 3498022 -dictGet test_01037.dict_array (31.4375,4.20304) 101 -dictGet test_01037.dict_array (31.4377,0.289608) 3498094 -dictGet test_01037.dict_array (31.4379,0.54744) 3498014 -dictGet test_01037.dict_array (31.4459,3.94945) 101 -dictGet test_01037.dict_array (31.4559,-0.345063) 5457271 -dictGet test_01037.dict_array (31.464,0.726129) 3498014 -dictGet test_01037.dict_array (31.4662,-0.299019) 3498015 -dictGet test_01037.dict_array (31.4671,1.9605299999999999) 3501794 -dictGet test_01037.dict_array (31.4673,-0.403676) 5457271 -dictGet test_01037.dict_array (31.4712,-0.237941) 3498015 -dictGet test_01037.dict_array (31.4816,0.120264) 
3498015 -dictGet test_01037.dict_array (31.4875,0.323483) 3498014 -dictGet test_01037.dict_array (31.490099999999998,-0.338163) 5457271 -dictGet test_01037.dict_array (31.4932,0.517674) 3498014 -dictGet test_01037.dict_array (31.5112,1.9689299999999998) 3501794 -dictGet test_01037.dict_array (31.5122,2.92785) 3501791 -dictGet test_01037.dict_array (31.5151,0.166429) 3498094 -dictGet test_01037.dict_array (31.5174,2.94802) 3501791 -dictGet test_01037.dict_array (31.5182,4.18776) 101 -dictGet test_01037.dict_array (31.5238,1.18793) 3498003 -dictGet test_01037.dict_array (31.5271,3.07446) 3501791 -dictGet test_01037.dict_array (31.5393,1.58061) 3501794 -dictGet test_01037.dict_array (31.5421,3.13711) 3501791 -dictGet test_01037.dict_array (31.5479,2.39897) 3497970 -dictGet test_01037.dict_array (31.5519,0.99285) 3498003 -dictGet test_01037.dict_array (31.5685,3.47987) 3501824 -dictGet test_01037.dict_array (31.5959,0.437382) 3498014 -dictGet test_01037.dict_array (31.6003,0.194376) 3498094 -dictGet test_01037.dict_array (31.6026,2.15457) 3501794 -dictGet test_01037.dict_array (31.606,2.45365) 3497970 -dictGet test_01037.dict_array (31.6062,-0.453441) 3501890 -dictGet test_01037.dict_array (31.6107,1.35247) 3497974 -dictGet test_01037.dict_array (31.6155,3.85588) 101 -dictGet test_01037.dict_array (31.6222,2.03326) 3501794 -dictGet test_01037.dict_array (31.6231,-0.123059) 3498083 -dictGet test_01037.dict_array (31.6244,1.6885599999999998) 3497974 -dictGet test_01037.dict_array (31.6459,0.669716) 3498014 -dictGet test_01037.dict_array (31.6563,-0.0644741) 3498083 -dictGet test_01037.dict_array (31.6618,-0.551121) 3501890 -dictGet test_01037.dict_array (31.6725,-0.38922) 3498085 -dictGet test_01037.dict_array (31.6727,4.10336) 101 -dictGet test_01037.dict_array (31.6739,4.1391) 101 -dictGet test_01037.dict_array (31.6897,2.8694699999999997) 3501792 -dictGet test_01037.dict_array (31.6902,3.98792) 101 -dictGet test_01037.dict_array (31.6945,2.46687) 3497970 -dictGet test_01037.dict_array (31.6987,-1.3796) 101 -dictGet test_01037.dict_array (31.7012,2.34845) 3497970 -dictGet test_01037.dict_array (31.7036,0.0228348) 3501888 -dictGet test_01037.dict_array (31.7046,3.68111) 3501824 -dictGet test_01037.dict_array (31.7055,2.92556) 3501792 -dictGet test_01037.dict_array (31.7102,1.04532) 3498003 -dictGet test_01037.dict_array (31.7149,-0.443302) 3498085 -dictGet test_01037.dict_array (31.7195,2.99311) 3501791 -dictGet test_01037.dict_array (31.7274,0.166719) 3498094 -dictGet test_01037.dict_array (31.7565,-0.565382) 3498085 -dictGet test_01037.dict_array (31.7615,0.771626) 3498014 -dictGet test_01037.dict_array (31.7739,1.8970099999999999) 3497974 -dictGet test_01037.dict_array (31.7848,1.2623199999999999) 3498003 -dictGet test_01037.dict_array (31.7912,-0.788599) 101 -dictGet test_01037.dict_array (31.8011,2.65853) 3497970 -dictGet test_01037.dict_array (31.8032,-0.0590108) 3501888 -dictGet test_01037.dict_array (31.8038,1.9618799999999998) 3497974 -dictGet test_01037.dict_array (31.8098,-1.46851) 101 -dictGet test_01037.dict_array (31.8131,3.41982) 3501791 -dictGet test_01037.dict_array (31.8169,3.31059) 3501791 -dictGet test_01037.dict_array (31.8202,-0.193692) 3501888 -dictGet test_01037.dict_array (31.8306,1.57586) 3497974 -dictGet test_01037.dict_array (31.8382,-0.787948) 101 -dictGet test_01037.dict_array (31.8433,2.49692) 3497970 -dictGet test_01037.dict_array (31.8436,2.41851) 3497970 -dictGet test_01037.dict_array (31.8563,-1.10787) 101 -dictGet test_01037.dict_array (31.8683,0.996504) 
3498002 -dictGet test_01037.dict_array (31.8693,-0.828142) 101 -dictGet test_01037.dict_array (31.8723,1.08929) 3498003 -dictGet test_01037.dict_array (31.8737,0.881127) 3498002 -dictGet test_01037.dict_array (31.8881,-0.58441) 101 -dictGet test_01037.dict_array (31.9011,0.121349) 3498094 -dictGet test_01037.dict_array (31.9066,2.13045) 3497965 -dictGet test_01037.dict_array (31.9142,1.03368) 3498002 -dictGet test_01037.dict_array (31.9155,3.38363) 3501791 -dictGet test_01037.dict_array (31.9168,1.3166) 3498004 -dictGet test_01037.dict_array (31.9185,-1.11879) 101 -dictGet test_01037.dict_array (31.9186,-0.647948) 101 -dictGet test_01037.dict_array (31.9311,3.96928) 101 -dictGet test_01037.dict_array (31.9335,1.47048) 3497974 -dictGet test_01037.dict_array (31.9443,-1.36175) 101 -dictGet test_01037.dict_array (31.9481,2.34231) 3497970 -dictGet test_01037.dict_array (31.9526,1.36565) 3498004 -dictGet test_01037.dict_array (31.9629,2.5208399999999997) 3497970 -dictGet test_01037.dict_array (31.9765,0.975783) 3498002 -dictGet test_01037.dict_array (31.9923,3.31773) 3501791 -dictGet test_01037.dict_array (31.9994,0.972816) 3498002 -dictGet test_01037.dict_array (32.001,3.47425) 3501791 -dictGet test_01037.dict_array (32.0127,2.13874) 3497965 -dictGet test_01037.dict_array (32.0244,3.2092) 3501792 -dictGet test_01037.dict_array (32.029,1.18039) 3498004 -dictGet test_01037.dict_array (32.0315,0.566073) 3498095 -dictGet test_01037.dict_array (32.0354,1.0766499999999999) 3498004 -dictGet test_01037.dict_array (32.0399,-1.11576) 101 -dictGet test_01037.dict_array (32.053,2.16849) 3497965 -dictGet test_01037.dict_array (32.0542,0.042328) 3498096 -dictGet test_01037.dict_array (32.0576,2.47001) 3497970 -dictGet test_01037.dict_array (32.061,3.7498899999999997) 101 -dictGet test_01037.dict_array (32.0623,1.25134) 3498004 -dictGet test_01037.dict_array (32.0626,1.9611399999999999) 3497965 -dictGet test_01037.dict_array (32.0666,-0.0904247) 3498096 -dictGet test_01037.dict_array (32.0681,2.28442) 3497970 -dictGet test_01037.dict_array (32.0692,1.50869) 3497981 -dictGet test_01037.dict_array (32.0724,4.03314) 101 -dictGet test_01037.dict_array (32.0729,-0.064324) 101 -dictGet test_01037.dict_array (32.079,0.293758) 3498094 -dictGet test_01037.dict_array (32.0847,-1.19814) 101 -dictGet test_01037.dict_array (32.0974,-0.91927) 101 -dictGet test_01037.dict_array (32.0979,-0.736979) 101 -dictGet test_01037.dict_array (32.106,-1.33063) 101 -dictGet test_01037.dict_array (32.1189,0.246715) 3498094 -dictGet test_01037.dict_array (32.1207,4.00883) 101 -dictGet test_01037.dict_array (32.1396,1.12402) 3498004 -dictGet test_01037.dict_array (32.1413,1.5668) 3497981 -dictGet test_01037.dict_array (32.143,1.35559) 3498004 -dictGet test_01037.dict_array (32.1538,1.32881) 3498004 -dictGet test_01037.dict_array (32.1549,4.06552) 101 -dictGet test_01037.dict_array (32.1555,-0.79275) 101 -dictGet test_01037.dict_array (32.163,1.17733) 3498004 -dictGet test_01037.dict_array (32.1634,2.94273) 3501792 -dictGet test_01037.dict_array (32.1644,1.85666) 3497965 -dictGet test_01037.dict_array (32.1745,0.435458) 3498095 -dictGet test_01037.dict_array (32.1765,1.65149) 3497981 -dictGet test_01037.dict_array (32.1893,2.08924) 3497965 -dictGet test_01037.dict_array (32.2024,0.222191) 3498093 -dictGet test_01037.dict_array (32.2107,1.34379) 3497981 -dictGet test_01037.dict_array (32.2109,3.9018699999999997) 101 -dictGet test_01037.dict_array (32.2123,1.85233) 3497965 -dictGet test_01037.dict_array (32.2144,3.72534) 101 -dictGet 
test_01037.dict_array (32.2218,2.5386699999999998) 3497970 -dictGet test_01037.dict_array (32.2279,2.84267) 3497245 -dictGet test_01037.dict_array (32.2345,3.33295) 3501792 -dictGet test_01037.dict_array (32.2435,3.85283) 101 -dictGet test_01037.dict_array (32.2527,-0.480608) 101 -dictGet test_01037.dict_array (32.2566,-0.837882) 101 -dictGet test_01037.dict_array (32.2627,2.57708) 3497970 -dictGet test_01037.dict_array (32.2733,0.244931) 3498096 -dictGet test_01037.dict_array (32.2761,4.05808) 101 -dictGet test_01037.dict_array (32.2764,3.78472) 101 -dictGet test_01037.dict_array (32.2814,-1.26011) 101 -dictGet test_01037.dict_array (32.2861,3.02427) 3497245 -dictGet test_01037.dict_array (32.2924,0.928609) 3498004 -dictGet test_01037.dict_array (32.2963,-0.78543) 101 -dictGet test_01037.dict_array (32.3039,3.21175) 3501792 -dictGet test_01037.dict_array (32.3107,0.698287) 3498004 -dictGet test_01037.dict_array (32.3138,0.0595677) 3498106 -dictGet test_01037.dict_array (32.3339,0.707056) 3498004 -dictGet test_01037.dict_array (32.3351,0.415474) 3498106 -dictGet test_01037.dict_array (32.342,-0.681023) 101 -dictGet test_01037.dict_array (32.3463,1.83196) 3497126 -dictGet test_01037.dict_array (32.3494,2.43799) 3497114 -dictGet test_01037.dict_array (32.3524,3.47049) 3501822 -dictGet test_01037.dict_array (32.3531,2.33115) 3497114 -dictGet test_01037.dict_array (32.3602,0.116106) 3498106 -dictGet test_01037.dict_array (32.3612,1.1598) 3498004 -dictGet test_01037.dict_array (32.3689,3.34847) 3501822 -dictGet test_01037.dict_array (32.3695,0.734055) 3498004 -dictGet test_01037.dict_array (32.3825,3.85017) 101 -dictGet test_01037.dict_array (32.3835,-1.25491) 101 -dictGet test_01037.dict_array (32.4018,-0.728568) 101 -dictGet test_01037.dict_array (32.4044,2.96727) 3497245 -dictGet test_01037.dict_array (32.4101,2.9988) 3497245 -dictGet test_01037.dict_array (32.417,-1.12908) 101 -dictGet test_01037.dict_array (32.4172,4.1952) 101 -dictGet test_01037.dict_array (32.4239,2.49512) 3497245 -dictGet test_01037.dict_array (32.4258,4.05137) 101 -dictGet test_01037.dict_array (32.4264,-0.427357) 101 -dictGet test_01037.dict_array (32.4274,3.59377) 3501822 -dictGet test_01037.dict_array (32.4286,-1.24757) 101 -dictGet test_01037.dict_array (32.4294,3.0665) 3497245 -dictGet test_01037.dict_array (32.4333,-0.353347) 101 -dictGet test_01037.dict_array (32.4391,3.64421) 3501822 -dictGet test_01037.dict_array (32.4401,3.70635) 3501822 -dictGet test_01037.dict_array (32.45,1.68918) 3497126 -dictGet test_01037.dict_array (32.4507,-0.133471) 101 -dictGet test_01037.dict_array (32.4592,0.976458) 3498105 -dictGet test_01037.dict_array (32.4595,1.89135) 3497126 -dictGet test_01037.dict_array (32.4604,0.280248) 3498106 -dictGet test_01037.dict_array (32.4835,0.472731) 3498106 -dictGet test_01037.dict_array (32.4855,2.01938) 3497126 -dictGet test_01037.dict_array (32.4872,2.01697) 3497126 -dictGet test_01037.dict_array (32.4911,0.613106) 3498105 -dictGet test_01037.dict_array (32.4918,2.17834) 3497114 -dictGet test_01037.dict_array (32.4947,2.34595) 3497114 -dictGet test_01037.dict_array (32.5035,2.92234) 3497245 -dictGet test_01037.dict_array (32.5132,-0.331206) 101 -dictGet test_01037.dict_array (32.5156,-0.412604) 3501887 -dictGet test_01037.dict_array (32.5158,2.9067499999999997) 3497245 -dictGet test_01037.dict_array (32.5249,2.44519) 3497114 -dictGet test_01037.dict_array (32.5293,-0.790952) 101 -dictGet test_01037.dict_array (32.5319,3.96854) 101 -dictGet test_01037.dict_array (32.5518,3.6093) 3501822 
-dictGet test_01037.dict_array (32.5541,3.5225400000000002) 3501822 -dictGet test_01037.dict_array (32.5569,0.816123) 3498105 -dictGet test_01037.dict_array (32.5646,1.9775) 3497126 -dictGet test_01037.dict_array (32.5733,3.81271) 101 -dictGet test_01037.dict_array (32.5767,0.948327) 3498105 -dictGet test_01037.dict_array (32.5971,1.76179) 3497126 -dictGet test_01037.dict_array (32.6035,-0.716157) 101 -dictGet test_01037.dict_array (32.6087,4.21614) 101 -dictGet test_01037.dict_array (32.6171,0.024481) 101 -dictGet test_01037.dict_array (32.6189,-0.775391) 101 -dictGet test_01037.dict_array (32.6198,2.92081) 3497167 -dictGet test_01037.dict_array (32.621,-0.970784) 101 -dictGet test_01037.dict_array (32.6266,0.650009) 3498105 -dictGet test_01037.dict_array (32.6315,2.15144) 3497126 -dictGet test_01037.dict_array (32.6385,-0.436803) 101 -dictGet test_01037.dict_array (32.6449,-0.191292) 101 -dictGet test_01037.dict_array (32.6535,2.10385) 3497126 -dictGet test_01037.dict_array (32.6592,3.49973) 3501822 -dictGet test_01037.dict_array (32.6598,2.5980600000000003) 3497114 -dictGet test_01037.dict_array (32.6612,2.95681) 3497167 -dictGet test_01037.dict_array (32.6636,-0.57235) 101 -dictGet test_01037.dict_array (32.669,-0.382702) 101 -dictGet test_01037.dict_array (32.6752,1.30748) 3497981 -dictGet test_01037.dict_array (32.6811,2.9559800000000003) 3497167 -dictGet test_01037.dict_array (32.6821,0.57336) 3498105 -dictGet test_01037.dict_array (32.6828,3.91304) 101 -dictGet test_01037.dict_array (32.6979,3.96868) 101 -dictGet test_01037.dict_array (32.6983,3.15784) 3497167 -dictGet test_01037.dict_array (32.7122,0.794293) 3498105 -dictGet test_01037.dict_array (32.7131,-0.847256) 101 -dictGet test_01037.dict_array (32.7219,0.883461) 3498105 -dictGet test_01037.dict_array (32.7228,1.78808) 3497126 -dictGet test_01037.dict_array (32.7273,-0.206908) 101 -dictGet test_01037.dict_array (32.7292,0.259331) 3501889 -dictGet test_01037.dict_array (32.7304,-1.38317) 101 -dictGet test_01037.dict_array (32.7353,1.01601) 3498105 -dictGet test_01037.dict_array (32.7354,4.17574) 101 -dictGet test_01037.dict_array (32.7357,-0.190194) 101 -dictGet test_01037.dict_array (32.7465,-1.37598) 101 -dictGet test_01037.dict_array (32.7494,-0.275675) 101 -dictGet test_01037.dict_array (32.7514,0.128951) 3501889 -dictGet test_01037.dict_array (32.753,3.44207) 3501822 -dictGet test_01037.dict_array (32.7686,2.11713) 3497126 -dictGet test_01037.dict_array (32.7694,1.47159) 3497388 -dictGet test_01037.dict_array (32.7768,0.0401042) 101 -dictGet test_01037.dict_array (32.781,-1.34283) 101 -dictGet test_01037.dict_array (32.7814,1.73876) 3497388 -dictGet test_01037.dict_array (32.7856,-1.06363) 101 -dictGet test_01037.dict_array (32.792699999999996,-1.1255600000000001) 101 -dictGet test_01037.dict_array (32.7941,-0.645447) 101 -dictGet test_01037.dict_array (32.7946,1.48889) 3497388 -dictGet test_01037.dict_array (32.797,0.791753) 3501889 -dictGet test_01037.dict_array (32.7982,-0.537798) 101 -dictGet test_01037.dict_array (32.8091,2.3611) 3490438 -dictGet test_01037.dict_array (32.81,1.7130800000000002) 3497388 -dictGet test_01037.dict_array (32.8174,-0.288322) 101 -dictGet test_01037.dict_array (32.823,1.6546699999999999) 3497388 -dictGet test_01037.dict_array (32.8233,1.62108) 3497388 -dictGet test_01037.dict_array (32.8428,-0.400045) 101 -dictGet test_01037.dict_array (32.8479,2.13598) 3490438 -dictGet test_01037.dict_array (32.8524,0.199902) 3501889 -dictGet test_01037.dict_array (32.8543,3.23553) 3501820 -dictGet 
test_01037.dict_array (32.8562,1.31371) 3498117 -dictGet test_01037.dict_array (32.87,1.44256) 3498117 -dictGet test_01037.dict_array (32.8789,2.38192) 3490438 -dictGet test_01037.dict_array (32.8812,2.20734) 3497128 -dictGet test_01037.dict_array (32.8815,-0.54427) 101 -dictGet test_01037.dict_array (32.8853,2.4859) 3497128 -dictGet test_01037.dict_array (32.8909,0.513964) 3501889 -dictGet test_01037.dict_array (32.9035,2.38999) 3490438 -dictGet test_01037.dict_array (32.9097,2.48131) 3497128 -dictGet test_01037.dict_array (32.928,-0.943269) 101 -dictGet test_01037.dict_array (32.9322,1.13165) 3498104 -dictGet test_01037.dict_array (32.9348,1.22606) 3498117 -dictGet test_01037.dict_array (32.9417,3.77998) 3501822 -dictGet test_01037.dict_array (32.9428,3.11936) 3497167 -dictGet test_01037.dict_array (32.9482,1.18092) 3498118 -dictGet test_01037.dict_array (32.9506,0.0609364) 101 -dictGet test_01037.dict_array (32.953,-0.828308) 101 -dictGet test_01037.dict_array (32.9593,3.5209099999999998) 3501822 -dictGet test_01037.dict_array (32.9617,2.07711) 3497128 -dictGet test_01037.dict_array (32.966,0.693749) 3498104 -dictGet test_01037.dict_array (32.9668,-0.716432) 101 -dictGet test_01037.dict_array (32.9702,1.98555) 3497127 -dictGet test_01037.dict_array (32.9782,1.73819) 3497388 -dictGet test_01037.dict_array (32.9805,3.71151) 3501822 -dictGet test_01037.dict_array (32.9821,2.97225) 3497167 -dictGet test_01037.dict_array (32.995,-0.830301) 101 -dictGet test_01037.dict_array (33.0234,0.770848) 3498104 -dictGet test_01037.dict_array (33.0312,-0.340964) 101 -dictGet test_01037.dict_array (33.0366,-0.756795) 101 -dictGet test_01037.dict_array (33.0438,0.812871) 3498118 -dictGet test_01037.dict_array (33.0455,1.84843) 3497127 -dictGet test_01037.dict_array (33.0498,0.0913292) 101 -dictGet test_01037.dict_array (33.0506,1.53739) 3497364 -dictGet test_01037.dict_array (33.0554,2.4265) 3497363 -dictGet test_01037.dict_array (33.0741,3.61332) 3501822 -dictGet test_01037.dict_array (33.0765,-0.179985) 101 -dictGet test_01037.dict_array (33.087,1.46465) 3497399 -dictGet test_01037.dict_array (33.0906,-0.620383) 101 -dictGet test_01037.dict_array (33.1047,-1.28027) 101 -dictGet test_01037.dict_array (33.1072,1.96303) 3497127 -dictGet test_01037.dict_array (33.1081,-0.897874) 101 -dictGet test_01037.dict_array (33.1122,1.8950200000000001) 3497127 -dictGet test_01037.dict_array (33.1237,2.63993) 3497165 -dictGet test_01037.dict_array (33.1238,0.753963) 3498118 -dictGet test_01037.dict_array (33.1257,0.495668) 3498102 -dictGet test_01037.dict_array (33.1258,1.78341) 3497364 -dictGet test_01037.dict_array (33.127,2.59646) 3497166 -dictGet test_01037.dict_array (33.1324,-1.23742) 101 -dictGet test_01037.dict_array (33.1359,3.83491) 101 -dictGet test_01037.dict_array (33.1628,-0.379588) 101 -dictGet test_01037.dict_array (33.1679,1.25601) 3498117 -dictGet test_01037.dict_array (33.1688,-1.35553) 101 -dictGet test_01037.dict_array (33.181,2.10943) 3497363 -dictGet test_01037.dict_array (33.1871,2.81171) 3497165 -dictGet test_01037.dict_array (33.1877,0.771297) 3498118 -dictGet test_01037.dict_array (33.1883,-0.204797) 101 -dictGet test_01037.dict_array (33.1886,3.27998) 3501820 -dictGet test_01037.dict_array (33.1955,0.708907) 3498118 -dictGet test_01037.dict_array (33.2044,-0.769275) 101 -dictGet test_01037.dict_array (33.2182,3.36103) 3501820 -dictGet test_01037.dict_array (33.2192,3.43586) 3501822 -dictGet test_01037.dict_array (33.2322,-0.916753) 101 -dictGet test_01037.dict_array (33.2359,-0.81321) 101 
-dictGet test_01037.dict_array (33.238,0.635072) 3498111 -dictGet test_01037.dict_array (33.2398,3.02588) 3497165 -dictGet test_01037.dict_array (33.2469,2.35698) 3497363 -dictGet test_01037.dict_array (33.247,2.3327) 3497363 -dictGet test_01037.dict_array (33.2579,2.8027100000000003) 3497165 -dictGet test_01037.dict_array (33.2607,0.321082) 101 -dictGet test_01037.dict_array (33.2653,0.243336) 101 -dictGet test_01037.dict_array (33.2758,0.831836) 3498118 -dictGet test_01037.dict_array (33.2771,0.886536) 3498118 -dictGet test_01037.dict_array (33.2914,1.16026) 3498117 -dictGet test_01037.dict_array (33.2914,1.38882) 3497399 -dictGet test_01037.dict_array (33.2982,-1.16604) 101 -dictGet test_01037.dict_array (33.2985,0.842556) 3498112 -dictGet test_01037.dict_array (33.3005,2.8338900000000002) 3497165 -dictGet test_01037.dict_array (33.305,0.0969475) 101 -dictGet test_01037.dict_array (33.3072,3.82163) 101 -dictGet test_01037.dict_array (33.312,3.41475) 3501820 -dictGet test_01037.dict_array (33.3129,2.46048) 3497166 -dictGet test_01037.dict_array (33.3134,3.46863) 3501820 -dictGet test_01037.dict_array (33.3203,2.33139) 3497166 -dictGet test_01037.dict_array (33.324,0.433701) 101 -dictGet test_01037.dict_array (33.3338,2.44705) 3497166 -dictGet test_01037.dict_array (33.337,4.06475) 101 -dictGet test_01037.dict_array (33.3469,1.08172) 3498126 -dictGet test_01037.dict_array (33.3538,0.717896) 3498112 -dictGet test_01037.dict_array (33.3618,1.37899) 3497399 -dictGet test_01037.dict_array (33.3698,0.547744) 3501862 -dictGet test_01037.dict_array (33.3705,0.957619) 3498112 -dictGet test_01037.dict_array (33.3821,3.07258) 3497165 -dictGet test_01037.dict_array (33.3881,3.0626) 3497165 -dictGet test_01037.dict_array (33.393,-0.816186) 101 -dictGet test_01037.dict_array (33.3945,0.869508) 3498110 -dictGet test_01037.dict_array (33.4001,1.24186) 3498117 -dictGet test_01037.dict_array (33.4008,2.34911) 3497166 -dictGet test_01037.dict_array (33.4166,-1.2808899999999999) 101 -dictGet test_01037.dict_array (33.4167,3.0655) 3497165 -dictGet test_01037.dict_array (33.4204,2.81887) 3497165 -dictGet test_01037.dict_array (33.4211,1.71128) 3497400 -dictGet test_01037.dict_array (33.4237,2.91761) 3497165 -dictGet test_01037.dict_array (33.4266,1.5955599999999999) 3497399 -dictGet test_01037.dict_array (33.4353,-0.391392) 101 -dictGet test_01037.dict_array (33.4362,-0.134658) 101 -dictGet test_01037.dict_array (33.4386,0.15396) 101 -dictGet test_01037.dict_array (33.4421,-0.50712) 101 -dictGet test_01037.dict_array (33.452,0.915829) 3498126 -dictGet test_01037.dict_array (33.463,-0.0882717) 101 -dictGet test_01037.dict_array (33.464,-1.00949) 101 -dictGet test_01037.dict_array (33.4692,0.954092) 3498126 -dictGet test_01037.dict_array (33.4716,1.9538799999999998) 3497400 -dictGet test_01037.dict_array (33.4756,1.85836) 3497400 -dictGet test_01037.dict_array (33.4859,4.0751) 101 -dictGet test_01037.dict_array (33.4899,3.54193) 3501820 -dictGet test_01037.dict_array (33.4935,3.49794) 3501820 -dictGet test_01037.dict_array (33.494,-0.983356) 101 -dictGet test_01037.dict_array (33.4955,-1.28128) 101 -dictGet test_01037.dict_array (33.4965,-0.278687) 101 -dictGet test_01037.dict_array (33.4991,0.647491) 3498110 -dictGet test_01037.dict_array (33.5076,2.2272) 3497424 -dictGet test_01037.dict_array (33.5079,-0.498199) 101 -dictGet test_01037.dict_array (33.5157,0.535034) 3501862 -dictGet test_01037.dict_array (33.5171,2.49677) 3497166 -dictGet test_01037.dict_array (33.5255,2.4447200000000002) 3497166 -dictGet 
test_01037.dict_array (33.526,4.01194) 101 -dictGet test_01037.dict_array (33.5288,0.789434) 3498110 -dictGet test_01037.dict_array (33.5356,-1.17671) 101 -dictGet test_01037.dict_array (33.5402,1.49152) 3497399 -dictGet test_01037.dict_array (33.5418,3.45757) 3501820 -dictGet test_01037.dict_array (33.5428,1.90712) 3497400 -dictGet test_01037.dict_array (33.5556,-0.55741) 101 -dictGet test_01037.dict_array (33.5564,0.876858) 3498128 -dictGet test_01037.dict_array (33.5567,-0.10208) 101 -dictGet test_01037.dict_array (33.5645,-0.124824) 101 -dictGet test_01037.dict_array (33.5663,3.4872) 3501820 -dictGet test_01037.dict_array (33.5716,-0.0107611) 101 -dictGet test_01037.dict_array (33.578,3.55714) 3501820 -dictGet test_01037.dict_array (33.5826,-0.49076) 101 -dictGet test_01037.dict_array (33.5909,0.773737) 3498110 -dictGet test_01037.dict_array (33.5958,2.9619999999999997) 3497425 -dictGet test_01037.dict_array (33.6193,-0.919755) 101 -dictGet test_01037.dict_array (33.6313,0.652132) 3498110 -dictGet test_01037.dict_array (33.632,0.823351) 3498128 -dictGet test_01037.dict_array (33.66,2.18998) 3497424 -dictGet test_01037.dict_array (33.6621,0.535395) 3498135 -dictGet test_01037.dict_array (33.6726,3.19367) 3497438 -dictGet test_01037.dict_array (33.6912,1.74522) 3497400 -dictGet test_01037.dict_array (33.705,0.706397) 3498135 -dictGet test_01037.dict_array (33.7076,0.7622) 3498128 -dictGet test_01037.dict_array (33.7112,1.70187) 3497400 -dictGet test_01037.dict_array (33.7246,-1.14837) 101 -dictGet test_01037.dict_array (33.7326,2.62413) 3497425 -dictGet test_01037.dict_array (33.7332,2.82137) 3497425 -dictGet test_01037.dict_array (33.7434,0.394672) 3498135 -dictGet test_01037.dict_array (33.7443,1.54557) 3497398 -dictGet test_01037.dict_array (33.7506,1.57317) 3497398 -dictGet test_01037.dict_array (33.7526,1.8578999999999999) 3497424 -dictGet test_01037.dict_array (33.766,4.15013) 101 -dictGet test_01037.dict_array (33.7834,2.41789) 3497439 -dictGet test_01037.dict_array (33.7864,0.230935) 101 -dictGet test_01037.dict_array (33.7965,3.05709) 3497438 -dictGet test_01037.dict_array (33.7998,3.32881) 3497438 -dictGet test_01037.dict_array (33.8003,2.97338) 3497425 -dictGet test_01037.dict_array (33.8007,-1.08962) 101 -dictGet test_01037.dict_array (33.8022,-0.139488) 101 -dictGet test_01037.dict_array (33.8065,2.70857) 3497425 -dictGet test_01037.dict_array (33.8169,-0.607788) 101 -dictGet test_01037.dict_array (33.8203,0.108512) 3501863 -dictGet test_01037.dict_array (33.8231,-1.03449) 101 -dictGet test_01037.dict_array (33.8312,3.49458) 3501829 -dictGet test_01037.dict_array (33.8342,0.297518) 3501863 -dictGet test_01037.dict_array (33.8352,0.165872) 101 -dictGet test_01037.dict_array (33.8354,1.87277) 3497424 -dictGet test_01037.dict_array (33.8371,1.60103) 3497398 -dictGet test_01037.dict_array (33.8387,1.9968) 3497424 -dictGet test_01037.dict_array (33.8403,3.5805) 3501829 -dictGet test_01037.dict_array (33.8414,-0.703067) 101 -dictGet test_01037.dict_array (33.844,-0.179472) 101 -dictGet test_01037.dict_array (33.8468,3.40137) 3501829 -dictGet test_01037.dict_array (33.8509,4.15334) 101 -dictGet test_01037.dict_array (33.8539,2.38339) 3497439 -dictGet test_01037.dict_array (33.858,-1.3122500000000001) 101 -dictGet test_01037.dict_array (33.859,3.72626) 3501829 -dictGet test_01037.dict_array (33.8616,2.24433) 3497424 -dictGet test_01037.dict_array (33.8621,3.01035) 3497438 -dictGet test_01037.dict_array (33.8623,1.17559) 3498129 -dictGet test_01037.dict_array (33.8682,2.706) 3497425 
-dictGet test_01037.dict_array (33.8684,0.189231) 3501863 -dictGet test_01037.dict_array (33.872,1.93574) 3497424 -dictGet test_01037.dict_array (33.8844,3.80404) 3501829 -dictGet test_01037.dict_array (33.8888,0.594884) 3498135 -dictGet test_01037.dict_array (33.8946,2.74161) 3497438 -dictGet test_01037.dict_array (33.9023,0.6239) 3498135 -dictGet test_01037.dict_array (33.9057,0.873222) 3498136 -dictGet test_01037.dict_array (33.9157,-1.26607) 101 -dictGet test_01037.dict_array (33.92,2.06848) 3497397 -dictGet test_01037.dict_array (33.9298,-0.00526229) 101 -dictGet test_01037.dict_array (33.932,3.07063) 3497438 -dictGet test_01037.dict_array (33.9322,0.629385) 3501864 -dictGet test_01037.dict_array (33.9367,-1.41955) 101 -dictGet test_01037.dict_array (33.937,1.42532) 3498173 -dictGet test_01037.dict_array (33.9375,1.1467100000000001) 3498159 -dictGet test_01037.dict_array (33.9434,-1.05739) 101 -dictGet test_01037.dict_array (33.9477,3.34809) 3501829 -dictGet test_01037.dict_array (33.95,2.21715) 3497397 -dictGet test_01037.dict_array (33.955799999999996,0.305176) 3501859 -dictGet test_01037.dict_array (33.9686,-0.28273) 101 -dictGet test_01037.dict_array (33.9703,4.1255) 3501829 -dictGet test_01037.dict_array (33.9707,3.08199) 3497438 -dictGet test_01037.dict_array (33.9754,1.06203) 3498159 -dictGet test_01037.dict_array (33.9757,3.72468) 3501829 -dictGet test_01037.dict_array (33.9775,-0.0440599) 101 -dictGet test_01037.dict_array (33.9777,-0.251484) 101 -dictGet test_01037.dict_array (33.9789,-0.339374) 101 -dictGet test_01037.dict_array (33.9849,2.54515) 3497425 -dictGet test_01037.dict_array (33.9885,-0.318557) 101 -dictGet test_01037.dict_array (33.9977,1.07175) 3498159 -dictGet test_01037.dict_array (33.9984,-0.700517) 101 -dictGet test_01037.dict_array (34.0149,3.53338) 3501829 -dictGet test_01037.dict_array (34.0173,3.39155) 3501829 -dictGet test_01037.dict_array (34.0317,3.9579) 3501829 -dictGet test_01037.dict_array (34.0369,3.83612) 3501829 -dictGet test_01037.dict_array (34.043,-0.0887221) 101 -dictGet test_01037.dict_array (34.0487,1.14252) 3498159 -dictGet test_01037.dict_array (34.052,1.74832) 3497397 -dictGet test_01037.dict_array (34.0711,-0.898071) 101 -dictGet test_01037.dict_array (34.0747,1.55057) 3498173 -dictGet test_01037.dict_array (34.0803,3.16763) 3497438 -dictGet test_01037.dict_array (34.0872,3.75555) 3501829 -dictGet test_01037.dict_array (34.0965,1.62038) 3498173 -dictGet test_01037.dict_array (34.0977,-0.412691) 101 -dictGet test_01037.dict_array (34.0986,0.0294206) 101 -dictGet test_01037.dict_array (34.1072,3.15823) 3497438 -dictGet test_01037.dict_array (34.1092,3.09599) 3497438 -dictGet test_01037.dict_array (34.1206,1.04637) 3498160 -dictGet test_01037.dict_array (34.1209,3.13826) 3497438 -dictGet test_01037.dict_array (34.1265,3.95881) 3501829 -dictGet test_01037.dict_array (34.1286,-0.539319) 101 -dictGet test_01037.dict_array (34.1358,3.67451) 3501829 -dictGet test_01037.dict_array (34.1428,0.136115) 101 -dictGet test_01037.dict_array (34.157,1.73522) 3497397 -dictGet test_01037.dict_array (34.1581,1.48001) 3498172 -dictGet test_01037.dict_array (34.1682,3.42373) 3501829 -dictGet test_01037.dict_array (34.1683,-1.26511) 101 -dictGet test_01037.dict_array (34.1684,4.20007) 101 -dictGet test_01037.dict_array (34.1854,3.32089) 3501829 -dictGet test_01037.dict_array (34.2022,0.749536) 3501864 -dictGet test_01037.dict_array (34.2044,3.04865) 3497438 -dictGet test_01037.dict_array (34.22,-0.500055) 101 -dictGet test_01037.dict_array (34.2249,0.743775) 
3501864 -dictGet test_01037.dict_array (34.2254,1.34702) 3498172 -dictGet test_01037.dict_array (34.2355,-0.898843) 101 -dictGet test_01037.dict_array (34.2394,2.0203699999999998) 3497439 -dictGet test_01037.dict_array (34.2466,1.83785) 3498251 -dictGet test_01037.dict_array (34.247,4.09563) 101 -dictGet test_01037.dict_array (34.2508,2.61312) 3497439 -dictGet test_01037.dict_array (34.2517,1.69642) 3498251 -dictGet test_01037.dict_array (34.2564,4.13033) 101 -dictGet test_01037.dict_array (34.2574,4.18928) 101 -dictGet test_01037.dict_array (34.2614,-0.478719) 101 -dictGet test_01037.dict_array (34.2625,2.38088) 3497439 -dictGet test_01037.dict_array (34.2666,3.1503) 3501829 -dictGet test_01037.dict_array (34.271,4.02223) 101 -dictGet test_01037.dict_array (34.2727,0.514755) 101 -dictGet test_01037.dict_array (34.278,1.98929) 3497439 -dictGet test_01037.dict_array (34.2798,-0.199208) 101 -dictGet test_01037.dict_array (34.2804,2.05184) 3497439 -dictGet test_01037.dict_array (34.2945,-1.11051) 101 -dictGet test_01037.dict_array (34.3168,-0.0829721) 101 -dictGet test_01037.dict_array (34.3345,3.4358) 3501829 -dictGet test_01037.dict_array (34.3377,1.13527) 3498162 -dictGet test_01037.dict_array (34.3383,1.27891) 3498161 -dictGet test_01037.dict_array (34.3391,1.47945) 3498161 -dictGet test_01037.dict_array (34.3441,0.627014) 101 -dictGet test_01037.dict_array (34.347,2.4853) 3497439 -dictGet test_01037.dict_array (34.3514,2.16247) 3497439 -dictGet test_01037.dict_array (34.3627,2.64533) 3497439 -dictGet test_01037.dict_array (34.3682,-0.227501) 101 -dictGet test_01037.dict_array (34.3756,4.21248) 101 -dictGet test_01037.dict_array (34.379,3.96604) 101 -dictGet test_01037.dict_array (34.3827,1.7518) 3498251 -dictGet test_01037.dict_array (34.3912,2.8834) 3501830 -dictGet test_01037.dict_array (34.3919,0.668829) 101 -dictGet test_01037.dict_array (34.3949,2.00338) 3497439 -dictGet test_01037.dict_array (34.3987,0.557268) 101 -dictGet test_01037.dict_array (34.4111,0.768558) 101 -dictGet test_01037.dict_array (34.4119,2.8742) 3501830 -dictGet test_01037.dict_array (34.416,3.50841) 3501829 -dictGet test_01037.dict_array (34.4212,1.24916) 3498161 -dictGet test_01037.dict_array (34.4251,0.457029) 101 -dictGet test_01037.dict_array (34.4274,-0.902559) 101 -dictGet test_01037.dict_array (34.4325,4.03159) 101 -dictGet test_01037.dict_array (34.438,1.63994) 3498251 -dictGet test_01037.dict_array (34.4403,-0.177594) 101 -dictGet test_01037.dict_array (34.4421,0.726712) 101 -dictGet test_01037.dict_array (34.4517,2.98611) 3501830 -dictGet test_01037.dict_array (34.4658,-1.312) 101 -dictGet test_01037.dict_array (34.4732,-0.0681338) 101 -dictGet test_01037.dict_array (34.4752,2.81646) 3501830 -dictGet test_01037.dict_array (34.4914,2.3858) 3497439 -dictGet test_01037.dict_array (34.4923,0.855231) 101 -dictGet test_01037.dict_array (34.5235,1.78468) 3498251 -dictGet test_01037.dict_array (34.5305,4.10608) 101 -dictGet test_01037.dict_array (34.5389,0.621937) 101 -dictGet test_01037.dict_array (34.5406,3.17145) 101 -dictGet test_01037.dict_array (34.5434,-0.56306) 101 -dictGet test_01037.dict_array (34.5449,3.13311) 3501829 -dictGet test_01037.dict_array (34.5491,2.31572) 3497439 -dictGet test_01037.dict_array (34.5539,2.94028) 3501830 -dictGet test_01037.dict_array (34.5546,-0.208825) 101 -dictGet test_01037.dict_array (34.5549,3.78486) 101 -dictGet test_01037.dict_array (34.5676,0.307148) 101 -dictGet test_01037.dict_array (34.5743,1.5217399999999999) 3501838 -dictGet test_01037.dict_array 
(34.5775,3.48046) 101 -dictGet test_01037.dict_array (34.5815,2.5243700000000002) 3501830 -dictGet test_01037.dict_array (34.5841,4.21191) 101 -dictGet test_01037.dict_array (34.5887,2.65083) 3501830 -dictGet test_01037.dict_array (34.5937,3.2143) 101 -dictGet test_01037.dict_array (34.6013,-1.0612) 101 -dictGet test_01037.dict_array (34.6089,1.36066) 3501838 -dictGet test_01037.dict_array (34.6103,3.40227) 101 -dictGet test_01037.dict_array (34.6128,1.92276) 3498251 -dictGet test_01037.dict_array (34.6175,2.43627) 3498251 -dictGet test_01037.dict_array (34.6209,3.43776) 101 -dictGet test_01037.dict_array (34.6234,2.60237) 3501830 -dictGet test_01037.dict_array (34.6275,3.52479) 101 -dictGet test_01037.dict_array (34.635,0.568558) 101 -dictGet test_01037.dict_array (34.6373,2.37692) 3498251 -dictGet test_01037.dict_array (34.6375,3.52234) 101 -dictGet test_01037.dict_array (34.6426,2.12397) 3498251 -dictGet test_01037.dict_array (34.6513,2.80915) 3501830 -dictGet test_01037.dict_array (34.6632,2.30039) 3498251 -dictGet test_01037.dict_array (34.6691,1.86582) 3498251 -dictGet test_01037.dict_array (34.6739,0.15342) 101 -dictGet test_01037.dict_array (34.6825,0.0499679) 101 -dictGet test_01037.dict_array (34.6893,0.454326) 101 -dictGet test_01037.dict_array (34.6957,-0.358598) 101 -dictGet test_01037.dict_array (34.6986,0.562679) 101 -dictGet test_01037.dict_array (34.712,1.12114) 101 -dictGet test_01037.dict_array (34.7126,-0.0057301) 101 -dictGet test_01037.dict_array (34.7137,0.0248501) 101 -dictGet test_01037.dict_array (34.7162,1.15623) 101 -dictGet test_01037.dict_array (34.7258,3.95142) 101 -dictGet test_01037.dict_array (34.7347,3.5232099999999997) 101 -dictGet test_01037.dict_array (34.7363,2.23374) 3501830 -dictGet test_01037.dict_array (34.7375,0.397841) 101 -dictGet test_01037.dict_array (34.7423,3.09198) 101 -dictGet test_01037.dict_array (34.7452,3.09029) 101 -dictGet test_01037.dict_array (34.7539,-1.06943) 101 -dictGet test_01037.dict_array (34.7733,-0.00912717) 101 -dictGet test_01037.dict_array (34.774,2.71088) 3501830 -dictGet test_01037.dict_array (34.7771,1.46009) 3501835 -dictGet test_01037.dict_array (34.7782,-1.28308) 101 -dictGet test_01037.dict_array (34.7924,3.63564) 101 -dictGet test_01037.dict_array (34.7939,-0.416676) 101 -dictGet test_01037.dict_array (34.7964,-0.401773) 101 -dictGet test_01037.dict_array (34.7974,0.0286873) 101 -dictGet test_01037.dict_array (34.7975,3.05965) 101 -dictGet test_01037.dict_array (34.8037,3.07263) 101 -dictGet test_01037.dict_array (34.8254,-0.390284) 101 -dictGet test_01037.dict_array (34.828,1.91869) 3498251 -dictGet test_01037.dict_array (34.8289,3.71058) 101 -dictGet test_01037.dict_array (34.8403,2.14606) 3501835 -dictGet test_01037.dict_array (34.8437,2.20617) 3501830 -dictGet test_01037.dict_array (34.8469,2.38435) 3501830 -dictGet test_01037.dict_array (34.86,1.45705) 101 -dictGet test_01037.dict_array (34.8612,0.914248) 101 -dictGet test_01037.dict_array (34.8663,3.4215400000000002) 101 -dictGet test_01037.dict_array (34.8724,-0.375144) 101 -dictGet test_01037.dict_array (34.8795,3.29317) 101 -dictGet test_01037.dict_array (34.8823,1.21988) 101 -dictGet test_01037.dict_array (34.8834,1.07657) 101 -dictGet test_01037.dict_array (34.8837,0.157648) 101 -dictGet test_01037.dict_array (34.8871,-0.9755) 101 -dictGet test_01037.dict_array (34.8871,1.8943699999999999) 3501835 -dictGet test_01037.dict_array (34.889,3.36756) 101 -dictGet test_01037.dict_array (34.8907,1.24874) 101 -dictGet test_01037.dict_array (34.8965,3.13508) 
101 -dictGet test_01037.dict_array (34.9042,2.62092) 101 -dictGet test_01037.dict_array (34.9055,-0.0448967) 101 -dictGet test_01037.dict_array (34.9122,0.110576) 101 -dictGet test_01037.dict_array (34.9228,3.60183) 101 -dictGet test_01037.dict_array (34.9237,1.21715) 101 -dictGet test_01037.dict_array (34.9296,1.70459) 3501835 -dictGet test_01037.dict_array (34.941,-1.14663) 101 -dictGet test_01037.dict_array (34.9448,1.18923) 101 -dictGet test_01037.dict_array (34.9462,3.81678) 101 -dictGet test_01037.dict_array (34.9466,0.593463) 101 -dictGet test_01037.dict_array (34.9485,0.150307) 101 -dictGet test_01037.dict_array (34.9542,0.487238) 101 -dictGet test_01037.dict_array (34.9559,2.03473) 3501835 -dictGet test_01037.dict_array (34.9671,-0.960225) 101 -dictGet test_01037.dict_array (34.9711,2.63444) 101 -dictGet test_01037.dict_array (34.9892,0.354775) 101 -dictGet test_01037.dict_array (34.9907,1.40724) 101 -dictGet test_01037.dict_array (34.9916,-0.00173097) 101 -dictGet test_01037.dict_array (34.9919,2.06167) 101 +dictGet dict_array (29.5699,2.50068) 101 +dictGet dict_array (29.5796,1.55456) 101 +dictGet dict_array (29.5796,2.36864) 101 +dictGet dict_array (29.5844,1.59626) 101 +dictGet dict_array (29.5886,4.03321) 101 +dictGet dict_array (29.5914,3.02628) 101 +dictGet dict_array (29.5926,-0.0965169) 101 +dictGet dict_array (29.5968,2.37773) 101 +dictGet dict_array (29.5984,0.755853) 101 +dictGet dict_array (29.6066,3.47173) 101 +dictGet dict_array (29.6085,-1.26007) 6489978 +dictGet dict_array (29.6131,0.246565) 101 +dictGet dict_array (29.6157,-0.266687) 101 +dictGet dict_array (29.6164,2.94674) 101 +dictGet dict_array (29.6195,-0.591941) 101 +dictGet dict_array (29.6231,1.54818) 101 +dictGet dict_array (29.6379,0.764114) 101 +dictGet dict_array (29.6462,-0.772059) 934530 +dictGet dict_array (29.6579,-1.07336) 6489978 +dictGet dict_array (29.6618,-0.271842) 101 +dictGet dict_array (29.6629,-0.303602) 101 +dictGet dict_array (29.6659,-0.782823) 934530 +dictGet dict_array (29.6736,-0.113832) 101 +dictGet dict_array (29.6759,3.02905) 101 +dictGet dict_array (29.6778,3.71898) 101 +dictGet dict_array (29.6796,1.10433) 101 +dictGet dict_array (29.6809,2.13677) 101 +dictGet dict_array (29.6935,4.11894) 101 +dictGet dict_array (29.6991,-1.4458199999999999) 101 +dictGet dict_array (29.6997,3.17297) 101 +dictGet dict_array (29.7043,3.6145899999999997) 101 +dictGet dict_array (29.7065,3.24885) 101 +dictGet dict_array (29.7126,0.28108) 101 +dictGet dict_array (29.7192,0.174273) 101 +dictGet dict_array (29.7217,-0.523481) 3501900 +dictGet dict_array (29.7271,1.67967) 101 +dictGet dict_array (29.7311,4.12444) 101 +dictGet dict_array (29.7347,1.88378) 101 +dictGet dict_array (29.7358,0.67944) 101 +dictGet dict_array (29.7366,-0.2973) 101 +dictGet dict_array (29.7446,0.646536) 101 +dictGet dict_array (29.7453,-0.567963) 3501900 +dictGet dict_array (29.764,4.04217) 101 +dictGet dict_array (29.7655,1.51372) 101 +dictGet dict_array (29.7744,1.12435) 101 +dictGet dict_array (29.7774,-0.0681196) 3501895 +dictGet dict_array (29.7784,1.54864) 101 +dictGet dict_array (29.7785,2.24139) 101 +dictGet dict_array (29.7922,0.220808) 101 +dictGet dict_array (29.7936,2.37709) 101 +dictGet dict_array (29.8008,0.948536) 101 +dictGet dict_array (29.8115,0.201227) 101 +dictGet dict_array (29.814,0.149601) 3501895 +dictGet dict_array (29.8193,-1.35858) 101 +dictGet dict_array (29.8201,0.965518) 101 +dictGet dict_array (29.8265,-0.727286) 3501900 +dictGet dict_array (29.8277,-0.531746) 3501900 +dictGet dict_array 
(29.8289,3.63009) 101 +dictGet dict_array (29.8548,0.838047) 101 +dictGet dict_array (29.8641,-0.845265) 3501900 +dictGet dict_array (29.8649,0.0562212) 3501895 +dictGet dict_array (29.8701,-1.02045) 934530 +dictGet dict_array (29.8733,2.76654) 101 +dictGet dict_array (29.876,0.555475) 101 +dictGet dict_array (29.8794,-0.800108) 3501900 +dictGet dict_array (29.8813,2.7426399999999997) 101 +dictGet dict_array (29.897100000000002,2.66193) 101 +dictGet dict_array (29.908,4.01339) 101 +dictGet dict_array (29.9165,-1.08246) 3501894 +dictGet dict_array (29.9201,-0.420861) 3498054 +dictGet dict_array (29.9217,3.03778) 101 +dictGet dict_array (29.9355,0.773833) 101 +dictGet dict_array (29.947,3.76517) 101 +dictGet dict_array (29.9518,-0.60557) 3498056 +dictGet dict_array (29.9564,-0.600163) 3498056 +dictGet dict_array (29.959600000000002,4.16591) 101 +dictGet dict_array (29.9615,-1.33708) 3501894 +dictGet dict_array (29.9699,-0.392375) 3498054 +dictGet dict_array (29.9776,1.04552) 101 +dictGet dict_array (29.9784,4.02756) 101 +dictGet dict_array (29.9819,4.00597) 101 +dictGet dict_array (29.9826,1.2816100000000001) 101 +dictGet dict_array (30.0026,2.76257) 101 +dictGet dict_array (30.0126,3.68255) 101 +dictGet dict_array (30.0131,0.796576) 3501892 +dictGet dict_array (30.018,1.16523) 101 +dictGet dict_array (30.0261,-0.210653) 3501896 +dictGet dict_array (30.0472,-1.11007) 3501894 +dictGet dict_array (30.0542,-0.479585) 3498054 +dictGet dict_array (30.0613,1.6278000000000001) 101 +dictGet dict_array (30.0617,-0.0551152) 3501895 +dictGet dict_array (30.0637,2.62066) 101 +dictGet dict_array (30.0721,1.6424400000000001) 101 +dictGet dict_array (30.0769,-0.402636) 3498054 +dictGet dict_array (30.0791,-0.277435) 3501896 +dictGet dict_array (30.0931,0.0327512) 3501895 +dictGet dict_array (30.1059,3.52623) 101 +dictGet dict_array (30.1103,0.865466) 3501892 +dictGet dict_array (30.1115,2.95243) 101 +dictGet dict_array (30.1144,1.71029) 101 +dictGet dict_array (30.1311,-0.864751) 3501899 +dictGet dict_array (30.1336,-0.851386) 3501899 +dictGet dict_array (30.1393,3.89901) 101 +dictGet dict_array (30.1456,-0.531898) 3498054 +dictGet dict_array (30.1492,2.07833) 101 +dictGet dict_array (30.1575,2.43856) 101 +dictGet dict_array (30.1682,1.19771) 101 +dictGet dict_array (30.1716,3.9853300000000003) 101 +dictGet dict_array (30.1849,2.78374) 101 +dictGet dict_array (30.1866,0.65658) 3498021 +dictGet dict_array (30.1885,1.56943) 101 +dictGet dict_array (30.1959,-1.38202) 101 +dictGet dict_array (30.1999,1.58413) 101 +dictGet dict_array (30.2024,0.713081) 3498021 +dictGet dict_array (30.2054,0.620143) 3498021 +dictGet dict_array (30.2091,1.51641) 101 +dictGet dict_array (30.2124,-0.331782) 3498031 +dictGet dict_array (30.226,3.03527) 101 +dictGet dict_array (30.2261,3.18486) 101 +dictGet dict_array (30.2288,2.48407) 101 +dictGet dict_array (30.2345,3.7462400000000002) 101 +dictGet dict_array (30.2375,0.62046) 3498021 +dictGet dict_array (30.2425,-0.472914) 3498054 +dictGet dict_array (30.247,3.95863) 101 +dictGet dict_array (30.2494,-0.305093) 3498031 +dictGet dict_array (30.2499,2.54337) 101 +dictGet dict_array (30.2606,2.16644) 101 +dictGet dict_array (30.2672,3.94847) 101 +dictGet dict_array (30.2709,-0.136264) 6088794 +dictGet dict_array (30.2764,1.18654) 101 +dictGet dict_array (30.2765,1.20383) 101 +dictGet dict_array (30.2839,1.05762) 3498024 +dictGet dict_array (30.286,0.469327) 3498021 +dictGet dict_array (30.2927,3.1693) 101 +dictGet dict_array (30.2935,3.49854) 101 +dictGet dict_array (30.307,0.312338) 
3498021 +dictGet dict_array (30.3085,1.07791) 3498024 +dictGet dict_array (30.3139,2.77248) 101 +dictGet dict_array (30.314,0.822823) 3498024 +dictGet dict_array (30.3227,-0.587351) 3498055 +dictGet dict_array (30.332,1.00174) 3498024 +dictGet dict_array (30.3388,0.844148) 3498024 +dictGet dict_array (30.3485,0.561902) 3498021 +dictGet dict_array (30.3497,0.180362) 6489998 +dictGet dict_array (30.361,4.13016) 101 +dictGet dict_array (30.3623,-0.0484027) 6489998 +dictGet dict_array (30.3638,3.9845800000000002) 101 +dictGet dict_array (30.3853,3.16051) 101 +dictGet dict_array (30.3974,2.6617800000000003) 101 +dictGet dict_array (30.4002,-1.15886) 101 +dictGet dict_array (30.4008,-0.387015) 3498031 +dictGet dict_array (30.4018,1.86493) 101 +dictGet dict_array (30.4239,1.16818) 3498024 +dictGet dict_array (30.4363,3.63938) 101 +dictGet dict_array (30.4377,-0.81315) 3498063 +dictGet dict_array (30.4391,3.54703) 101 +dictGet dict_array (30.4424,-1.39435) 101 +dictGet dict_array (30.4441,2.8463000000000003) 101 +dictGet dict_array (30.4517,3.28117) 101 +dictGet dict_array (30.4658,2.6928) 101 +dictGet dict_array (30.4734,2.66161) 101 +dictGet dict_array (30.4799,-1.07578) 101 +dictGet dict_array (30.4837,-1.02486) 3501899 +dictGet dict_array (30.485,1.06326) 3498024 +dictGet dict_array (30.495,1.12306) 101 +dictGet dict_array (30.501,2.27264) 101 +dictGet dict_array (30.5027,1.99382) 101 +dictGet dict_array (30.5194,-1.03943) 3501893 +dictGet dict_array (30.5239,1.04328) 101 +dictGet dict_array (30.528,3.82041) 101 +dictGet dict_array (30.5299,-0.715248) 3498063 +dictGet dict_array (30.5331,1.19603) 101 +dictGet dict_array (30.535800000000002,2.71485) 101 +dictGet dict_array (30.5405,0.804694) 3498023 +dictGet dict_array (30.542,1.23739) 101 +dictGet dict_array (30.5432,4.04189) 101 +dictGet dict_array (30.5457,-0.956121) 3501893 +dictGet dict_array (30.5506,3.07443) 101 +dictGet dict_array (30.5539,3.87084) 101 +dictGet dict_array (30.5578,3.78837) 101 +dictGet dict_array (30.5588,0.966135) 3498022 +dictGet dict_array (30.5637,2.5605) 101 +dictGet dict_array (30.5647,-1.27328) 101 +dictGet dict_array (30.5656,-0.0581332) 6088794 +dictGet dict_array (30.5715,0.65755) 3498023 +dictGet dict_array (30.5727,3.01604) 101 +dictGet dict_array (30.5729,-0.976857) 3501893 +dictGet dict_array (30.5751,0.60204) 3498023 +dictGet dict_array (30.5854,3.02473) 101 +dictGet dict_array (30.5866,0.174099) 6489998 +dictGet dict_array (30.5947,0.875193) 3498023 +dictGet dict_array (30.5992,-0.403901) 3498063 +dictGet dict_array (30.6002,4.18891) 101 +dictGet dict_array (30.6025,0.217712) 6489998 +dictGet dict_array (30.6054,0.927203) 3498022 +dictGet dict_array (30.6075,3.79359) 101 +dictGet dict_array (30.6159,3.82773) 101 +dictGet dict_array (30.627,3.84039) 101 +dictGet dict_array (30.6308,0.77517) 3498023 +dictGet dict_array (30.6338,0.179565) 6489998 +dictGet dict_array (30.6461,1.3293599999999999) 101 +dictGet dict_array (30.6674,-0.424547) 3498063 +dictGet dict_array (30.669,1.76539) 101 +dictGet dict_array (30.6788,4.01239) 101 +dictGet dict_array (30.6864,3.59158) 101 +dictGet dict_array (30.7049,-0.875413) 3501893 +dictGet dict_array (30.705,1.3307) 101 +dictGet dict_array (30.7063,-0.473192) 3498063 +dictGet dict_array (30.7075,-1.1958199999999999) 101 +dictGet dict_array (30.7101,-0.367562) 3498012 +dictGet dict_array (30.7203,2.98725) 101 +dictGet dict_array (30.7213,2.2745699999999998) 101 +dictGet dict_array (30.7446,-0.334144) 3498012 +dictGet dict_array (30.7468,3.82967) 101 +dictGet dict_array 
(30.747,-0.384779) 3498012 +dictGet dict_array (30.7681,0.904198) 3498022 +dictGet dict_array (30.7757,1.78743) 101 +dictGet dict_array (30.8021,-0.479212) 3498012 +dictGet dict_array (30.8079,-1.40869) 101 +dictGet dict_array (30.8206,-0.0608489) 3498012 +dictGet dict_array (30.8218,0.43909) 3498023 +dictGet dict_array (30.8239,0.10014) 3498012 +dictGet dict_array (30.8282,4.15409) 101 +dictGet dict_array (30.8288,-0.709528) 3501893 +dictGet dict_array (30.8326,0.156011) 3498012 +dictGet dict_array (30.8328,-1.03704) 101 +dictGet dict_array (30.839,2.15528) 101 +dictGet dict_array (30.8452,0.219377) 3498013 +dictGet dict_array (30.8463,0.0515355) 3498012 +dictGet dict_array (30.8526,2.06614) 101 +dictGet dict_array (30.8566,0.517876) 3498023 +dictGet dict_array (30.8588,-1.31738) 101 +dictGet dict_array (30.8681,0.44207) 3498013 +dictGet dict_array (30.8914,1.0072) 3498022 +dictGet dict_array (30.897,0.483425) 3498013 +dictGet dict_array (30.905,2.8731999999999998) 3501793 +dictGet dict_array (30.9051,2.21956) 101 +dictGet dict_array (30.9115,4.00663) 101 +dictGet dict_array (30.9167,-0.834462) 3501893 +dictGet dict_array (30.9252,-1.3289900000000001) 101 +dictGet dict_array (30.9314,1.85384) 101 +dictGet dict_array (30.9392,2.53236) 3501827 +dictGet dict_array (30.9569,2.82038) 3501793 +dictGet dict_array (30.9598,-0.641011) 3498012 +dictGet dict_array (30.9601,-0.254928) 3498012 +dictGet dict_array (30.9623,-1.3886) 101 +dictGet dict_array (30.9707,0.888854) 3498022 +dictGet dict_array (30.9766,2.81957) 3501793 +dictGet dict_array (30.9775,2.69273) 3501793 +dictGet dict_array (30.9821,0.587715) 3498013 +dictGet dict_array (30.9887,4.0233) 101 +dictGet dict_array (30.9914,0.259542) 3498013 +dictGet dict_array (30.9986,-1.36832) 101 +dictGet dict_array (31.008,0.628999) 3498013 +dictGet dict_array (31.0168,-1.17462) 101 +dictGet dict_array (31.0237,3.52547) 3501821 +dictGet dict_array (31.0306,3.78522) 101 +dictGet dict_array (31.0308,-0.72453) 3501893 +dictGet dict_array (31.0463,2.41997) 3501825 +dictGet dict_array (31.047,0.624184) 3498013 +dictGet dict_array (31.0569,0.0706393) 3498015 +dictGet dict_array (31.0583,1.3244099999999999) 3501926 +dictGet dict_array (31.063,3.23861) 3501793 +dictGet dict_array (31.068,0.695575) 3498022 +dictGet dict_array (31.0687,1.85675) 101 +dictGet dict_array (31.0692,0.254793) 3498014 +dictGet dict_array (31.0766,0.828128) 3498022 +dictGet dict_array (31.0833,0.0612782) 3498015 +dictGet dict_array (31.0833,2.59748) 3501793 +dictGet dict_array (31.0861,-1.3778299999999999) 101 +dictGet dict_array (31.0874,3.07258) 3501793 +dictGet dict_array (31.0882,1.4882) 3501926 +dictGet dict_array (31.0924,3.42242) 3501821 +dictGet dict_array (31.0927,2.67448) 3501793 +dictGet dict_array (31.0936,1.12292) 3498022 +dictGet dict_array (31.0952,-0.336928) 3498012 +dictGet dict_array (31.0978,3.48482) 3501826 +dictGet dict_array (31.1107,3.7513199999999998) 3501826 +dictGet dict_array (31.1156,1.19171) 3501926 +dictGet dict_array (31.1176,0.223509) 3498015 +dictGet dict_array (31.1249,0.946838) 3498022 +dictGet dict_array (31.1267,1.48983) 3501926 +dictGet dict_array (31.138,-0.289981) 3501898 +dictGet dict_array (31.1382,3.02904) 3501793 +dictGet dict_array (31.1475,2.6178) 3501793 +dictGet dict_array (31.1491,1.37873) 3501926 +dictGet dict_array (31.1525,3.72105) 3501826 +dictGet dict_array (31.1526,-1.4129800000000001) 101 +dictGet dict_array (31.1526,-0.186457) 3501898 +dictGet dict_array (31.1539,2.78789) 3501793 +dictGet dict_array (31.1548,-1.08552) 101 
+dictGet dict_array (31.1567,-0.0768925) 3501898 +dictGet dict_array (31.1613,1.49617) 3501926 +dictGet dict_array (31.1653,1.03777) 3498022 +dictGet dict_array (31.1662,3.4214700000000002) 3501826 +dictGet dict_array (31.1672,-0.0813169) 3501898 +dictGet dict_array (31.177,0.440843) 3498014 +dictGet dict_array (31.1788,-0.737151) 3501893 +dictGet dict_array (31.1856,-0.144396) 3501898 +dictGet dict_array (31.1959,3.66813) 3501826 +dictGet dict_array (31.1996,-0.353983) 3501898 +dictGet dict_array (31.2019,2.86802) 3501793 +dictGet dict_array (31.2087,2.31245) 3501825 +dictGet dict_array (31.2125,3.2713200000000002) 3501793 +dictGet dict_array (31.2137,-0.108129) 3501898 +dictGet dict_array (31.216,3.9156) 101 +dictGet dict_array (31.2201,-0.202141) 3501898 +dictGet dict_array (31.2285,2.09058) 101 +dictGet dict_array (31.2502,4.01526) 101 +dictGet dict_array (31.2585,3.11524) 3501793 +dictGet dict_array (31.2645,-0.620418) 3501890 +dictGet dict_array (31.2684,2.74277) 3501793 +dictGet dict_array (31.2821,-1.12772) 101 +dictGet dict_array (31.2821,2.46769) 3501825 +dictGet dict_array (31.2887,3.91396) 101 +dictGet dict_array (31.295,1.49942) 3501926 +dictGet dict_array (31.2997,3.46122) 3501826 +dictGet dict_array (31.3017,3.3263) 3501826 +dictGet dict_array (31.3022,3.16754) 3501793 +dictGet dict_array (31.3048,0.364962) 3498014 +dictGet dict_array (31.305,3.1967) 3501793 +dictGet dict_array (31.3061,1.84303) 101 +dictGet dict_array (31.3082,-0.173851) 3501898 +dictGet dict_array (31.3315,3.90932) 101 +dictGet dict_array (31.3351,2.80164) 3501793 +dictGet dict_array (31.3388,0.168765) 3498015 +dictGet dict_array (31.339,0.25535) 3498094 +dictGet dict_array (31.3423,1.7036799999999999) 3501926 +dictGet dict_array (31.349,0.386456) 3498014 +dictGet dict_array (31.3558,-1.04336) 101 +dictGet dict_array (31.3564,0.478876) 3498014 +dictGet dict_array (31.3607,-0.0860507) 3498015 +dictGet dict_array (31.3831,3.84469) 101 +dictGet dict_array (31.3886,-0.731137) 3501890 +dictGet dict_array (31.4043,-0.348907) 5457271 +dictGet dict_array (31.4081,1.47391) 3501926 +dictGet dict_array (31.4176,-0.583645) 5457271 +dictGet dict_array (31.4177,1.36972) 3501926 +dictGet dict_array (31.4182,0.958303) 3498022 +dictGet dict_array (31.4199,3.1738) 3501793 +dictGet dict_array (31.4221,2.74876) 3501825 +dictGet dict_array (31.4301,-0.122643) 3498015 +dictGet dict_array (31.4344,1.00661) 3498022 +dictGet dict_array (31.4375,4.20304) 101 +dictGet dict_array (31.4377,0.289608) 3498094 +dictGet dict_array (31.4379,0.54744) 3498014 +dictGet dict_array (31.4459,3.94945) 101 +dictGet dict_array (31.4559,-0.345063) 5457271 +dictGet dict_array (31.464,0.726129) 3498014 +dictGet dict_array (31.4662,-0.299019) 3498015 +dictGet dict_array (31.4671,1.9605299999999999) 3501794 +dictGet dict_array (31.4673,-0.403676) 5457271 +dictGet dict_array (31.4712,-0.237941) 3498015 +dictGet dict_array (31.4816,0.120264) 3498015 +dictGet dict_array (31.4875,0.323483) 3498014 +dictGet dict_array (31.490099999999998,-0.338163) 5457271 +dictGet dict_array (31.4932,0.517674) 3498014 +dictGet dict_array (31.5112,1.9689299999999998) 3501794 +dictGet dict_array (31.5122,2.92785) 3501791 +dictGet dict_array (31.5151,0.166429) 3498094 +dictGet dict_array (31.5174,2.94802) 3501791 +dictGet dict_array (31.5182,4.18776) 101 +dictGet dict_array (31.5238,1.18793) 3498003 +dictGet dict_array (31.5271,3.07446) 3501791 +dictGet dict_array (31.5393,1.58061) 3501794 +dictGet dict_array (31.5421,3.13711) 3501791 +dictGet dict_array (31.5479,2.39897) 
3497970 +dictGet dict_array (31.5519,0.99285) 3498003 +dictGet dict_array (31.5685,3.47987) 3501824 +dictGet dict_array (31.5959,0.437382) 3498014 +dictGet dict_array (31.6003,0.194376) 3498094 +dictGet dict_array (31.6026,2.15457) 3501794 +dictGet dict_array (31.606,2.45365) 3497970 +dictGet dict_array (31.6062,-0.453441) 3501890 +dictGet dict_array (31.6107,1.35247) 3497974 +dictGet dict_array (31.6155,3.85588) 101 +dictGet dict_array (31.6222,2.03326) 3501794 +dictGet dict_array (31.6231,-0.123059) 3498083 +dictGet dict_array (31.6244,1.6885599999999998) 3497974 +dictGet dict_array (31.6459,0.669716) 3498014 +dictGet dict_array (31.6563,-0.0644741) 3498083 +dictGet dict_array (31.6618,-0.551121) 3501890 +dictGet dict_array (31.6725,-0.38922) 3498085 +dictGet dict_array (31.6727,4.10336) 101 +dictGet dict_array (31.6739,4.1391) 101 +dictGet dict_array (31.6897,2.8694699999999997) 3501792 +dictGet dict_array (31.6902,3.98792) 101 +dictGet dict_array (31.6945,2.46687) 3497970 +dictGet dict_array (31.6987,-1.3796) 101 +dictGet dict_array (31.7012,2.34845) 3497970 +dictGet dict_array (31.7036,0.0228348) 3501888 +dictGet dict_array (31.7046,3.68111) 3501824 +dictGet dict_array (31.7055,2.92556) 3501792 +dictGet dict_array (31.7102,1.04532) 3498003 +dictGet dict_array (31.7149,-0.443302) 3498085 +dictGet dict_array (31.7195,2.99311) 3501791 +dictGet dict_array (31.7274,0.166719) 3498094 +dictGet dict_array (31.7565,-0.565382) 3498085 +dictGet dict_array (31.7615,0.771626) 3498014 +dictGet dict_array (31.7739,1.8970099999999999) 3497974 +dictGet dict_array (31.7848,1.2623199999999999) 3498003 +dictGet dict_array (31.7912,-0.788599) 101 +dictGet dict_array (31.8011,2.65853) 3497970 +dictGet dict_array (31.8032,-0.0590108) 3501888 +dictGet dict_array (31.8038,1.9618799999999998) 3497974 +dictGet dict_array (31.8098,-1.46851) 101 +dictGet dict_array (31.8131,3.41982) 3501791 +dictGet dict_array (31.8169,3.31059) 3501791 +dictGet dict_array (31.8202,-0.193692) 3501888 +dictGet dict_array (31.8306,1.57586) 3497974 +dictGet dict_array (31.8382,-0.787948) 101 +dictGet dict_array (31.8433,2.49692) 3497970 +dictGet dict_array (31.8436,2.41851) 3497970 +dictGet dict_array (31.8563,-1.10787) 101 +dictGet dict_array (31.8683,0.996504) 3498002 +dictGet dict_array (31.8693,-0.828142) 101 +dictGet dict_array (31.8723,1.08929) 3498003 +dictGet dict_array (31.8737,0.881127) 3498002 +dictGet dict_array (31.8881,-0.58441) 101 +dictGet dict_array (31.9011,0.121349) 3498094 +dictGet dict_array (31.9066,2.13045) 3497965 +dictGet dict_array (31.9142,1.03368) 3498002 +dictGet dict_array (31.9155,3.38363) 3501791 +dictGet dict_array (31.9168,1.3166) 3498004 +dictGet dict_array (31.9185,-1.11879) 101 +dictGet dict_array (31.9186,-0.647948) 101 +dictGet dict_array (31.9311,3.96928) 101 +dictGet dict_array (31.9335,1.47048) 3497974 +dictGet dict_array (31.9443,-1.36175) 101 +dictGet dict_array (31.9481,2.34231) 3497970 +dictGet dict_array (31.9526,1.36565) 3498004 +dictGet dict_array (31.9629,2.5208399999999997) 3497970 +dictGet dict_array (31.9765,0.975783) 3498002 +dictGet dict_array (31.9923,3.31773) 3501791 +dictGet dict_array (31.9994,0.972816) 3498002 +dictGet dict_array (32.001,3.47425) 3501791 +dictGet dict_array (32.0127,2.13874) 3497965 +dictGet dict_array (32.0244,3.2092) 3501792 +dictGet dict_array (32.029,1.18039) 3498004 +dictGet dict_array (32.0315,0.566073) 3498095 +dictGet dict_array (32.0354,1.0766499999999999) 3498004 +dictGet dict_array (32.0399,-1.11576) 101 +dictGet dict_array (32.053,2.16849) 
3497965 +dictGet dict_array (32.0542,0.042328) 3498096 +dictGet dict_array (32.0576,2.47001) 3497970 +dictGet dict_array (32.061,3.7498899999999997) 101 +dictGet dict_array (32.0623,1.25134) 3498004 +dictGet dict_array (32.0626,1.9611399999999999) 3497965 +dictGet dict_array (32.0666,-0.0904247) 3498096 +dictGet dict_array (32.0681,2.28442) 3497970 +dictGet dict_array (32.0692,1.50869) 3497981 +dictGet dict_array (32.0724,4.03314) 101 +dictGet dict_array (32.0729,-0.064324) 101 +dictGet dict_array (32.079,0.293758) 3498094 +dictGet dict_array (32.0847,-1.19814) 101 +dictGet dict_array (32.0974,-0.91927) 101 +dictGet dict_array (32.0979,-0.736979) 101 +dictGet dict_array (32.106,-1.33063) 101 +dictGet dict_array (32.1189,0.246715) 3498094 +dictGet dict_array (32.1207,4.00883) 101 +dictGet dict_array (32.1396,1.12402) 3498004 +dictGet dict_array (32.1413,1.5668) 3497981 +dictGet dict_array (32.143,1.35559) 3498004 +dictGet dict_array (32.1538,1.32881) 3498004 +dictGet dict_array (32.1549,4.06552) 101 +dictGet dict_array (32.1555,-0.79275) 101 +dictGet dict_array (32.163,1.17733) 3498004 +dictGet dict_array (32.1634,2.94273) 3501792 +dictGet dict_array (32.1644,1.85666) 3497965 +dictGet dict_array (32.1745,0.435458) 3498095 +dictGet dict_array (32.1765,1.65149) 3497981 +dictGet dict_array (32.1893,2.08924) 3497965 +dictGet dict_array (32.2024,0.222191) 3498093 +dictGet dict_array (32.2107,1.34379) 3497981 +dictGet dict_array (32.2109,3.9018699999999997) 101 +dictGet dict_array (32.2123,1.85233) 3497965 +dictGet dict_array (32.2144,3.72534) 101 +dictGet dict_array (32.2218,2.5386699999999998) 3497970 +dictGet dict_array (32.2279,2.84267) 3497245 +dictGet dict_array (32.2345,3.33295) 3501792 +dictGet dict_array (32.2435,3.85283) 101 +dictGet dict_array (32.2527,-0.480608) 101 +dictGet dict_array (32.2566,-0.837882) 101 +dictGet dict_array (32.2627,2.57708) 3497970 +dictGet dict_array (32.2733,0.244931) 3498096 +dictGet dict_array (32.2761,4.05808) 101 +dictGet dict_array (32.2764,3.78472) 101 +dictGet dict_array (32.2814,-1.26011) 101 +dictGet dict_array (32.2861,3.02427) 3497245 +dictGet dict_array (32.2924,0.928609) 3498004 +dictGet dict_array (32.2963,-0.78543) 101 +dictGet dict_array (32.3039,3.21175) 3501792 +dictGet dict_array (32.3107,0.698287) 3498004 +dictGet dict_array (32.3138,0.0595677) 3498106 +dictGet dict_array (32.3339,0.707056) 3498004 +dictGet dict_array (32.3351,0.415474) 3498106 +dictGet dict_array (32.342,-0.681023) 101 +dictGet dict_array (32.3463,1.83196) 3497126 +dictGet dict_array (32.3494,2.43799) 3497114 +dictGet dict_array (32.3524,3.47049) 3501822 +dictGet dict_array (32.3531,2.33115) 3497114 +dictGet dict_array (32.3602,0.116106) 3498106 +dictGet dict_array (32.3612,1.1598) 3498004 +dictGet dict_array (32.3689,3.34847) 3501822 +dictGet dict_array (32.3695,0.734055) 3498004 +dictGet dict_array (32.3825,3.85017) 101 +dictGet dict_array (32.3835,-1.25491) 101 +dictGet dict_array (32.4018,-0.728568) 101 +dictGet dict_array (32.4044,2.96727) 3497245 +dictGet dict_array (32.4101,2.9988) 3497245 +dictGet dict_array (32.417,-1.12908) 101 +dictGet dict_array (32.4172,4.1952) 101 +dictGet dict_array (32.4239,2.49512) 3497245 +dictGet dict_array (32.4258,4.05137) 101 +dictGet dict_array (32.4264,-0.427357) 101 +dictGet dict_array (32.4274,3.59377) 3501822 +dictGet dict_array (32.4286,-1.24757) 101 +dictGet dict_array (32.4294,3.0665) 3497245 +dictGet dict_array (32.4333,-0.353347) 101 +dictGet dict_array (32.4391,3.64421) 3501822 +dictGet dict_array (32.4401,3.70635) 3501822 
+dictGet dict_array (32.45,1.68918) 3497126 +dictGet dict_array (32.4507,-0.133471) 101 +dictGet dict_array (32.4592,0.976458) 3498105 +dictGet dict_array (32.4595,1.89135) 3497126 +dictGet dict_array (32.4604,0.280248) 3498106 +dictGet dict_array (32.4835,0.472731) 3498106 +dictGet dict_array (32.4855,2.01938) 3497126 +dictGet dict_array (32.4872,2.01697) 3497126 +dictGet dict_array (32.4911,0.613106) 3498105 +dictGet dict_array (32.4918,2.17834) 3497114 +dictGet dict_array (32.4947,2.34595) 3497114 +dictGet dict_array (32.5035,2.92234) 3497245 +dictGet dict_array (32.5132,-0.331206) 101 +dictGet dict_array (32.5156,-0.412604) 3501887 +dictGet dict_array (32.5158,2.9067499999999997) 3497245 +dictGet dict_array (32.5249,2.44519) 3497114 +dictGet dict_array (32.5293,-0.790952) 101 +dictGet dict_array (32.5319,3.96854) 101 +dictGet dict_array (32.5518,3.6093) 3501822 +dictGet dict_array (32.5541,3.5225400000000002) 3501822 +dictGet dict_array (32.5569,0.816123) 3498105 +dictGet dict_array (32.5646,1.9775) 3497126 +dictGet dict_array (32.5733,3.81271) 101 +dictGet dict_array (32.5767,0.948327) 3498105 +dictGet dict_array (32.5971,1.76179) 3497126 +dictGet dict_array (32.6035,-0.716157) 101 +dictGet dict_array (32.6087,4.21614) 101 +dictGet dict_array (32.6171,0.024481) 101 +dictGet dict_array (32.6189,-0.775391) 101 +dictGet dict_array (32.6198,2.92081) 3497167 +dictGet dict_array (32.621,-0.970784) 101 +dictGet dict_array (32.6266,0.650009) 3498105 +dictGet dict_array (32.6315,2.15144) 3497126 +dictGet dict_array (32.6385,-0.436803) 101 +dictGet dict_array (32.6449,-0.191292) 101 +dictGet dict_array (32.6535,2.10385) 3497126 +dictGet dict_array (32.6592,3.49973) 3501822 +dictGet dict_array (32.6598,2.5980600000000003) 3497114 +dictGet dict_array (32.6612,2.95681) 3497167 +dictGet dict_array (32.6636,-0.57235) 101 +dictGet dict_array (32.669,-0.382702) 101 +dictGet dict_array (32.6752,1.30748) 3497981 +dictGet dict_array (32.6811,2.9559800000000003) 3497167 +dictGet dict_array (32.6821,0.57336) 3498105 +dictGet dict_array (32.6828,3.91304) 101 +dictGet dict_array (32.6979,3.96868) 101 +dictGet dict_array (32.6983,3.15784) 3497167 +dictGet dict_array (32.7122,0.794293) 3498105 +dictGet dict_array (32.7131,-0.847256) 101 +dictGet dict_array (32.7219,0.883461) 3498105 +dictGet dict_array (32.7228,1.78808) 3497126 +dictGet dict_array (32.7273,-0.206908) 101 +dictGet dict_array (32.7292,0.259331) 3501889 +dictGet dict_array (32.7304,-1.38317) 101 +dictGet dict_array (32.7353,1.01601) 3498105 +dictGet dict_array (32.7354,4.17574) 101 +dictGet dict_array (32.7357,-0.190194) 101 +dictGet dict_array (32.7465,-1.37598) 101 +dictGet dict_array (32.7494,-0.275675) 101 +dictGet dict_array (32.7514,0.128951) 3501889 +dictGet dict_array (32.753,3.44207) 3501822 +dictGet dict_array (32.7686,2.11713) 3497126 +dictGet dict_array (32.7694,1.47159) 3497388 +dictGet dict_array (32.7768,0.0401042) 101 +dictGet dict_array (32.781,-1.34283) 101 +dictGet dict_array (32.7814,1.73876) 3497388 +dictGet dict_array (32.7856,-1.06363) 101 +dictGet dict_array (32.792699999999996,-1.1255600000000001) 101 +dictGet dict_array (32.7941,-0.645447) 101 +dictGet dict_array (32.7946,1.48889) 3497388 +dictGet dict_array (32.797,0.791753) 3501889 +dictGet dict_array (32.7982,-0.537798) 101 +dictGet dict_array (32.8091,2.3611) 3490438 +dictGet dict_array (32.81,1.7130800000000002) 3497388 +dictGet dict_array (32.8174,-0.288322) 101 +dictGet dict_array (32.823,1.6546699999999999) 3497388 +dictGet dict_array (32.8233,1.62108) 3497388 
+dictGet dict_array (32.8428,-0.400045) 101 +dictGet dict_array (32.8479,2.13598) 3490438 +dictGet dict_array (32.8524,0.199902) 3501889 +dictGet dict_array (32.8543,3.23553) 3501820 +dictGet dict_array (32.8562,1.31371) 3498117 +dictGet dict_array (32.87,1.44256) 3498117 +dictGet dict_array (32.8789,2.38192) 3490438 +dictGet dict_array (32.8812,2.20734) 3497128 +dictGet dict_array (32.8815,-0.54427) 101 +dictGet dict_array (32.8853,2.4859) 3497128 +dictGet dict_array (32.8909,0.513964) 3501889 +dictGet dict_array (32.9035,2.38999) 3490438 +dictGet dict_array (32.9097,2.48131) 3497128 +dictGet dict_array (32.928,-0.943269) 101 +dictGet dict_array (32.9322,1.13165) 3498104 +dictGet dict_array (32.9348,1.22606) 3498117 +dictGet dict_array (32.9417,3.77998) 3501822 +dictGet dict_array (32.9428,3.11936) 3497167 +dictGet dict_array (32.9482,1.18092) 3498118 +dictGet dict_array (32.9506,0.0609364) 101 +dictGet dict_array (32.953,-0.828308) 101 +dictGet dict_array (32.9593,3.5209099999999998) 3501822 +dictGet dict_array (32.9617,2.07711) 3497128 +dictGet dict_array (32.966,0.693749) 3498104 +dictGet dict_array (32.9668,-0.716432) 101 +dictGet dict_array (32.9702,1.98555) 3497127 +dictGet dict_array (32.9782,1.73819) 3497388 +dictGet dict_array (32.9805,3.71151) 3501822 +dictGet dict_array (32.9821,2.97225) 3497167 +dictGet dict_array (32.995,-0.830301) 101 +dictGet dict_array (33.0234,0.770848) 3498104 +dictGet dict_array (33.0312,-0.340964) 101 +dictGet dict_array (33.0366,-0.756795) 101 +dictGet dict_array (33.0438,0.812871) 3498118 +dictGet dict_array (33.0455,1.84843) 3497127 +dictGet dict_array (33.0498,0.0913292) 101 +dictGet dict_array (33.0506,1.53739) 3497364 +dictGet dict_array (33.0554,2.4265) 3497363 +dictGet dict_array (33.0741,3.61332) 3501822 +dictGet dict_array (33.0765,-0.179985) 101 +dictGet dict_array (33.087,1.46465) 3497399 +dictGet dict_array (33.0906,-0.620383) 101 +dictGet dict_array (33.1047,-1.28027) 101 +dictGet dict_array (33.1072,1.96303) 3497127 +dictGet dict_array (33.1081,-0.897874) 101 +dictGet dict_array (33.1122,1.8950200000000001) 3497127 +dictGet dict_array (33.1237,2.63993) 3497165 +dictGet dict_array (33.1238,0.753963) 3498118 +dictGet dict_array (33.1257,0.495668) 3498102 +dictGet dict_array (33.1258,1.78341) 3497364 +dictGet dict_array (33.127,2.59646) 3497166 +dictGet dict_array (33.1324,-1.23742) 101 +dictGet dict_array (33.1359,3.83491) 101 +dictGet dict_array (33.1628,-0.379588) 101 +dictGet dict_array (33.1679,1.25601) 3498117 +dictGet dict_array (33.1688,-1.35553) 101 +dictGet dict_array (33.181,2.10943) 3497363 +dictGet dict_array (33.1871,2.81171) 3497165 +dictGet dict_array (33.1877,0.771297) 3498118 +dictGet dict_array (33.1883,-0.204797) 101 +dictGet dict_array (33.1886,3.27998) 3501820 +dictGet dict_array (33.1955,0.708907) 3498118 +dictGet dict_array (33.2044,-0.769275) 101 +dictGet dict_array (33.2182,3.36103) 3501820 +dictGet dict_array (33.2192,3.43586) 3501822 +dictGet dict_array (33.2322,-0.916753) 101 +dictGet dict_array (33.2359,-0.81321) 101 +dictGet dict_array (33.238,0.635072) 3498111 +dictGet dict_array (33.2398,3.02588) 3497165 +dictGet dict_array (33.2469,2.35698) 3497363 +dictGet dict_array (33.247,2.3327) 3497363 +dictGet dict_array (33.2579,2.8027100000000003) 3497165 +dictGet dict_array (33.2607,0.321082) 101 +dictGet dict_array (33.2653,0.243336) 101 +dictGet dict_array (33.2758,0.831836) 3498118 +dictGet dict_array (33.2771,0.886536) 3498118 +dictGet dict_array (33.2914,1.16026) 3498117 +dictGet dict_array (33.2914,1.38882) 
3497399 +dictGet dict_array (33.2982,-1.16604) 101 +dictGet dict_array (33.2985,0.842556) 3498112 +dictGet dict_array (33.3005,2.8338900000000002) 3497165 +dictGet dict_array (33.305,0.0969475) 101 +dictGet dict_array (33.3072,3.82163) 101 +dictGet dict_array (33.312,3.41475) 3501820 +dictGet dict_array (33.3129,2.46048) 3497166 +dictGet dict_array (33.3134,3.46863) 3501820 +dictGet dict_array (33.3203,2.33139) 3497166 +dictGet dict_array (33.324,0.433701) 101 +dictGet dict_array (33.3338,2.44705) 3497166 +dictGet dict_array (33.337,4.06475) 101 +dictGet dict_array (33.3469,1.08172) 3498126 +dictGet dict_array (33.3538,0.717896) 3498112 +dictGet dict_array (33.3618,1.37899) 3497399 +dictGet dict_array (33.3698,0.547744) 3501862 +dictGet dict_array (33.3705,0.957619) 3498112 +dictGet dict_array (33.3821,3.07258) 3497165 +dictGet dict_array (33.3881,3.0626) 3497165 +dictGet dict_array (33.393,-0.816186) 101 +dictGet dict_array (33.3945,0.869508) 3498110 +dictGet dict_array (33.4001,1.24186) 3498117 +dictGet dict_array (33.4008,2.34911) 3497166 +dictGet dict_array (33.4166,-1.2808899999999999) 101 +dictGet dict_array (33.4167,3.0655) 3497165 +dictGet dict_array (33.4204,2.81887) 3497165 +dictGet dict_array (33.4211,1.71128) 3497400 +dictGet dict_array (33.4237,2.91761) 3497165 +dictGet dict_array (33.4266,1.5955599999999999) 3497399 +dictGet dict_array (33.4353,-0.391392) 101 +dictGet dict_array (33.4362,-0.134658) 101 +dictGet dict_array (33.4386,0.15396) 101 +dictGet dict_array (33.4421,-0.50712) 101 +dictGet dict_array (33.452,0.915829) 3498126 +dictGet dict_array (33.463,-0.0882717) 101 +dictGet dict_array (33.464,-1.00949) 101 +dictGet dict_array (33.4692,0.954092) 3498126 +dictGet dict_array (33.4716,1.9538799999999998) 3497400 +dictGet dict_array (33.4756,1.85836) 3497400 +dictGet dict_array (33.4859,4.0751) 101 +dictGet dict_array (33.4899,3.54193) 3501820 +dictGet dict_array (33.4935,3.49794) 3501820 +dictGet dict_array (33.494,-0.983356) 101 +dictGet dict_array (33.4955,-1.28128) 101 +dictGet dict_array (33.4965,-0.278687) 101 +dictGet dict_array (33.4991,0.647491) 3498110 +dictGet dict_array (33.5076,2.2272) 3497424 +dictGet dict_array (33.5079,-0.498199) 101 +dictGet dict_array (33.5157,0.535034) 3501862 +dictGet dict_array (33.5171,2.49677) 3497166 +dictGet dict_array (33.5255,2.4447200000000002) 3497166 +dictGet dict_array (33.526,4.01194) 101 +dictGet dict_array (33.5288,0.789434) 3498110 +dictGet dict_array (33.5356,-1.17671) 101 +dictGet dict_array (33.5402,1.49152) 3497399 +dictGet dict_array (33.5418,3.45757) 3501820 +dictGet dict_array (33.5428,1.90712) 3497400 +dictGet dict_array (33.5556,-0.55741) 101 +dictGet dict_array (33.5564,0.876858) 3498128 +dictGet dict_array (33.5567,-0.10208) 101 +dictGet dict_array (33.5645,-0.124824) 101 +dictGet dict_array (33.5663,3.4872) 3501820 +dictGet dict_array (33.5716,-0.0107611) 101 +dictGet dict_array (33.578,3.55714) 3501820 +dictGet dict_array (33.5826,-0.49076) 101 +dictGet dict_array (33.5909,0.773737) 3498110 +dictGet dict_array (33.5958,2.9619999999999997) 3497425 +dictGet dict_array (33.6193,-0.919755) 101 +dictGet dict_array (33.6313,0.652132) 3498110 +dictGet dict_array (33.632,0.823351) 3498128 +dictGet dict_array (33.66,2.18998) 3497424 +dictGet dict_array (33.6621,0.535395) 3498135 +dictGet dict_array (33.6726,3.19367) 3497438 +dictGet dict_array (33.6912,1.74522) 3497400 +dictGet dict_array (33.705,0.706397) 3498135 +dictGet dict_array (33.7076,0.7622) 3498128 +dictGet dict_array (33.7112,1.70187) 3497400 +dictGet 
dict_array (33.7246,-1.14837) 101 +dictGet dict_array (33.7326,2.62413) 3497425 +dictGet dict_array (33.7332,2.82137) 3497425 +dictGet dict_array (33.7434,0.394672) 3498135 +dictGet dict_array (33.7443,1.54557) 3497398 +dictGet dict_array (33.7506,1.57317) 3497398 +dictGet dict_array (33.7526,1.8578999999999999) 3497424 +dictGet dict_array (33.766,4.15013) 101 +dictGet dict_array (33.7834,2.41789) 3497439 +dictGet dict_array (33.7864,0.230935) 101 +dictGet dict_array (33.7965,3.05709) 3497438 +dictGet dict_array (33.7998,3.32881) 3497438 +dictGet dict_array (33.8003,2.97338) 3497425 +dictGet dict_array (33.8007,-1.08962) 101 +dictGet dict_array (33.8022,-0.139488) 101 +dictGet dict_array (33.8065,2.70857) 3497425 +dictGet dict_array (33.8169,-0.607788) 101 +dictGet dict_array (33.8203,0.108512) 3501863 +dictGet dict_array (33.8231,-1.03449) 101 +dictGet dict_array (33.8312,3.49458) 3501829 +dictGet dict_array (33.8342,0.297518) 3501863 +dictGet dict_array (33.8352,0.165872) 101 +dictGet dict_array (33.8354,1.87277) 3497424 +dictGet dict_array (33.8371,1.60103) 3497398 +dictGet dict_array (33.8387,1.9968) 3497424 +dictGet dict_array (33.8403,3.5805) 3501829 +dictGet dict_array (33.8414,-0.703067) 101 +dictGet dict_array (33.844,-0.179472) 101 +dictGet dict_array (33.8468,3.40137) 3501829 +dictGet dict_array (33.8509,4.15334) 101 +dictGet dict_array (33.8539,2.38339) 3497439 +dictGet dict_array (33.858,-1.3122500000000001) 101 +dictGet dict_array (33.859,3.72626) 3501829 +dictGet dict_array (33.8616,2.24433) 3497424 +dictGet dict_array (33.8621,3.01035) 3497438 +dictGet dict_array (33.8623,1.17559) 3498129 +dictGet dict_array (33.8682,2.706) 3497425 +dictGet dict_array (33.8684,0.189231) 3501863 +dictGet dict_array (33.872,1.93574) 3497424 +dictGet dict_array (33.8844,3.80404) 3501829 +dictGet dict_array (33.8888,0.594884) 3498135 +dictGet dict_array (33.8946,2.74161) 3497438 +dictGet dict_array (33.9023,0.6239) 3498135 +dictGet dict_array (33.9057,0.873222) 3498136 +dictGet dict_array (33.9157,-1.26607) 101 +dictGet dict_array (33.92,2.06848) 3497397 +dictGet dict_array (33.9298,-0.00526229) 101 +dictGet dict_array (33.932,3.07063) 3497438 +dictGet dict_array (33.9322,0.629385) 3501864 +dictGet dict_array (33.9367,-1.41955) 101 +dictGet dict_array (33.937,1.42532) 3498173 +dictGet dict_array (33.9375,1.1467100000000001) 3498159 +dictGet dict_array (33.9434,-1.05739) 101 +dictGet dict_array (33.9477,3.34809) 3501829 +dictGet dict_array (33.95,2.21715) 3497397 +dictGet dict_array (33.955799999999996,0.305176) 3501859 +dictGet dict_array (33.9686,-0.28273) 101 +dictGet dict_array (33.9703,4.1255) 3501829 +dictGet dict_array (33.9707,3.08199) 3497438 +dictGet dict_array (33.9754,1.06203) 3498159 +dictGet dict_array (33.9757,3.72468) 3501829 +dictGet dict_array (33.9775,-0.0440599) 101 +dictGet dict_array (33.9777,-0.251484) 101 +dictGet dict_array (33.9789,-0.339374) 101 +dictGet dict_array (33.9849,2.54515) 3497425 +dictGet dict_array (33.9885,-0.318557) 101 +dictGet dict_array (33.9977,1.07175) 3498159 +dictGet dict_array (33.9984,-0.700517) 101 +dictGet dict_array (34.0149,3.53338) 3501829 +dictGet dict_array (34.0173,3.39155) 3501829 +dictGet dict_array (34.0317,3.9579) 3501829 +dictGet dict_array (34.0369,3.83612) 3501829 +dictGet dict_array (34.043,-0.0887221) 101 +dictGet dict_array (34.0487,1.14252) 3498159 +dictGet dict_array (34.052,1.74832) 3497397 +dictGet dict_array (34.0711,-0.898071) 101 +dictGet dict_array (34.0747,1.55057) 3498173 +dictGet dict_array (34.0803,3.16763) 3497438 
+dictGet dict_array (34.0872,3.75555) 3501829 +dictGet dict_array (34.0965,1.62038) 3498173 +dictGet dict_array (34.0977,-0.412691) 101 +dictGet dict_array (34.0986,0.0294206) 101 +dictGet dict_array (34.1072,3.15823) 3497438 +dictGet dict_array (34.1092,3.09599) 3497438 +dictGet dict_array (34.1206,1.04637) 3498160 +dictGet dict_array (34.1209,3.13826) 3497438 +dictGet dict_array (34.1265,3.95881) 3501829 +dictGet dict_array (34.1286,-0.539319) 101 +dictGet dict_array (34.1358,3.67451) 3501829 +dictGet dict_array (34.1428,0.136115) 101 +dictGet dict_array (34.157,1.73522) 3497397 +dictGet dict_array (34.1581,1.48001) 3498172 +dictGet dict_array (34.1682,3.42373) 3501829 +dictGet dict_array (34.1683,-1.26511) 101 +dictGet dict_array (34.1684,4.20007) 101 +dictGet dict_array (34.1854,3.32089) 3501829 +dictGet dict_array (34.2022,0.749536) 3501864 +dictGet dict_array (34.2044,3.04865) 3497438 +dictGet dict_array (34.22,-0.500055) 101 +dictGet dict_array (34.2249,0.743775) 3501864 +dictGet dict_array (34.2254,1.34702) 3498172 +dictGet dict_array (34.2355,-0.898843) 101 +dictGet dict_array (34.2394,2.0203699999999998) 3497439 +dictGet dict_array (34.2466,1.83785) 3498251 +dictGet dict_array (34.247,4.09563) 101 +dictGet dict_array (34.2508,2.61312) 3497439 +dictGet dict_array (34.2517,1.69642) 3498251 +dictGet dict_array (34.2564,4.13033) 101 +dictGet dict_array (34.2574,4.18928) 101 +dictGet dict_array (34.2614,-0.478719) 101 +dictGet dict_array (34.2625,2.38088) 3497439 +dictGet dict_array (34.2666,3.1503) 3501829 +dictGet dict_array (34.271,4.02223) 101 +dictGet dict_array (34.2727,0.514755) 101 +dictGet dict_array (34.278,1.98929) 3497439 +dictGet dict_array (34.2798,-0.199208) 101 +dictGet dict_array (34.2804,2.05184) 3497439 +dictGet dict_array (34.2945,-1.11051) 101 +dictGet dict_array (34.3168,-0.0829721) 101 +dictGet dict_array (34.3345,3.4358) 3501829 +dictGet dict_array (34.3377,1.13527) 3498162 +dictGet dict_array (34.3383,1.27891) 3498161 +dictGet dict_array (34.3391,1.47945) 3498161 +dictGet dict_array (34.3441,0.627014) 101 +dictGet dict_array (34.347,2.4853) 3497439 +dictGet dict_array (34.3514,2.16247) 3497439 +dictGet dict_array (34.3627,2.64533) 3497439 +dictGet dict_array (34.3682,-0.227501) 101 +dictGet dict_array (34.3756,4.21248) 101 +dictGet dict_array (34.379,3.96604) 101 +dictGet dict_array (34.3827,1.7518) 3498251 +dictGet dict_array (34.3912,2.8834) 3501830 +dictGet dict_array (34.3919,0.668829) 101 +dictGet dict_array (34.3949,2.00338) 3497439 +dictGet dict_array (34.3987,0.557268) 101 +dictGet dict_array (34.4111,0.768558) 101 +dictGet dict_array (34.4119,2.8742) 3501830 +dictGet dict_array (34.416,3.50841) 3501829 +dictGet dict_array (34.4212,1.24916) 3498161 +dictGet dict_array (34.4251,0.457029) 101 +dictGet dict_array (34.4274,-0.902559) 101 +dictGet dict_array (34.4325,4.03159) 101 +dictGet dict_array (34.438,1.63994) 3498251 +dictGet dict_array (34.4403,-0.177594) 101 +dictGet dict_array (34.4421,0.726712) 101 +dictGet dict_array (34.4517,2.98611) 3501830 +dictGet dict_array (34.4658,-1.312) 101 +dictGet dict_array (34.4732,-0.0681338) 101 +dictGet dict_array (34.4752,2.81646) 3501830 +dictGet dict_array (34.4914,2.3858) 3497439 +dictGet dict_array (34.4923,0.855231) 101 +dictGet dict_array (34.5235,1.78468) 3498251 +dictGet dict_array (34.5305,4.10608) 101 +dictGet dict_array (34.5389,0.621937) 101 +dictGet dict_array (34.5406,3.17145) 101 +dictGet dict_array (34.5434,-0.56306) 101 +dictGet dict_array (34.5449,3.13311) 3501829 +dictGet dict_array 
(34.5491,2.31572) 3497439 +dictGet dict_array (34.5539,2.94028) 3501830 +dictGet dict_array (34.5546,-0.208825) 101 +dictGet dict_array (34.5549,3.78486) 101 +dictGet dict_array (34.5676,0.307148) 101 +dictGet dict_array (34.5743,1.5217399999999999) 3501838 +dictGet dict_array (34.5775,3.48046) 101 +dictGet dict_array (34.5815,2.5243700000000002) 3501830 +dictGet dict_array (34.5841,4.21191) 101 +dictGet dict_array (34.5887,2.65083) 3501830 +dictGet dict_array (34.5937,3.2143) 101 +dictGet dict_array (34.6013,-1.0612) 101 +dictGet dict_array (34.6089,1.36066) 3501838 +dictGet dict_array (34.6103,3.40227) 101 +dictGet dict_array (34.6128,1.92276) 3498251 +dictGet dict_array (34.6175,2.43627) 3498251 +dictGet dict_array (34.6209,3.43776) 101 +dictGet dict_array (34.6234,2.60237) 3501830 +dictGet dict_array (34.6275,3.52479) 101 +dictGet dict_array (34.635,0.568558) 101 +dictGet dict_array (34.6373,2.37692) 3498251 +dictGet dict_array (34.6375,3.52234) 101 +dictGet dict_array (34.6426,2.12397) 3498251 +dictGet dict_array (34.6513,2.80915) 3501830 +dictGet dict_array (34.6632,2.30039) 3498251 +dictGet dict_array (34.6691,1.86582) 3498251 +dictGet dict_array (34.6739,0.15342) 101 +dictGet dict_array (34.6825,0.0499679) 101 +dictGet dict_array (34.6893,0.454326) 101 +dictGet dict_array (34.6957,-0.358598) 101 +dictGet dict_array (34.6986,0.562679) 101 +dictGet dict_array (34.712,1.12114) 101 +dictGet dict_array (34.7126,-0.0057301) 101 +dictGet dict_array (34.7137,0.0248501) 101 +dictGet dict_array (34.7162,1.15623) 101 +dictGet dict_array (34.7258,3.95142) 101 +dictGet dict_array (34.7347,3.5232099999999997) 101 +dictGet dict_array (34.7363,2.23374) 3501830 +dictGet dict_array (34.7375,0.397841) 101 +dictGet dict_array (34.7423,3.09198) 101 +dictGet dict_array (34.7452,3.09029) 101 +dictGet dict_array (34.7539,-1.06943) 101 +dictGet dict_array (34.7733,-0.00912717) 101 +dictGet dict_array (34.774,2.71088) 3501830 +dictGet dict_array (34.7771,1.46009) 3501835 +dictGet dict_array (34.7782,-1.28308) 101 +dictGet dict_array (34.7924,3.63564) 101 +dictGet dict_array (34.7939,-0.416676) 101 +dictGet dict_array (34.7964,-0.401773) 101 +dictGet dict_array (34.7974,0.0286873) 101 +dictGet dict_array (34.7975,3.05965) 101 +dictGet dict_array (34.8037,3.07263) 101 +dictGet dict_array (34.8254,-0.390284) 101 +dictGet dict_array (34.828,1.91869) 3498251 +dictGet dict_array (34.8289,3.71058) 101 +dictGet dict_array (34.8403,2.14606) 3501835 +dictGet dict_array (34.8437,2.20617) 3501830 +dictGet dict_array (34.8469,2.38435) 3501830 +dictGet dict_array (34.86,1.45705) 101 +dictGet dict_array (34.8612,0.914248) 101 +dictGet dict_array (34.8663,3.4215400000000002) 101 +dictGet dict_array (34.8724,-0.375144) 101 +dictGet dict_array (34.8795,3.29317) 101 +dictGet dict_array (34.8823,1.21988) 101 +dictGet dict_array (34.8834,1.07657) 101 +dictGet dict_array (34.8837,0.157648) 101 +dictGet dict_array (34.8871,-0.9755) 101 +dictGet dict_array (34.8871,1.8943699999999999) 3501835 +dictGet dict_array (34.889,3.36756) 101 +dictGet dict_array (34.8907,1.24874) 101 +dictGet dict_array (34.8965,3.13508) 101 +dictGet dict_array (34.9042,2.62092) 101 +dictGet dict_array (34.9055,-0.0448967) 101 +dictGet dict_array (34.9122,0.110576) 101 +dictGet dict_array (34.9228,3.60183) 101 +dictGet dict_array (34.9237,1.21715) 101 +dictGet dict_array (34.9296,1.70459) 3501835 +dictGet dict_array (34.941,-1.14663) 101 +dictGet dict_array (34.9448,1.18923) 101 +dictGet dict_array (34.9462,3.81678) 101 +dictGet dict_array 
(34.9466,0.593463) 101 +dictGet dict_array (34.9485,0.150307) 101 +dictGet dict_array (34.9542,0.487238) 101 +dictGet dict_array (34.9559,2.03473) 3501835 +dictGet dict_array (34.9671,-0.960225) 101 +dictGet dict_array (34.9711,2.63444) 101 +dictGet dict_array (34.9892,0.354775) 101 +dictGet dict_array (34.9907,1.40724) 101 +dictGet dict_array (34.9916,-0.00173097) 101 +dictGet dict_array (34.9919,2.06167) 101 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index f6880ae5009..c9cd151a2d9 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,20 +12,17 @@ declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${CURDIR}" $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; -CREATE DATABASE test_01037; -DROP TABLE IF EXISTS test_01037.points; -CREATE TABLE test_01037.points (x Float64, y Float64) ENGINE = Memory; +CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" rm "${CURDIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_array; -CREATE TABLE test_01037.polygons_array +CREATE TABLE polygons_array ( key Array(Array(Array(Array(Float64)))), name String, @@ -34,7 +31,7 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" rm "${CURDIR}"/01037_polygon_data @@ -43,27 +40,23 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; + DROP DICTIONARY IF EXISTS dict_array; - CREATE DICTIONARY test_01037.dict_array + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_correctness_fast.ans" "$outputFile" done -$CLICKHOUSE_CLIENT -n --query=" -DROP TABLE 
test_01037.points; -DROP DATABASE test_01037; -" diff --git a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh index 1e754dce786..66732205f95 100755 --- a/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh +++ b/tests/queries/0_stateless/01038_dictionary_lifetime_min_zero_sec.sh @@ -5,13 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE db_01038" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE db_01038.table_for_dict +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_for_dict ( key_column UInt64, value Float64 @@ -19,34 +15,34 @@ CREATE TABLE db_01038.table_for_dict ENGINE = MergeTree() ORDER BY key_column" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (1, 1.1)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (1, 1.1)" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY db_01038.dict_with_zero_min_lifetime +CREATE DICTIONARY ${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'db_01038')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" -$CLICKHOUSE_CLIENT --query "INSERT INTO db_01038.table_for_dict VALUES (2, 2.2)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table_for_dict VALUES (2, 2.2)" function check() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") while [ "$query_result" != "2.2" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))") + query_result=$($CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))") done } @@ -55,8 +51,6 @@ export -f check; timeout 10 bash -c check -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(1))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(1))" -$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('db_01038.dict_with_zero_min_lifetime', 'value', toUInt64(2))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS db_01038" +$CLICKHOUSE_CLIENT --query "SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_with_zero_min_lifetime', 'value', toUInt64(2))" diff --git a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh 
b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh index 6856f952a47..d558fbf465e 100755 --- a/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh +++ b/tests/queries/0_stateless/01040_dictionary_invalidate_query_switchover_long.sh @@ -1,17 +1,12 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" - -$CLICKHOUSE_CLIENT --query "CREATE DATABASE dictdb_01041_01040" - $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 122 as dummy, @@ -20,31 +15,31 @@ FROM system.one" $CLICKHOUSE_CLIENT --query " -CREATE DICTIONARY dictdb_01041_01040.invalidate +CREATE DICTIONARY invalidate ( dummy UInt64, two UInt8 EXPRESSION dummy ) PRIMARY KEY dummy -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB 'dictdb_01041_01040' INVALIDATE_QUERY 'select max(last_time) from dictdb_01041_01040.dict_invalidate')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_invalidate' DB currentDatabase() INVALIDATE_QUERY 'select max(last_time) from dict_invalidate')) LIFETIME(MIN 0 MAX 1) LAYOUT(FLAT())" -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(122))" +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(122))" # No exception happened -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" -$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dictdb_01041_01040.dict_invalidate" +$CLICKHOUSE_CLIENT --check_table_dependencies=0 --query "DROP TABLE dict_invalidate" function check_exception_detected() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ -z "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -53,10 +48,10 @@ function check_exception_detected() export -f check_exception_detected; timeout 30 bash -c check_exception_detected 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 | grep -Eo "dictdb_01041_01040.dict_invalidate.*UNKNOWN_TABLE" | wc -l +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 | grep -Eo "dict_invalidate.*UNKNOWN_TABLE" | wc -l $CLICKHOUSE_CLIENT --query " -CREATE TABLE dictdb_01041_01040.dict_invalidate +CREATE TABLE dict_invalidate ENGINE = Memory AS SELECT 133 as dummy, @@ -65,11 +60,11 @@ FROM system.one" 
function check_exception_fixed() { - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) while [ "$query_result" ] do - query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1) + query_result=$($CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1) sleep 0.1 done } @@ -78,7 +73,5 @@ export -f check_exception_fixed; # it may take a while until dictionary reloads timeout 60 bash -c check_exception_fixed 2> /dev/null -$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = 'dictdb_01041_01040' AND name = 'invalidate'" 2>&1 -$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('dictdb_01041_01040.invalidate', 'two', toUInt64(133))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01041_01040" +$CLICKHOUSE_CLIENT --query "SELECT last_exception FROM system.dictionaries WHERE database = currentDatabase() AND name = 'invalidate'" 2>&1 +$CLICKHOUSE_CLIENT --query "SELECT dictGetUInt8('invalidate', 'two', toUInt64(133))" diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index 9d34470c38d..2b075566ac3 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,41 +7,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail # NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used -$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' -DROP DATABASE IF EXISTS dictdb_01042; -CREATE DATABASE dictdb_01042; -CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO dictdb_01042.table VALUES (12, 102, now()); +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery < ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '12 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (13, 103, now())" -$CLICKHOUSE_CLIENT --query "INSERT INTO dictdb_01042.table VALUES (14, 104, now() - INTERVAL 1 DAY)" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (13, 103, now())" +$CLICKHOUSE_CLIENT --query "INSERT INTO ${CLICKHOUSE_DATABASE}.table VALUES (14, 104, now() - INTERVAL 1 DAY)" -while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))")" = -1 ] +while [ "$(${CLICKHOUSE_CLIENT} --query "SELECT dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))")" = -1 ] do sleep 0.5 done -$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" +$CLICKHOUSE_CLIENT --query "SELECT '13 -> ', 
dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14 -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" -$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY 'dictdb_01042.dict'" +$CLICKHOUSE_CLIENT --query "SYSTEM RELOAD DICTIONARY '${CLICKHOUSE_DATABASE}.dict'" -$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(12))" -$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(13))" -$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('dictdb_01042.dict', 'y', toUInt64(14))" - -$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS dictdb_01042" +$CLICKHOUSE_CLIENT --query "SELECT '12(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(12))" +$CLICKHOUSE_CLIENT --query "SELECT '13(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(13))" +$CLICKHOUSE_CLIENT --query "SELECT '14(r) -> ', dictGetInt64('${CLICKHOUSE_DATABASE}.dict', 'y', toUInt64(14))" diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sh b/tests/queries/0_stateless/01053_ssd_dictionary.sh index b49144c9b1a..00e5719a9a9 100755 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sh +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sh @@ -6,8 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -n --query=" DROP DATABASE IF EXISTS 01053_db; diff --git a/tests/queries/0_stateless/01055_compact_parts_1.sql b/tests/queries/0_stateless/01055_compact_parts_1.sql index ff5ab722e0f..72048c59a41 100644 --- a/tests/queries/0_stateless/01055_compact_parts_1.sql +++ b/tests/queries/0_stateless/01055_compact_parts_1.sql @@ -1,8 +1,3 @@ --- Tags: no-parallel - -drop table if exists mt_compact; -drop table if exists mt_compact_2; - create table mt_compact (a Int, s String) engine = MergeTree order by a partition by a settings index_granularity_bytes = 0; alter table mt_compact modify setting min_rows_for_wide_part = 1000; -- { serverError NOT_IMPLEMENTED } @@ -25,5 +20,3 @@ alter table mt_compact modify setting parts_to_delay_insert = 300; alter table mt_compact modify setting min_rows_for_wide_part = 0; show create table mt_compact; - -drop table mt_compact diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 3c70927db25..6ed26c8565f 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest set -e @@ -69,9 +69,6 @@ cat "$DATA_DIR"/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --out - - - # output echo '===' output diff --git a/tests/queries/0_stateless/01069_database_memory.sql b/tests/queries/0_stateless/01069_database_memory.sql index 5aab9175c58..5d2fa4ea11e 100644 --- a/tests/queries/0_stateless/01069_database_memory.sql +++ b/tests/queries/0_stateless/01069_database_memory.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP DATABASE IF EXISTS memory_01069; CREATE DATABASE memory_01069 ENGINE = Memory; SHOW CREATE DATABASE memory_01069; diff --git a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh 
b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh index 17068dcbdf9..dcd15718416 100755 --- a/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh +++ b/tests/queries/0_stateless/01076_cache_dictionary_datarace_exception_ptr.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race # This is a monkey test used to trigger sanitizers. @@ -7,11 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --query="DROP DATABASE IF EXISTS dictdb_01076;" -$CLICKHOUSE_CLIENT --query="CREATE DATABASE dictdb_01076;" - $CLICKHOUSE_CLIENT --query=" -CREATE TABLE dictdb_01076.table_datarace +CREATE TABLE ${CLICKHOUSE_DATABASE}.table_datarace ( key_column UUID, value Float64 @@ -21,17 +18,17 @@ ORDER BY key_column; " $CLICKHOUSE_CLIENT --query=" -INSERT INTO dictdb_01076.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); +INSERT INTO ${CLICKHOUSE_DATABASE}.table_datarace VALUES ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 1.1), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 2.2), ('cd5db34f-0c25-4375-b10e-bfb3708ddc72', 3.3); " $CLICKHOUSE_CLIENT --query=" -CREATE DICTIONARY IF NOT EXISTS dictdb_01076.dict_datarace +CREATE DICTIONARY IF NOT EXISTS ${CLICKHOUSE_DATABASE}.dict_datarace ( key_column UInt64, value Float64 DEFAULT 77.77 ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB 'dictdb_01076')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_datarace' DB '${CLICKHOUSE_DATABASE}')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 10)); " @@ -41,7 +38,7 @@ function thread1() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. - $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(1));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(1));" done } @@ -51,7 +48,7 @@ function thread2() for _ in {1..50} do # This query will be ended with exception, because source dictionary has UUID as a key type. 
- $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('dictdb_01076.dict_datarace', 'value', toUInt64(2));" + $CLICKHOUSE_CLIENT --query="SELECT dictGetFloat64('${CLICKHOUSE_DATABASE}.dict_datarace', 'value', toUInt64(2));" done } @@ -67,6 +64,5 @@ wait echo OK -$CLICKHOUSE_CLIENT --query="DROP DICTIONARY dictdb_01076.dict_datarace;" -$CLICKHOUSE_CLIENT --query="DROP TABLE dictdb_01076.table_datarace;" -$CLICKHOUSE_CLIENT --query="DROP DATABASE dictdb_01076;" +$CLICKHOUSE_CLIENT --query="DROP DICTIONARY ${CLICKHOUSE_DATABASE}.dict_datarace;" +$CLICKHOUSE_CLIENT --query="DROP TABLE ${CLICKHOUSE_DATABASE}.table_datarace;" diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index bfdea95fa9e..f05a0fed965 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-parallel, no-fasttest +# Tags: zookeeper, no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -63,7 +63,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index ba8d89aad3c..399c9e488a4 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -76,7 +76,7 @@ export -f insert_thread; export -f select_thread; -TIMEOUT=30 +TIMEOUT=20 # Selects should run successfully diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index bdfbf2a47cf..697843be27f 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -28,7 +28,7 @@ CREATE TABLE url (n UInt64, col String) ENGINE=URL ( replace ( - 'https://localhost:8443/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' + 'https://localhost:' || getServerPort('https_port') || '/?query=' || 'select n, _table from ' || currentDatabase() || '.merge format CSV', ' ', '+' ), CSV ); @@ -39,7 +39,7 @@ CREATE VIEW view AS SELECT toInt64(n) as n FROM (SELECT toString(n) as n from me SELECT nonexistentsomething; -- { serverError UNKNOWN_IDENTIFIER } CREATE DICTIONARY dict (n UInt64, col String DEFAULT '42') PRIMARY KEY n -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9440 SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); +SOURCE(CLICKHOUSE(HOST 'localhost' PORT getServerPort('tcp_port_secure') SECURE 1 USER 'default' TABLE 'url')) LIFETIME(1) LAYOUT(CACHE(SIZE_IN_CELLS 1)); -- dict --> url --> merge |-> distributed -> file (1) -- |-> distributed_tf -> buffer -> file (1) diff --git a/tests/queries/0_stateless/01098_msgpack_format.sh b/tests/queries/0_stateless/01098_msgpack_format.sh index 
e2ae026eb27..30956ac6c7f 100755 --- a/tests/queries/0_stateless/01098_msgpack_format.sh +++ b/tests/queries/0_stateless/01098_msgpack_format.sh @@ -1,14 +1,11 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS msgpack"; $CLICKHOUSE_CLIENT --query="CREATE TABLE msgpack (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime('Asia/Istanbul'), datetime64 DateTime64(3, 'Asia/Istanbul'), array Array(UInt32)) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 91f7a276ea3..90128d7a8ad 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -87,7 +87,7 @@ function insert() -TIMEOUT=30 +TIMEOUT=20 create_db $TIMEOUT & sync_db $TIMEOUT & diff --git a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql index 65a03993295..1dc727930ab 100644 --- a/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql +++ b/tests/queries/0_stateless/01113_local_dictionary_type_conversion.sql @@ -1,29 +1,21 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -CREATE TABLE database_for_dict.table_for_dict ( +CREATE TABLE table_for_dict ( CompanyID String, OSType Enum('UNKNOWN' = 0, 'WINDOWS' = 1, 'LINUX' = 2, 'ANDROID' = 3, 'MAC' = 4), SomeID Int32 ) ENGINE = Memory(); -INSERT INTO database_for_dict.table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); +INSERT INTO table_for_dict VALUES ('First', 'WINDOWS', 1), ('Second', 'LINUX', 2); -CREATE DICTIONARY database_for_dict.dict_with_conversion +CREATE DICTIONARY dict_with_conversion ( CompanyID String DEFAULT '', OSType String DEFAULT '', SomeID Int32 DEFAULT 0 ) PRIMARY KEY CompanyID -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB currentDatabase())) LIFETIME(MIN 1 MAX 20) LAYOUT(COMPLEX_KEY_HASHED()); -SELECT * FROM database_for_dict.dict_with_conversion ORDER BY CompanyID; - -DROP DATABASE IF EXISTS database_for_dict; +SELECT * FROM dict_with_conversion ORDER BY CompanyID; diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 1b1f064ae0b..fed76727a27 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest # Tag no-fasttest: 45 seconds running # Creation of a database with Ordinary engine emits a warning. 
diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql index 3379acf4d7b..783a728e336 100644 --- a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest DROP DATABASE IF EXISTS conv_main; CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError CANNOT_CREATE_DATABASE } diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql index a324d278c12..6a818d94a58 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -1,11 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -use database_for_dict; - CREATE TABLE date_table ( id UInt32, @@ -24,7 +16,7 @@ CREATE DICTIONARY somedict end Date ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB currentDatabase())) LAYOUT(RANGE_HASHED()) RANGE (MIN start MAX end) LIFETIME(MIN 300 MAX 360); @@ -35,5 +27,3 @@ SELECT * from somedict; SELECT 1 FROM somedict; SHOW TABLES; - -DROP DATABASE database_for_dict; diff --git a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql index 2a1d04e6074..78632ab2463 100644 --- a/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql +++ b/tests/queries/0_stateless/01127_month_partitioning_consistency_select.sql @@ -1,6 +1,3 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS mt; set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE mt (d Date, x String) ENGINE = MergeTree(d, x, 8192); INSERT INTO mt VALUES ('2106-02-07', 'Hello'), ('1970-01-01', 'World'); diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 24ec58c9c17..bdffa028846 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -107,7 +107,7 @@ export -f drop_partition_thread; export -f optimize_thread; export -f drop_part_thread; -TIMEOUT=60 +TIMEOUT=40 #timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh index 8344bb6f426..2ab7f883367 100755 --- a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-ordinary-database, no-debug +# Tags: long, no-ordinary-database, no-debug # Test is too heavy, avoid parallel run in Flaky Check # shellcheck disable=SC2119 diff --git a/tests/queries/0_stateless/01185_create_or_replace_table.sql b/tests/queries/0_stateless/01185_create_or_replace_table.sql index 11759d0bb0c..801a775e024 100644 --- a/tests/queries/0_stateless/01185_create_or_replace_table.sql +++ 
b/tests/queries/0_stateless/01185_create_or_replace_table.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database, no-parallel +-- Tags: no-ordinary-database drop table if exists t1; diff --git a/tests/queries/0_stateless/01188_attach_table_from_path.sql b/tests/queries/0_stateless/01188_attach_table_from_path.sql index d1b9493b6c2..026979a0132 100644 --- a/tests/queries/0_stateless/01188_attach_table_from_path.sql +++ b/tests/queries/0_stateless/01188_attach_table_from_path.sql @@ -1,4 +1,4 @@ --- Tags: no-replicated-database, no-parallel +-- Tags: no-replicated-database drop table if exists test; drop table if exists file; diff --git a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql index 3e497f9e3a4..a0cacd8bc7a 100644 --- a/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql +++ b/tests/queries/0_stateless/01225_drop_dictionary_as_table.sql @@ -1,22 +1,15 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01225; -CREATE DATABASE dict_db_01225; - -CREATE TABLE dict_db_01225.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01225.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01225')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SYSTEM RELOAD DICTIONARY dict_db_01225.dict; +SYSTEM RELOAD DICTIONARY dict; -DROP TABLE dict_db_01225.dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } -DROP DICTIONARY dict_db_01225.dict; - -DROP DATABASE dict_db_01225; +DROP TABLE dict; -- { serverError CANNOT_DETACH_DICTIONARY_AS_TABLE } +DROP DICTIONARY dict; diff --git a/tests/queries/0_stateless/01254_dict_create_without_db.sql b/tests/queries/0_stateless/01254_dict_create_without_db.sql index 65a2ab52d23..2d4da5af9a9 100644 --- a/tests/queries/0_stateless/01254_dict_create_without_db.sql +++ b/tests/queries/0_stateless/01254_dict_create_without_db.sql @@ -1,9 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; -USE dict_db_01254; - CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); CREATE DICTIONARY dict ( @@ -11,15 +5,12 @@ CREATE DICTIONARY dict val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -USE system; -DROP DATABASE dict_db_01254; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 
'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql index 206ddeac612..11473c6ce32 100644 --- a/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql +++ b/tests/queries/0_stateless/01254_dict_load_after_detach_attach.sql @@ -1,26 +1,19 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS dict_db_01254; -CREATE DATABASE dict_db_01254; - -CREATE TABLE dict_db_01254.dict_data (key UInt64, val UInt64) Engine=Memory(); -CREATE DICTIONARY dict_db_01254.dict +CREATE TABLE dict_data (key UInt64, val UInt64) Engine=Memory(); +CREATE DICTIONARY dict ( key UInt64 DEFAULT 0, val UInt64 DEFAULT 10 ) PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB 'dict_db_01254')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data' PASSWORD '' DB currentDatabase())) LIFETIME(MIN 0 MAX 0) LAYOUT(FLAT()); -DETACH DATABASE dict_db_01254; -ATTACH DATABASE dict_db_01254; +DETACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; +ATTACH DATABASE {CLICKHOUSE_DATABASE:Identifier}; -SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict')::Nullable(String), 'NOT_LOADED'); -SYSTEM RELOAD DICTIONARY dict_db_01254.dict; -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; -SELECT dictGetUInt64('dict_db_01254.dict', 'val', toUInt64(0)); -SELECT query_count, status FROM system.dictionaries WHERE database = 'dict_db_01254' AND name = 'dict'; - -DROP DATABASE dict_db_01254; +SELECT COALESCE((SELECT status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict')::Nullable(String), 'NOT_LOADED'); +SYSTEM RELOAD DICTIONARY dict; +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; +SELECT dictGetUInt64('dict', 'val', toUInt64(0)); +SELECT query_count, status FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict'; diff --git a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql index 432256d33c2..be56806f8d6 100644 --- a/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql +++ b/tests/queries/0_stateless/01259_dictionary_custom_settings_ddl.sql @@ -1,12 +1,6 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest -DROP DATABASE IF EXISTS database_for_dict; - -CREATE DATABASE database_for_dict; - -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -CREATE TABLE database_for_dict.table_for_dict +CREATE TABLE table_for_dict ( key_column UInt64, second_column UInt64, @@ -15,7 +9,7 @@ CREATE TABLE database_for_dict.table_for_dict ENGINE = MergeTree() ORDER BY key_column; -INSERT INTO database_for_dict.table_for_dict VALUES (100500, 10000000, 'Hello world'); +INSERT INTO table_for_dict VALUES (100500, 10000000, 'Hello world'); DROP DATABASE IF EXISTS ordinary_db; @@ -30,7 +24,7 @@ CREATE DICTIONARY ordinary_db.dict1 third_column String DEFAULT 'qqq' ) PRIMARY KEY key_column -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' PASSWORD '' DB currentDatabase())) 
LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT()) SETTINGS(max_result_bytes=1); @@ -40,10 +34,6 @@ SELECT dictGetUInt64('ordinary_db.dict1', 'second_column', toUInt64(100500)); -- SELECT 'END'; -DROP DICTIONARY IF EXISTS ordinary_db.dict1; - DROP DATABASE IF EXISTS ordinary_db; -DROP TABLE IF EXISTS database_for_dict.table_for_dict; - -DROP DATABASE IF EXISTS database_for_dict; +DROP TABLE IF EXISTS table_for_dict; diff --git a/tests/queries/0_stateless/01269_alias_type_differs.sql b/tests/queries/0_stateless/01269_alias_type_differs.sql index 64abcf9e367..b78e46f62c8 100644 --- a/tests/queries/0_stateless/01269_alias_type_differs.sql +++ b/tests/queries/0_stateless/01269_alias_type_differs.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS data_01269; CREATE TABLE data_01269 ( diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.sql b/tests/queries/0_stateless/01272_suspicious_codecs.sql index 082a8d08675..1c1d7b58dd0 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.sql +++ b/tests/queries/0_stateless/01272_suspicious_codecs.sql @@ -1,9 +1,5 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS codecs; --- test what should work - CREATE TABLE codecs ( a UInt8 CODEC(LZ4), diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh index fb7bf5c6fc1..9a80820dd58 100755 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" set allow_deprecated_database_ordinary=1; DROP DATABASE IF EXISTS 01280_db; diff --git a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh index 3c11dc5f772..21f46a34514 100755 --- a/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh +++ b/tests/queries/0_stateless/01294_lazy_database_concurrent_recreate_reattach_and_show_tables_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-fasttest +# Tags: long, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -91,7 +91,7 @@ ${CLICKHOUSE_CLIENT} -n -q " " -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c recreate_lazy_func1 2> /dev/null & timeout $TIMEOUT bash -c recreate_lazy_func2 2> /dev/null & diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 2d18c45406c..47fe7a9c7d9 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh index b9f1f81da1a..a521accb082 100755 
--- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index fcdfa2dec82..2b0709162a3 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 50ade3fad45..41e0a12f369 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01355_alter_column_with_order.sql b/tests/queries/0_stateless/01355_alter_column_with_order.sql index 0b1b4c42cce..405157fd891 100644 --- a/tests/queries/0_stateless/01355_alter_column_with_order.sql +++ b/tests/queries/0_stateless/01355_alter_column_with_order.sql @@ -1,28 +1,26 @@ --- Tags: no-parallel - -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; set allow_deprecated_syntax_for_merge_tree=1; -CREATE TABLE alter_test (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); +CREATE TABLE alter_01355 (CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192); -ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; +ALTER TABLE alter_01355 ADD COLUMN Added1 UInt32 FIRST; -ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; +ALTER TABLE alter_01355 ADD COLUMN Added2 UInt32 AFTER NestedColumn; -ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; +ALTER TABLE alter_01355 ADD COLUMN Added3 UInt32 AFTER ToDrop; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST; +ALTER TABLE alter_01355 MODIFY COLUMN Added2 UInt32 FIRST; -ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID; +ALTER TABLE alter_01355 MODIFY COLUMN Added3 UInt32 AFTER CounterID; -DESC alter_test; -DETACH TABLE alter_test; -ATTACH TABLE alter_test; -DESC alter_test; +DESC alter_01355; +DETACH TABLE alter_01355; +ATTACH TABLE alter_01355; +DESC alter_01355; -DROP TABLE IF EXISTS alter_test; +DROP TABLE IF EXISTS alter_01355; diff --git a/tests/queries/0_stateless/01355_ilike.sql b/tests/queries/0_stateless/01355_ilike.sql index 6bde62bf47e..1ceb878a5ef 100644 --- a/tests/queries/0_stateless/01355_ilike.sql +++ 
b/tests/queries/0_stateless/01355_ilike.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-fasttest SELECT 'Hello' ILIKE ''; SELECT 'Hello' ILIKE '%'; @@ -53,11 +53,7 @@ SELECT 'ощщЁё' ILIKE '%щ%'; SELECT 'ощЩЁё' ILIKE '%ё%'; SHOW TABLES NOT ILIKE '%'; -DROP DATABASE IF EXISTS test_01355; -CREATE DATABASE test_01355; -USE test_01355; CREATE TABLE test1 (x UInt8) ENGINE = Memory; CREATE TABLE test2 (x UInt8) ENGINE = Memory; SHOW TABLES ILIKE 'tES%'; SHOW TABLES NOT ILIKE 'TeS%'; -DROP DATABASE test_01355; diff --git a/tests/queries/0_stateless/01388_clear_all_columns.sql b/tests/queries/0_stateless/01388_clear_all_columns.sql index cc395aa7fb4..07b4fb3de90 100644 --- a/tests/queries/0_stateless/01388_clear_all_columns.sql +++ b/tests/queries/0_stateless/01388_clear_all_columns.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP TABLE IF EXISTS test; CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO test (x) VALUES (1), (2), (3); diff --git a/tests/queries/0_stateless/01391_join_on_dict_crash.sql b/tests/queries/0_stateless/01391_join_on_dict_crash.sql index 854da04b334..e056e147501 100644 --- a/tests/queries/0_stateless/01391_join_on_dict_crash.sql +++ b/tests/queries/0_stateless/01391_join_on_dict_crash.sql @@ -1,13 +1,3 @@ --- Tags: no-parallel - -DROP DATABASE IF EXISTS db_01391; -CREATE DATABASE db_01391; -USE db_01391; - -DROP TABLE IF EXISTS t; -DROP TABLE IF EXISTS d_src; -DROP DICTIONARY IF EXISTS d; - CREATE TABLE t (click_city_id UInt32, click_country_id UInt32) Engine = Memory; CREATE TABLE d_src (id UInt64, country_id UInt8, name String) Engine = Memory; @@ -16,14 +6,9 @@ INSERT INTO d_src VALUES (0, 0, 'n'); CREATE DICTIONARY d (id UInt32, country_id UInt8, name String) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB 'db_01391' table 'd_src')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' DB currentDatabase() table 'd_src')) LIFETIME(MIN 1 MAX 1) LAYOUT(HASHED()); SELECT click_country_id FROM t AS cc LEFT JOIN d ON toUInt32(d.id) = cc.click_city_id; SELECT click_country_id FROM t AS cc LEFT JOIN d ON d.country_id < 99 AND d.id = cc.click_city_id; - -DROP DICTIONARY d; -DROP TABLE t; -DROP TABLE d_src; -DROP DATABASE IF EXISTS db_01391; diff --git a/tests/queries/0_stateless/01392_column_resolve.sql b/tests/queries/0_stateless/01392_column_resolve.sql index 72b6af4576a..90a7d9b169a 100644 --- a/tests/queries/0_stateless/01392_column_resolve.sql +++ b/tests/queries/0_stateless/01392_column_resolve.sql @@ -1,16 +1,11 @@ --- Tags: no-parallel +CREATE TABLE tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); +CREATE TABLE leftjoin (id String) ENGINE = Log(); -DROP DATABASE IF EXISTS test_01392; -CREATE DATABASE test_01392; - -CREATE TABLE test_01392.tableConversion (conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.tableClick (clickId String, conversionId String, value Nullable(Double)) ENGINE = Log(); -CREATE TABLE test_01392.leftjoin (id String) ENGINE = Log(); - -INSERT INTO test_01392.tableConversion(conversionId, value) VALUES ('Conversion 1', 1); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); -INSERT INTO test_01392.tableClick(clickId, conversionId, value) 
VALUES ('Click 3', 'Conversion 1', 16); +INSERT INTO tableConversion(conversionId, value) VALUES ('Conversion 1', 1); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 1', 'Conversion 1', 14); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 2', 'Conversion 1', 15); +INSERT INTO tableClick(clickId, conversionId, value) VALUES ('Click 3', 'Conversion 1', 16); SELECT conversion.conversionId AS myConversionId, @@ -18,19 +13,13 @@ SELECT click.myValue AS myValue FROM ( SELECT conversionId, value as myValue - FROM test_01392.tableConversion + FROM tableConversion ) AS conversion INNER JOIN ( SELECT clickId, conversionId, value as myValue - FROM test_01392.tableClick + FROM tableClick ) AS click ON click.conversionId = conversion.conversionId LEFT JOIN ( - SELECT * FROM test_01392.leftjoin + SELECT * FROM leftjoin ) AS dummy ON (dummy.id = conversion.conversionId) ORDER BY myValue; - -DROP TABLE test_01392.tableConversion; -DROP TABLE test_01392.tableClick; -DROP TABLE test_01392.leftjoin; - -DROP DATABASE test_01392; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 1c1eb4489ee..b81bb75891d 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-debug, no-parallel +# Tags: replica, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01412_cache_dictionary_race.sh b/tests/queries/0_stateless/01412_cache_dictionary_race.sh index 165a461193d..9aa39652021 100755 --- a/tests/queries/0_stateless/01412_cache_dictionary_race.sh +++ b/tests/queries/0_stateless/01412_cache_dictionary_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -54,7 +54,7 @@ function drop_create_table_thread() export -f dict_get_thread; export -f drop_create_table_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & timeout $TIMEOUT bash -c dict_get_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01415_sticking_mutations.sh b/tests/queries/0_stateless/01415_sticking_mutations.sh index 821c83fe728..b7c8768a65d 100755 --- a/tests/queries/0_stateless/01415_sticking_mutations.sh +++ b/tests/queries/0_stateless/01415_sticking_mutations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel +# Tags: no-replicated-database set -e diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index 85fdf7ed764..9ba1fe93543 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-debug +# Tags: long, no-debug set -e @@ -27,7 +27,7 @@ function thread_ops() } export -f thread_ops -TIMEOUT=60 +TIMEOUT=30 thread_ops $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh index eb231e71525..ae74efa4e20 100755 --- a/tests/queries/0_stateless/01444_create_table_drop_database_race.sh +++ 
b/tests/queries/0_stateless/01444_create_table_drop_database_race.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -8,20 +8,20 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # This test reproduces "Directory not empty" error in DROP DATABASE query. +export DB=test_$RANDOM function thread1() { while true; do -# ${CLICKHOUSE_CLIENT} --query="SHOW TABLES FROM test_01444" - ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" - ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS test_01444" + ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS $DB" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" + ${CLICKHOUSE_CLIENT} --query="CREATE DATABASE IF NOT EXISTS $DB" done } function thread2() { while true; do - ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test_01444.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null + ${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS $DB.t$RANDOM (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null done } @@ -36,4 +36,4 @@ timeout $TIMEOUT bash -c thread2 & wait -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_01444" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 +${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${DB}" 2>&1| grep -F "Code: " | grep -Fv "Code: 219" || exit 0 diff --git a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh index d83343b3cb3..2bfd350ec51 100755 --- a/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh +++ b/tests/queries/0_stateless/01454_storagememory_data_race_challenge.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, no-parallel +# Tags: race set -e @@ -36,8 +36,8 @@ function g { export -f f; export -f g; -timeout 30 bash -c f > /dev/null & -timeout 30 bash -c g > /dev/null & +timeout 20 bash -c f > /dev/null & +timeout 20 bash -c g > /dev/null & wait $CLICKHOUSE_CLIENT -q "DROP TABLE mem" diff --git a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh index 697b32a77ae..a5697a62dc2 100755 --- a/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh +++ b/tests/queries/0_stateless/01543_avro_deserialization_with_lc.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT --multiquery --query " SET allow_suspicious_low_cardinality_types=1; CREATE TABLE IF NOT EXISTS test_01543 (value LowCardinality(String), value2 LowCardinality(UInt64)) ENGINE=Memory(); diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh index d68f9bc1837..68c09932f2c 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest +# Tags: no-fasttest, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -39,7 +39,7 @@ function kill_mutation_thread export -f alter_thread; export -f kill_mutation_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c alter_thread 2> /dev/null & timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh index 96327536f89..685fe69642a 100755 --- a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -7,18 +7,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. - -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -CLICKHOUSE_USER_FILES_PATH=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/ -echo -n aaaaaaaaa > ${CLICKHOUSE_USER_FILES_PATH}/a.txt -echo -n bbbbbbbbb > ${CLICKHOUSE_USER_FILES_PATH}/b.txt -echo -n ccccccccc > ${CLICKHOUSE_USER_FILES_PATH}/c.txt +echo -n aaaaaaaaa > ${USER_FILES_PATH}/a.txt +echo -n bbbbbbbbb > ${USER_FILES_PATH}/b.txt +echo -n ccccccccc > ${USER_FILES_PATH}/c.txt echo -n ccccccccc > /tmp/c.txt -mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/dir +mkdir -p ${USER_FILES_PATH}/dir ### 1st TEST in CLIENT mode. 
@@ -85,15 +78,15 @@ echo "${CLICKHOUSE_LOCAL} --query "'"select file('"'dir'), file('b.txt')"'";echo # Test that the function is not injective -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/a -echo -n Hello > ${CLICKHOUSE_USER_FILES_PATH}/b -echo -n World > ${CLICKHOUSE_USER_FILES_PATH}/c +echo -n Hello > ${USER_FILES_PATH}/a +echo -n Hello > ${USER_FILES_PATH}/b +echo -n World > ${USER_FILES_PATH}/c ${CLICKHOUSE_CLIENT} --query "SELECT file(arrayJoin(['a', 'b', 'c'])) AS s, count() GROUP BY s ORDER BY s" ${CLICKHOUSE_CLIENT} --query "SELECT s, count() FROM file('?', TSV, 's String') GROUP BY s ORDER BY s" # Restore -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c}.txt -rm ${CLICKHOUSE_USER_FILES_PATH}/{a,b,c} +rm ${USER_FILES_PATH}/{a,b,c}.txt +rm ${USER_FILES_PATH}/{a,b,c} rm /tmp/c.txt -rm -rf ${CLICKHOUSE_USER_FILES_PATH}/dir +rm -rf ${USER_FILES_PATH}/dir diff --git a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh index 9167a2d306f..0e5c2862066 100755 --- a/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh +++ b/tests/queries/0_stateless/01684_ssd_cache_dictionary_simple_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01684_database_for_cache_dictionary; CREATE DATABASE 01684_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh index 9dd8a41ce5a..5583a9dd5e7 100755 --- a/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh +++ b/tests/queries/0_stateless/01685_ssd_cache_dictionary_complex_key.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 01685_database_for_cache_dictionary; CREATE DATABASE 01685_database_for_cache_dictionary; diff --git a/tests/queries/0_stateless/01710_projection_vertical_merges.sql b/tests/queries/0_stateless/01710_projection_vertical_merges.sql index 2c4378bb7a4..0f80d659e92 100644 --- a/tests/queries/0_stateless/01710_projection_vertical_merges.sql +++ b/tests/queries/0_stateless/01710_projection_vertical_merges.sql @@ -1,4 +1,4 @@ --- Tags: long, no-parallel +-- Tags: long drop table if exists t; diff --git a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql index 050aa33464e..c7c525ba20e 100644 --- a/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql +++ b/tests/queries/0_stateless/01747_join_view_filter_dictionary.sql @@ -25,7 +25,7 @@ create table dictst01747(some_name String, field1 String, field2 UInt8) Engine = as select 'name', 'test', 33; CREATE DICTIONARY default.dict01747 (some_name String, field1 String, field2 UInt8) -PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 +PRIMARY KEY some_name SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE dictst01747 DB currentDatabase() USER 'default')) LIFETIME(MIN 0 MAX 0) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh index ecddcb627b8..022bf488886 100755 --- a/tests/queries/0_stateless/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -228,13 +228,13 @@ function testMySQL() echo "MySQL 'successful login' case is skipped for ${auth_type}." else executeQuery \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', '${password}') LIMIT 1 \ FORMAT Null" fi echo 'Wrong username' executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 \ FORMAT Null" \ | grep -Eq "Code: 279\. DB::Exception: .* invalid_${username}" @@ -246,7 +246,7 @@ function testMySQL() echo "MySQL 'wrong password' case is skipped for ${auth_type}." else executeQueryExpectError \ - <<< "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ + <<< "SELECT 1 FROM mysql('127.0.0.1:${CLICKHOUSE_PORT_MYSQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 \ FORMAT Null" | grep -Eq "Code: 279\. DB::Exception: .* ${username}" fi } @@ -267,11 +267,11 @@ function testMySQL() ## Loging\Logout ## CH is being able to log into itself via PostgreSQL protocol but query fails. 
#executeQueryExpectError \ - # <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ + # <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', '${username}', '${password}') LIMIT 1 FORMAT Null" \ # Wrong username executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', 'invalid_${username}', '${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" if [[ "${auth_type}" == "no_password" ]] @@ -281,7 +281,7 @@ function testMySQL() else # Wrong password executeQueryExpectError \ - <<< "SELECT 1 FROM postgresql('localhost:9005', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ + <<< "SELECT 1 FROM postgresql('localhost:${CLICKHOUSE_PORT_POSTGRESQL}', 'system', 'one', '${username}', 'invalid_${password}') LIMIT 1 FORMAT Null" \ | grep -Eq "Invalid user or password" fi } diff --git a/tests/queries/0_stateless/01748_dictionary_table_dot.sql b/tests/queries/0_stateless/01748_dictionary_table_dot.sql index a2364fdf823..993d2e1a635 100644 --- a/tests/queries/0_stateless/01748_dictionary_table_dot.sql +++ b/tests/queries/0_stateless/01748_dictionary_table_dot.sql @@ -22,7 +22,7 @@ CREATE DICTIONARY test_dict `value` String ) PRIMARY KEY key1, key2 -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE `test.txt` PASSWORD '' DB currentDatabase())) LIFETIME(MIN 1 MAX 3600) LAYOUT(COMPLEX_KEY_HASHED()); diff --git a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql index 1eee4090112..3ebc85c47f7 100644 --- a/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql +++ b/tests/queries/0_stateless/01780_clickhouse_dictionary_source_loop.sql @@ -10,7 +10,7 @@ CREATE DICTIONARY dict1 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict1')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict1')) LAYOUT(DIRECT()); SELECT * FROM dict1; --{serverError BAD_ARGUMENTS} @@ -24,7 +24,7 @@ CREATE DICTIONARY 01780_db.dict2 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 DATABASE '01780_db' TABLE 'dict2')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DATABASE '01780_db' TABLE 'dict2')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict2; --{serverError BAD_ARGUMENTS} @@ -45,7 +45,7 @@ CREATE DICTIONARY 01780_db.dict3 value String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'dict3_source' DATABASE '01780_db')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dict3_source' DATABASE '01780_db')) LAYOUT(DIRECT()); SELECT * FROM 01780_db.dict3; diff --git a/tests/queries/0_stateless/01825_type_json_btc.sh b/tests/queries/0_stateless/01825_type_json_btc.sh index 1e74166e7a7..ebc5482de7a 100755 --- a/tests/queries/0_stateless/01825_type_json_btc.sh +++ b/tests/queries/0_stateless/01825_type_json_btc.sh @@ -5,10 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -cp $CUR_DIR/data_json/btc_transactions.json ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${CLICKHOUSE_USER_FILES_UNIQUE}/ +rm -rf "${CLICKHOUSE_USER_FILES_UNIQUE:?}"/* +cp $CUR_DIR/data_json/btc_transactions.json ${CLICKHOUSE_USER_FILES_UNIQUE}/ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" @@ -27,4 +26,4 @@ ${CLICKHOUSE_CLIENT} -q "SELECT data.out.spending_outpoints AS outpoints FROM bt ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS btc" -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/btc_transactions.json +rm ${CLICKHOUSE_USER_FILES_UNIQUE}/btc_transactions.json diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh index 089b7991784..453e7a3c78e 100755 --- a/tests/queries/0_stateless/01825_type_json_multiple_files.sh +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -1,23 +1,22 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -for f in "$user_files_path"/01825_file_*.json; do +for f in "${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}"_*.json; do [ -e $f ] && rm $f done for i in {0..5}; do - echo "{\"k$i\": 100}" > "$user_files_path"/01825_file_$i.json + echo "{\"k$i\": 100}" > "$USER_FILES_PATH/${CLICKHOUSE_DATABASE}_$i.json" done ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" @@ -25,7 +24,7 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ ORDER BY _file LIMIT 3" --max_threads 1 --min_insert_block_size_rows 1 --max_insert_block_size 1 --max_block_size 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file, data FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 @@ -34,11 +33,11 @@ ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} 
-q "TRUNCATE TABLE IF EXISTS t_json_files" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ - SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ - WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" --allow_experimental_object_type 1 + SELECT _file, data FROM file('${CLICKHOUSE_DATABASE}_*.json', 'JSONAsObject', 'data JSON') \ + WHERE _file IN ('${CLICKHOUSE_DATABASE}_1.json', '${CLICKHOUSE_DATABASE}_3.json')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" -rm "$user_files_path"/01825_file_*.json +rm "$USER_FILES_PATH"/${CLICKHOUSE_DATABASE}_*.json diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.sh b/tests/queries/0_stateless/01825_type_json_schema_inference.sh index 5fca608d8bb..e0c283b2230 100755 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.sh +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.sh @@ -10,11 +10,10 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj Object(Nullable('json')), s String) \ ENGINE = MergeTree ORDER BY id" --allow_experimental_object_type 1 -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" +filename="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename diff --git a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh index 36a2165329b..8336229a643 100755 --- a/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh +++ b/tests/queries/0_stateless/01875_ssd_cache_dictionary_decimal256_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" SET allow_experimental_bigint_types = 1; diff --git a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh index b5be39a91df..f79637a7635 100755 --- a/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh +++ b/tests/queries/0_stateless/01889_check_row_policy_defined_using_user_function.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none @@ -7,23 +6,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -q "drop user if exists u_01889" -${CLICKHOUSE_CLIENT} -q "drop role if exists r_01889" -${CLICKHOUSE_CLIENT} -q "drop policy if exists t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "create user u_01889 identified with plaintext_password by 'dfsdffdf5t123'" -${CLICKHOUSE_CLIENT} -q "revoke all on *.* from u_01889" -${CLICKHOUSE_CLIENT} -q "create role r_01889" +USER=u_01889$RANDOM +ROLE=r_01889$RANDOM +POLICY=t_01889_filter$RANDOM + +${CLICKHOUSE_CLIENT} -q "drop user if exists $USER" +${CLICKHOUSE_CLIENT} -q "drop role if exists ${ROLE}" +${CLICKHOUSE_CLIENT} -q "drop policy if exists ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "create user $USER identified with plaintext_password by 'dfsdffdf5t123'" +${CLICKHOUSE_CLIENT} -q "revoke all on *.* from $USER" +${CLICKHOUSE_CLIENT} -q "create role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "create table t_01889(a Int64, user_id String) Engine=MergeTree order by a" -${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'u_01889' from numbers(1000)" +${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, '$USER' from numbers(1000)" ${CLICKHOUSE_CLIENT} -q "insert into t_01889 select number, 'xxxxxxx' from numbers(1000)" -${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to r_01889" -${CLICKHOUSE_CLIENT} -q "create row policy t_01889_filter ON t_01889 FOR SELECT USING user_id = user() TO r_01889" -${CLICKHOUSE_CLIENT} -q "grant r_01889 to u_01889" -${CLICKHOUSE_CLIENT} -q "alter user u_01889 default role r_01889 settings none" +${CLICKHOUSE_CLIENT} -q "grant select on t_01889 to ${ROLE}" +${CLICKHOUSE_CLIENT} -q "create row policy ${POLICY} ON t_01889 FOR SELECT USING user_id = user() TO ${ROLE}" +${CLICKHOUSE_CLIENT} -q "grant ${ROLE} to $USER" +${CLICKHOUSE_CLIENT} -q "alter user $USER default role ${ROLE} settings none" -${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=u_01889 --password=dfsdffdf5t123 --query="select count() from t_01889" +${CLICKHOUSE_CLIENT_BINARY} --database=${CLICKHOUSE_DATABASE} --user=$USER --password=dfsdffdf5t123 --query="select count() from t_01889" -${CLICKHOUSE_CLIENT} -q "drop user u_01889" -${CLICKHOUSE_CLIENT} -q "drop policy t_01889_filter on t_01889" -${CLICKHOUSE_CLIENT} -q "drop role r_01889" +${CLICKHOUSE_CLIENT} -q "drop user $USER" +${CLICKHOUSE_CLIENT} -q "drop policy ${POLICY} on t_01889" +${CLICKHOUSE_CLIENT} -q "drop role ${ROLE}" ${CLICKHOUSE_CLIENT} -q "drop table t_01889" diff --git a/tests/queries/0_stateless/01889_sqlite_read_write.sh b/tests/queries/0_stateless/01889_sqlite_read_write.sh index 30496af46f6..63ce5dc909c 100755 --- 
a/tests/queries/0_stateless/01889_sqlite_read_write.sh +++ b/tests/queries/0_stateless/01889_sqlite_read_write.sh @@ -6,15 +6,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 "${user_files_path}" - export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" -DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 +DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1 DB_PATH2=$CUR_DIR/${CURR_DATABASE}_db2 function cleanup() diff --git a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh index 3676f1429b2..853445daf3f 100755 --- a/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh +++ b/tests/queries/0_stateless/01903_ssd_cache_dictionary_array_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_array_source_table; CREATE TABLE dictionary_array_source_table diff --git a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh index 6aecb20329a..0b555cf82c2 100755 --- a/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh +++ b/tests/queries/0_stateless/01904_ssd_cache_dictionary_default_nullable_type.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS dictionary_nullable_source_table; CREATE TABLE dictionary_nullable_source_table diff --git a/tests/queries/0_stateless/01910_view_dictionary.sql b/tests/queries/0_stateless/01910_view_dictionary.sql index 05a67889825..51f46decadd 100644 --- a/tests/queries/0_stateless/01910_view_dictionary.sql +++ b/tests/queries/0_stateless/01910_view_dictionary.sql @@ -34,7 +34,7 @@ CREATE DICTIONARY flat_dictionary value_ru String ) PRIMARY KEY id -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' PASSWORD '' TABLE 'dictionary_source_view')) LIFETIME(MIN 1 MAX 1000) LAYOUT(FLAT()); diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 5e1600a0673..edffc0a3807 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error @@ -50,7 +50,7 @@ function insert_thread export -f insert_thread; export -f optimize_thread; -TIMEOUT=30 +TIMEOUT=20 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c insert_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 4b230e4f738..0aedef028a2 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -6,10 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/ -cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ +mkdir -p ${USER_FILES_PATH}/ +cp $CUR_DIR/data_zstd/test_01946.zstd ${USER_FILES_PATH}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " set min_chunk_bytes_for_parallel_parsing=10485760; diff --git a/tests/queries/0_stateless/02003_compress_bz2.sh b/tests/queries/0_stateless/02003_compress_bz2.sh index b17effb20b6..edc433ad69b 100755 --- a/tests/queries/0_stateless/02003_compress_bz2.sh +++ b/tests/queries/0_stateless/02003_compress_bz2.sh @@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02003="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02003}" diff --git a/tests/queries/0_stateless/02012_compress_lz4.sh b/tests/queries/0_stateless/02012_compress_lz4.sh index aad437c8011..700bff613da 100755 --- a/tests/queries/0_stateless/02012_compress_lz4.sh +++ b/tests/queries/0_stateless/02012_compress_lz4.sh @@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') WORKING_FOLDER_02012="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}" rm -rf "${WORKING_FOLDER_02012}" diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index ea703d69aa5..29bc28849c8 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -6,30 +6,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" @@ -40,4 +36,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_ ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt diff --git a/tests/queries/0_stateless/02023_storage_filelog.sh 
b/tests/queries/0_stateless/02023_storage_filelog.sh index 51c8dc8ab3e..798ffe66ed9 100755 --- a/tests/queries/0_stateless/02023_storage_filelog.sh +++ b/tests/queries/0_stateless/02023_storage_filelog.sh @@ -6,57 +6,52 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/j.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings 
stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt -mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/g.txt +mv ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt for i in {150..200} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/h.txt done for i in {200..250} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/i.txt done ${CLICKHOUSE_CLIENT} --query "attach table file_log;" @@ -68,11 +63,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02024_storage_filelog_mv.sh b/tests/queries/0_stateless/02024_storage_filelog_mv.sh index 33c8693648c..a47ca6c7507 100755 --- a/tests/queries/0_stateless/02024_storage_filelog_mv.sh +++ b/tests/queries/0_stateless/02024_storage_filelog_mv.sh @@ -7,21 +7,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. 
also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table if exists mv;" ${CLICKHOUSE_CLIENT} --query "create Materialized View mv engine=MergeTree order by k as select * from file_log;" @@ -39,17 +34,17 @@ done ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt done while true; do @@ -62,4 +57,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from mv order by k;" ${CLICKHOUSE_CLIENT} --query "drop table mv;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index f027b61c3ef..7e414b8863e 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -6,42 +6,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. 
also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {1..20} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do - echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt + echo $i, $i >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt done # touch does not change file content, no event -touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt +touch ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/c.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +cp ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt -rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt +rm ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" @@ -51,11 +46,11 @@ ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return ${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" -truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 +truncate ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend ${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 
33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh index b0a9a4357f3..b7377fb026a 100755 --- a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh +++ b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh @@ -7,34 +7,27 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* - -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ for i in {1..10} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" for i in {11..20} do - ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" + ${CLICKHOUSE_CLIENT} --query "insert into function file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done ${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 6fcfef23cc7..b34bdc9f670 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES} CLIENT_SCHEMADIR=$CURDIR/format_schemas -SERVER_SCHEMADIR=test_02030 +SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE} mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index eab44e74d88..afdb14c4191 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -5,10 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -FILE_PATH="${user_files_path}/file" +FILE_PATH="${USER_FILES_PATH}/file" mkdir -p ${FILE_PATH} chmod 777 ${FILE_PATH} diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 7ea6739e932..25ad4f37e45 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -DATA_FILE=$CLICKHOUSE_TMP/test_02103_null.data +DATA_FILE=${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}_null.data # Wrapper for clickhouse-client to always output in JSONEachRow format, that # way format settings will not affect output. diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh index a6e704093a2..07d40539358 100755 --- a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh @@ -1,20 +1,17 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -DATA_FILE=$USER_FILES_PATH/test_02103.data +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithNamesAndTypes' 'CSVWithNames' 'CSVWithNamesAndTypes' 'JSONCompactEachRowWithNames' 'JSONCompactEachRowWithNamesAndTypes') for format in "${FORMATS[@]}" do $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE - $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('${CLICKHOUSE_DATABASE}.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" done rm $DATA_FILE diff --git a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh index b3b156b5787..d46b0704b0e 100755 --- a/tests/queries/0_stateless/02104_json_strings_nullable_string.sh +++ b/tests/queries/0_stateless/02104_json_strings_nullable_string.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02104_null.data echo -e '{"s" : "NULLSome string"}' > $DATA_FILE diff --git a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh index c79b5d0eee5..38c68ca0005 100755 --- a/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh +++ b/tests/queries/0_stateless/02105_table_function_file_partiotion_by.sh @@ -5,13 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -# See 01658_read_file_to_string_column.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p "${user_files_path}/" -chmod 777 ${user_files_path} - -FILE_PATH="${user_files_path}/test_table_function_file" +FILE_PATH="${USER_FILES_PATH}/test_table_function_file" function cleanup() { diff --git a/tests/queries/0_stateless/02115_write_buffers_finalize.sh b/tests/queries/0_stateless/02115_write_buffers_finalize.sh index d8a3c29bbbd..fb3a77f6105 100755 --- a/tests/queries/0_stateless/02115_write_buffers_finalize.sh +++ b/tests/queries/0_stateless/02115_write_buffers_finalize.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest # Tag no-fasttest: depends on brotli and bzip2 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh index 400bf2a56fa..d925c962da4 100755 --- a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh +++ b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index ccbfc5abe97..d544f1452a9 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -1,65 +1,65 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-replicated-database +# Tags: no-fasttest, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE=$USER_FILES_PATH/data_02118 +DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE} +FILE=${CLICKHOUSE_DATABASE} echo "[\"[1,2,3]trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Array(UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-02trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Date32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"1970-01-01 03:00:01.0000trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x DateTime64(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UInt32')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"42.4242trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Decimal32(4)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q 
"UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Map(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"(1, 2)trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x Tuple(UInt32, UInt32)')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 
'OK' || echo 'FAIL' echo "[\"ed9fd45d-6287-47c1-ad9f-d45d628767c1trash\"]" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('${FILE}', 'JSONCompactStringsEachRow', 'x UUID')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' rm $DATA_FILE diff --git a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh index 0abf411d38f..4a169897520 100755 --- a/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh +++ b/tests/queries/0_stateless/02125_tskv_proper_names_reading.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - DATA_FILE=$USER_FILES_PATH/test_02125.data echo "number=1" > $DATA_FILE diff --git a/tests/queries/0_stateless/02126_fix_filelog.sh b/tests/queries/0_stateless/02126_fix_filelog.sh index b266b582428..0e136a34c62 100755 --- a/tests/queries/0_stateless/02126_fix_filelog.sh +++ b/tests/queries/0_stateless/02126_fix_filelog.sh @@ -6,14 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" @@ -21,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=Fil ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('/tmp/aaa.csv', 'CSV');" 2>&1 | grep -q "Code: 36" && echo 'OK' || echo 'FAIL'; -${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" +${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh index ac702d3c750..701d7a30b68 100755 --- a/tests/queries/0_stateless/02129_skip_quoted_fields.sh +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash 
-# Tags: no-parallel
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh
index 0c72d0e85a7..44e6ee93599 100755
--- a/tests/queries/0_stateless/02130_parse_quoted_null.sh
+++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh
@@ -1,14 +1,12 @@
 #!/usr/bin/env bash
-# Tags: no-parallel
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-DATA_FILE=$USER_FILES_PATH/test_02130.data
-SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'"
+DATA_FILE=$USER_FILES_PATH/${CLICKHOUSE_DATABASE}.data
+SELECT_QUERY="select * from file('${CLICKHOUSE_DATABASE}.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'"
 $CLICKHOUSE_CLIENT -q "drop table if exists test_02130"
diff --git a/tests/queries/0_stateless/02149_external_schema_inference.sh b/tests/queries/0_stateless/02149_external_schema_inference.sh
index 41f8bfee2bc..edb4e915701 100755
--- a/tests/queries/0_stateless/02149_external_schema_inference.sh
+++ b/tests/queries/0_stateless/02149_external_schema_inference.sh
@@ -1,20 +1,19 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data
+FILE_NAME=test_${CLICKHOUSE_TEST_UNIQUE_NAME}_${CLICKHOUSE_DATABASE}.data
 DATA_FILE=$USER_FILES_PATH/$FILE_NAME
 touch $DATA_FILE
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES}
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
-SERVER_SCHEMADIR=test_02149
+SERVER_SCHEMADIR=${CLICKHOUSE_DATABASE}
 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
 cp -r $CLIENT_SCHEMADIR/* $SCHEMADIR/$SERVER_SCHEMADIR/
diff --git a/tests/queries/0_stateless/02149_schema_inference.sh b/tests/queries/0_stateless/02149_schema_inference.sh
index 856549f2215..fba1e6e9137 100755
--- a/tests/queries/0_stateless/02149_schema_inference.sh
+++ b/tests/queries/0_stateless/02149_schema_inference.sh
@@ -1,18 +1,16 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 FILE_NAME=test_$CLICKHOUSE_TEST_UNIQUE_NAME.data
-DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
-
+DATA_FILE=${CLICKHOUSE_USER_FILES:?}/$FILE_NAME
 touch $DATA_FILE
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('$FILE_NAME', 'Template', 'val1 char') settings format_template_row='nonexist'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist)")
+SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES}
 echo "TSV"
diff --git a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh
index 8de2ab8c57a..bf247817323 100755
--- a/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh
+++ b/tests/queries/0_stateless/02149_schema_inference_create_table_syntax.sh
@@ -1,14 +1,13 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-mkdir $USER_FILES_PATH/test_02149
-FILE_NAME=test_02149/data.Parquet
+mkdir $USER_FILES_PATH/${CLICKHOUSE_DATABASE}/
+FILE_NAME=data.Parquet
 DATA_FILE=$USER_FILES_PATH/$FILE_NAME
 $CLICKHOUSE_CLIENT -q "select number as num, concat('Str: ', toString(number)) as str, [number, number + 1] as arr from numbers(10) format Parquet" > $DATA_FILE
diff --git a/tests/queries/0_stateless/02167_format_from_file_extension.sh b/tests/queries/0_stateless/02167_format_from_file_extension.sh
index 14985233524..0a0efff3228 100755
--- a/tests/queries/0_stateless/02167_format_from_file_extension.sh
+++ b/tests/queries/0_stateless/02167_format_from_file_extension.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
@@ -7,28 +7,26 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 function cleanup()
 {
-    # this command expects an error message like 'Code: 107. DB::Exception: Received <...> nonexist.txt doesn't exist. (FILE_DOESNT_EXIST)'
-    user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
-    rm $user_files_path/test_02167.*
+    rm ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}_test_02167.*
 }
 trap cleanup EXIT
 for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes
 do
-    $CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)"
-    $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')"
-    $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')"
+    $CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)"
+    $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.$format', '$format')"
 done
-$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')"
+$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'auto', 'x UInt64')"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.bin', 'RowBinary', 'x UInt64')"
-$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')"
+$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson')"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.ndjson', 'JSONEachRow', 'x UInt64')"
-$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1"
-$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')"
+$CLICKHOUSE_CLIENT -q "insert into table function file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack') settings input_format_msgpack_number_of_columns=1"
+$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_DATABASE}_test_02167.messagepack', 'MsgPack', 'x UInt64')"
diff --git a/tests/queries/0_stateless/02185_orc_corrupted_file.sh b/tests/queries/0_stateless/02185_orc_corrupted_file.sh
index 12510ae3836..8cf4334845d 100755
--- a/tests/queries/0_stateless/02185_orc_corrupted_file.sh
+++ b/tests/queries/0_stateless/02185_orc_corrupted_file.sh
@@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 cp $CUR_DIR/data_orc/corrupted.orc $USER_FILES_PATH/
 ${CLICKHOUSE_CLIENT} --query="select * from file('corrupted.orc')" 2>&1 | grep -F -q 'CANNOT_EXTRACT_TABLE_STRUCTURE' && echo 'OK' || echo 'FAIL'
diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
index 0345a0e6394..dc3cb0de110 100755
--- a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
+++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh
@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml
+cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml
 sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml
 sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml
diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
index d49c3610852..73d1c2b9b42 100755
--- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
+++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
@@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 $CLICKHOUSE_CLIENT -q "insert into table function file(data.jsonl, 'JSONEachRow', 'x UInt32 default 42, y String') select number as x, 'String' as y from numbers(10)"
 $CLICKHOUSE_CLIENT -q "drop table if exists test"
diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh
index 37fdde95ea7..344452767cc 100755
--- a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh
+++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh
@@ -6,9 +6,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-# See 01658_read_file_to_string_column.sh
-user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 function cleanup()
 {
     ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${CURR_DATABASE}"
@@ -18,7 +15,7 @@ trap cleanup EXIT
 export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}"
-DB_PATH=${user_files_path}/${CURR_DATABASE}_db1
+DB_PATH=${USER_FILES_PATH}/${CURR_DATABASE}_db1
 ${CLICKHOUSE_CLIENT} --multiquery --multiline --query="""
 DROP DATABASE IF EXISTS ${CURR_DATABASE};
diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql
index 6d86d995143..72b6cc06f26 100644
--- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql
+++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql
@@ -1,4 +1,4 @@
--- Tags: long, no-parallel, no-object-storage
+-- Tags: long, no-object-storage
 -- no-object-storage: Avoid flakiness due to cache / buffer usage
 SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections
diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
index e03c62cfc5f..0f37bff45d6 100755
--- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
+++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh
@@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 FILE_NAME=test_02242.data
 DATA_FILE=$USER_FILES_PATH/$FILE_NAME
diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
index 8ff6e28b123..7bfeb747cc2 100755
--- a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
+++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh
@@ -5,7 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 FILE_NAME=test_02245.parquet
 DATA_FILE=$USER_FILES_PATH/$FILE_NAME
diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh
index 233db7a534d..07c2a33c4d5 100755
--- a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh
+++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh
@@ -1,12 +1,11 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 FILE_NAME=test_02149.data
 DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
diff --git a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh
index e8e3bf88ac4..3385f62af38 100755
--- a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh
+++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.sh
@@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 FILE_NAME=test_02247.data
 DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
diff --git a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh
index 523b5934543..76133df2b37 100755
--- a/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh
+++ b/tests/queries/0_stateless/02247_read_bools_as_numbers_json.sh
@@ -1,13 +1,12 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-fasttest
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-FILE_NAME=test_02247.data
+FILE_NAME=${CLICKHOUSE_DATABASE}.data
 DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
 touch $DATA_FILE
diff --git a/tests/queries/0_stateless/02270_errors_in_files.sh b/tests/queries/0_stateless/02270_errors_in_files.sh
index 517547c6ef8..ab8bb28787d 100755
--- a/tests/queries/0_stateless/02270_errors_in_files.sh
+++ b/tests/queries/0_stateless/02270_errors_in_files.sh
@@ -13,8 +13,6 @@ echo "Error" > "${CLICKHOUSE_TMP}"/test_02270_2.csv
 ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" 2>&1 | grep -o "test_02270_2.csv"
 ${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/test_02270*.csv', CSV, 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o "test_02270_2.csv"
-user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_1.csv') SELECT 'Hello', 'World'"
 ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv') SELECT 'Error'"
@@ -27,9 +25,9 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE FUNCTION file('test_02270_2.csv.
 ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" 2>&1 | grep -o -m1 "test_02270_2.csv.gz"
 ${CLICKHOUSE_CLIENT} --query "SELECT * FROM file('test_02270*.csv.gz', 'CSV', 'a String, b String')" --input_format_parallel_parsing 0 2>&1 | grep -o -m1 "test_02270_2.csv.gz"
-rm "${CLICKHOUSE_TMP}"/test_02270_1.csv
-rm "${CLICKHOUSE_TMP}"/test_02270_2.csv
-rm "${user_files_path}"/test_02270_1.csv
-rm "${user_files_path}"/test_02270_2.csv
-rm "${user_files_path}"/test_02270_1.csv.gz
-rm "${user_files_path}"/test_02270_2.csv.gz
+rm -f "${CLICKHOUSE_TMP}"/test_02270_1.csv
+rm -f "${CLICKHOUSE_TMP}"/test_02270_2.csv
+rm -f "${USER_FILES_PATH}"/test_02270_1.csv
+rm -f "${USER_FILES_PATH}"/test_02270_2.csv
+rm -f "${USER_FILES_PATH}"/test_02270_1.csv.gz
+rm -f "${USER_FILES_PATH}"/test_02270_2.csv.gz
diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh
index 2f6167c3ddf..59528d97b93 100755
--- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh
+++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh
@@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH
 $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y"
diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh
index 4eae5a1abb4..bf605d6f591 100755
--- a/tests/queries/0_stateless/02293_formats_json_columns.sh
+++ b/tests/queries/0_stateless/02293_formats_json_columns.sh
@@ -5,8 +5,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 DATA_FILE=$USER_FILES_PATH/data_02293
 $CLICKHOUSE_CLIENT -q "drop table if exists test_02293"
diff --git a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh
index 5973e24844a..666cb4d87fc 100755
--- a/tests/queries/0_stateless/02297_regex_parsing_file_names.sh
+++ b/tests/queries/0_stateless/02297_regex_parsing_file_names.sh
@@ -5,29 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-# Data preparation.
+rm -rf ${USER_FILES_PATH}/file_{0..10}.csv
-# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as:
-# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')"
-CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+echo '0' > ${USER_FILES_PATH}/file_0.csv
+echo '0' > ${USER_FILES_PATH}/file_1.csv
+echo '0' > ${USER_FILES_PATH}/file_2.csv
+echo '0' > ${USER_FILES_PATH}/file_3.csv
+echo '0' > ${USER_FILES_PATH}/file_4.csv
+echo '0' > ${USER_FILES_PATH}/file_5.csv
+echo '0' > ${USER_FILES_PATH}/file_6.csv
+echo '0' > ${USER_FILES_PATH}/file_7.csv
+echo '0' > ${USER_FILES_PATH}/file_8.csv
+echo '0' > ${USER_FILES_PATH}/file_9.csv
+echo '0' > ${USER_FILES_PATH}/file_10.csv
-mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/
-
-rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv
-
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_0.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_1.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_2.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_3.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_4.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_5.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_6.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_7.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_8.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_9.csv
-echo '0' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv
-
-# echo '' > ${CLICKHOUSE_USER_FILES_PATH}/file_10.csv
+# echo '' > ${USER_FILES_PATH}/file_10.csv
 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;"
@@ -36,5 +28,5 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_regex (id UInt64) ENGINE = MergeTree() o
 ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_regex SELECT * FROM file('file_{0..10}.csv','CSV');"
 ${CLICKHOUSE_CLIENT} -q "SELECT count() from t_regex;"
-rm -rf ${CLICKHOUSE_USER_FILES_PATH}/file_{0..10}.csv;
+rm -rf ${USER_FILES_PATH}/file_{0..10}.csv;
 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_regex;"
diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
index 650faf6985e..89e5c827a48 100755
--- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
+++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh
@@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 touch $USER_FILES_PATH/data.capnp
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES}
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
 SERVER_SCHEMADIR=test_02327
 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
diff --git a/tests/queries/0_stateless/02353_compression_level.sh b/tests/queries/0_stateless/02353_compression_level.sh
index 8d6a9c899ad..9e102d12fed 100755
--- a/tests/queries/0_stateless/02353_compression_level.sh
+++ b/tests/queries/0_stateless/02353_compression_level.sh
@@ -1,12 +1,11 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest, no-parallel
+# Tags: no-fasttest
 # Tag no-fasttest: depends on brotli and bzip2
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 WORKING_FOLDER_02353="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"
 rm -rf "${WORKING_FOLDER_02353}"
diff --git a/tests/queries/0_stateless/02358_file_default_value.sh b/tests/queries/0_stateless/02358_file_default_value.sh
index a7c4c17c129..0fd97a09546 100755
--- a/tests/queries/0_stateless/02358_file_default_value.sh
+++ b/tests/queries/0_stateless/02358_file_default_value.sh
@@ -4,7 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 WORKING_FOLDER_02357="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"
 rm -rf "${WORKING_FOLDER_02357}"
diff --git a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh
index b58cfd7ec21..50e07ca8612 100755
--- a/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh
+++ b/tests/queries/0_stateless/02360_clickhouse_local_config-option.sh
@@ -15,7 +15,7 @@ echo "
     true
-    9000
+    ${CLICKHOUSE_PORT_TCP}
     ${SAFE_DIR}
diff --git a/tests/queries/0_stateless/02372_data_race_in_avro.sh b/tests/queries/0_stateless/02372_data_race_in_avro.sh
index 50a7ae1e3c5..49c34e31923 100755
--- a/tests/queries/0_stateless/02372_data_race_in_avro.sh
+++ b/tests/queries/0_stateless/02372_data_race_in_avro.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest, no-parallel
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh
index 3461287d28a..e95bda2adfb 100755
--- a/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh
+++ b/tests/queries/0_stateless/02373_heap_buffer_overflow_in_avro.sh
@@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 cp $CURDIR/data_avro/corrupted.avro $USER_FILES_PATH/
 $CLICKHOUSE_CLIENT -q "select * from file(corrupted.avro)" 2>&1 | grep -F -q "Cannot read compressed data" && echo "OK" || echo "FAIL"
diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh
index 80743a97dd0..ae08941da63 100755
--- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh
+++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh
@@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM
 mkdir -p $UNIQ_DEST_PATH
diff --git a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh
index 8aad68ffe5c..3028451a5f5 100755
--- a/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh
+++ b/tests/queries/0_stateless/02402_capnp_format_segments_overflow.sh
@@ -5,11 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 mkdir -p $USER_FILES_PATH/test_02402
 cp $CURDIR/data_capnp/overflow.capnp $USER_FILES_PATH/test_02402/
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('test_02402/overflow.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES}
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
 SERVER_SCHEMADIR=test_02402
diff --git a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh
index df304eeeba5..72b55144348 100755
--- a/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh
+++ b/tests/queries/0_stateless/02421_record_errors_row_by_input_format.sh
@@ -9,10 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # Data preparation.
-CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
-mkdir -p ${CLICKHOUSE_USER_FILES_PATH}/
-echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${CLICKHOUSE_USER_FILES_PATH}/a.csv
+echo -e "1,1\n2,a\nb,3\n4,4\n5,c\n6,6" > ${USER_FILES_PATH}/a.csv
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
 ${CLICKHOUSE_CLIENT} --query "create table data (A UInt8, B UInt8) engine=MergeTree() order by A;"
@@ -23,12 +20,12 @@ sleep 2
 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_server', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');"
 # Client side
-${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${CLICKHOUSE_USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${CLICKHOUSE_USER_FILES_PATH}/a.csv
+${CLICKHOUSE_CLIENT} --input_format_allow_errors_num 4 --input_format_record_errors_file_path "${USER_FILES_PATH}/errors_client" --query "insert into data(A, B) format CSV" < ${USER_FILES_PATH}/a.csv
 sleep 2
 ${CLICKHOUSE_CLIENT} --query "select * except (time) from file('errors_client', 'CSV', 'time DateTime, database Nullable(String), table Nullable(String), offset UInt32, reason String, raw_data String');"
 # Restore
 ${CLICKHOUSE_CLIENT} --query "drop table if exists data;"
-rm ${CLICKHOUSE_USER_FILES_PATH}/a.csv
-rm ${CLICKHOUSE_USER_FILES_PATH}/errors_server
-rm ${CLICKHOUSE_USER_FILES_PATH}/errors_client
+rm ${USER_FILES_PATH}/a.csv
+rm ${USER_FILES_PATH}/errors_server
+rm ${USER_FILES_PATH}/errors_client
diff --git a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh
index 013c367e079..3c433856be2 100755
--- a/tests/queries/0_stateless/02422_allow_implicit_no_password.sh
+++ b/tests/queries/0_stateless/02422_allow_implicit_no_password.sh
@@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml
+cp ${CLICKHOUSE_CONFIG_DIR}/users.xml "$CURDIR"/users.xml
 sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml
 sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml
diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh
index 05de3f05562..7906f2917c4 100755
--- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh
+++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh
@@ -5,11 +5,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')
+cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz
-cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000"
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('${CLICKHOUSE_DATABASE}_10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000"
-${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=1000000000"
-${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings input_format_parallel_parsing=1, max_threads=1, max_parsing_threads=16, min_chunk_bytes_for_parallel_parsing=10485760, max_memory_usage=100000000"
-
-rm $USER_FILES_PATH/10m_rows.csv.xz
+rm $USER_FILES_PATH/${CLICKHOUSE_DATABASE}_10m_rows.csv.xz
diff --git a/tests/queries/0_stateless/02457_bz2_concatenated.sh b/tests/queries/0_stateless/02457_bz2_concatenated.sh
index 96e23cbfa2a..a9991cf44e7 100755
--- a/tests/queries/0_stateless/02457_bz2_concatenated.sh
+++ b/tests/queries/0_stateless/02457_bz2_concatenated.sh
@@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 WORKING_FOLDER_02457="${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}"
 rm -rf "${WORKING_FOLDER_02457}"
diff --git a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh
index b8430307ea3..b86385b72c4 100755
--- a/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh
+++ b/tests/queries/0_stateless/02459_glob_for_recursive_directory_traversal.sh
@@ -5,28 +5,26 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
-user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
-mkdir $user_files_path/d1
-touch $user_files_path/d1/text1.txt
+mkdir $USER_FILES_PATH/d1
+touch $USER_FILES_PATH/d1/text1.txt
 for i in {1..2}
 do
-    echo $i$'\t'$i >> $user_files_path/d1/text1.txt
+    echo $i$'\t'$i >> $USER_FILES_PATH/d1/text1.txt
 done
-mkdir $user_files_path/d1/d2
-touch $user_files_path/d1/d2/text2.txt
+mkdir $USER_FILES_PATH/d1/d2
+touch $USER_FILES_PATH/d1/d2/text2.txt
 for i in {3..4}
 do
-    echo $i$'\t'$i >> $user_files_path/d1/d2/text2.txt
+    echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/text2.txt
 done
-mkdir $user_files_path/d1/d2/d3
-touch $user_files_path/d1/d2/d3/text3.txt
+mkdir $USER_FILES_PATH/d1/d2/d3
+touch $USER_FILES_PATH/d1/d2/d3/text3.txt
 for i in {5..6}
 do
-    echo $i$'\t'$i >> $user_files_path/d1/d2/d3/text3.txt
+    echo $i$'\t'$i >> $USER_FILES_PATH/d1/d2/d3/text3.txt
 done
 ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort
@@ -35,9 +33,9 @@ ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/*/tex*','TSV', 'Index UInt8, Nu
 ${CLICKHOUSE_CLIENT} -q "SELECT * from file ('d1/**/tex*','TSV', 'Index UInt8, Number UInt8')" | sort --numeric-sort
-rm $user_files_path/d1/d2/d3/text3.txt
-rmdir $user_files_path/d1/d2/d3
-rm $user_files_path/d1/d2/text2.txt
-rmdir $user_files_path/d1/d2
-rm $user_files_path/d1/text1.txt
-rmdir $user_files_path/d1
+rm $USER_FILES_PATH/d1/d2/d3/text3.txt
+rmdir $USER_FILES_PATH/d1/d2/d3
+rm $USER_FILES_PATH/d1/d2/text2.txt
+rmdir $USER_FILES_PATH/d1/d2
+rm $USER_FILES_PATH/d1/text1.txt
+rmdir $USER_FILES_PATH/d1
diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh
index 474a6cd0e47..2975b40c868 100755
--- a/tests/queries/0_stateless/02475_bson_each_row_format.sh
+++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-parallel, no-debug
+# Tags: no-debug
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
index 6899b31d1d9..935ea03a947 100755
--- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
+++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
@@ -38,7 +38,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_race_condition_landing (number Int64,
 export -f drop_mv;
 export -f insert;
-TIMEOUT=55
+TIMEOUT=50
 for i in {1..4}
 do
diff --git a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh
index 9d78b9893dd..a04c631c411 100755
--- a/tests/queries/0_stateless/02482_capnp_list_of_structs.sh
+++ b/tests/queries/0_stateless/02482_capnp_list_of_structs.sh
@@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
 touch $USER_FILES_PATH/data.capnp
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
+SCHEMADIR=${CLICKHOUSE_SCHEMA_FILES}
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
 SERVER_SCHEMADIR=test_02482
 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
diff --git a/tests/queries/0_stateless/02483_capnp_decimals.sh b/tests/queries/0_stateless/02483_capnp_decimals.sh
index ef545a5539f..bc19b63fc8b 100755
--- a/tests/queries/0_stateless/02483_capnp_decimals.sh
+++ b/tests/queries/0_stateless/02483_capnp_decimals.sh
@@ -5,19 +5,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-touch $USER_FILES_PATH/data.capnp
+touch $CLICKHOUSE_USER_FILES/data.capnp
-SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)")
 CLIENT_SCHEMADIR=$CURDIR/format_schemas
 SERVER_SCHEMADIR=test_02483
-mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR
-cp -r $CLIENT_SCHEMADIR/02483_* $SCHEMADIR/$SERVER_SCHEMADIR/
+mkdir -p $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR
+cp -r $CLIENT_SCHEMADIR/02483_* $CLICKHOUSE_SCHEMA_FILES/$SERVER_SCHEMADIR/
 $CLICKHOUSE_CLIENT -q "insert into function file(02483_data.capnp, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6)') select 42.42, 4242.424242 settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message', engine_file_truncate_on_insert=1"
 $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp) settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'"
 $CLICKHOUSE_CLIENT -q "select * from file(02483_data.capnp, auto, 'decimal64 Decimal64(6), decimal32 Decimal32(3)') settings format_schema='$SERVER_SCHEMADIR/02483_decimals.capnp:Message'"
-rm $USER_FILES_PATH/data.capnp
-rm $USER_FILES_PATH/02483_data.capnp
+rm $CLICKHOUSE_USER_FILES/data.capnp
+rm $CLICKHOUSE_USER_FILES/02483_data.capnp
diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh
index d3a8743b880..e389cf410e8 100755
--- a/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh
+++ b/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh
@@ -1,18 +1,15 @@
 #!/usr/bin/env bash
-
-# Tags: no-fasttest, no-parallel
+# Tags: no-fasttest
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+mkdir -p ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}
-mkdir -p $user_files_path/test_02504
-
-cp $CURDIR/data_ua_parser/os.yaml ${user_files_path}/test_02504/
-cp $CURDIR/data_ua_parser/browser.yaml ${user_files_path}/test_02504/
-cp $CURDIR/data_ua_parser/device.yaml ${user_files_path}/test_02504/
+cp $CURDIR/data_ua_parser/os.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/
+cp $CURDIR/data_ua_parser/browser.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/
+cp $CURDIR/data_ua_parser/device.yaml ${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/
 $CLICKHOUSE_CLIENT -n --query="
 drop dictionary if exists regexp_os;
@@ -29,7 +26,7 @@ create dictionary regexp_os
     os_v4_replacement String default '0'
 )
 PRIMARY KEY(regex)
-SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/os.yaml'))
+SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/os.yaml'))
 LIFETIME(0)
 LAYOUT(regexp_tree);
@@ -41,7 +38,7 @@ create dictionary regexp_browser
    v2_replacement String default '0'
 )
 PRIMARY KEY(regex)
-SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/browser.yaml'))
+SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/browser.yaml'))
 LIFETIME(0)
 LAYOUT(regexp_tree);
@@ -53,7 +50,7 @@ create dictionary regexp_device
    model_replacement String
 )
 PRIMARY KEY(regex)
-SOURCE(YAMLRegExpTree(PATH '${user_files_path}/test_02504/device.yaml'))
+SOURCE(YAMLRegExpTree(PATH '${USER_FILES_PATH}/${CLICKHOUSE_DATABASE}/device.yaml'))
 LIFETIME(0)
 LAYOUT(regexp_tree);
@@ -84,4 +81,4 @@ drop dictionary if exists regexp_device;
 drop table if exists user_agents;
 "
-rm -rf "$user_files_path/test_02504"
+rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_DATABASE}
diff --git a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh
index 7211372f2f7..68a87a14320 100755
--- a/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh
+++ b/tests/queries/0_stateless/02504_regexp_dictionary_yaml_source.sh
@@ -1,13 +1,10 @@
 #!/usr/bin/env bash
-
 # Tags: use-vectorscan, no-fasttest, no-parallel
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
-
 mkdir -p $USER_FILES_PATH/test_02504
 yaml=$USER_FILES_PATH/test_02504/test.yaml
diff --git a/tests/queries/0_stateless/02661_read_from_archive.lib b/tests/queries/0_stateless/02661_read_from_archive.lib
index 908b6bd38d2..56f1a0f163c 100644
--- a/tests/queries/0_stateless/02661_read_from_archive.lib
+++ b/tests/queries/0_stateless/02661_read_from_archive.lib
@@ -6,10 +6,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh
 function read_archive_file() {
-    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
-    $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${user_files_path}/$1') ORDER BY 1, 2"
-    $CLICKHOUSE_CLIENT --query "DESC file('${user_files_path}/$1')"
-    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')"
+    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2"
+    $CLICKHOUSE_CLIENT --query "SELECT * FROM file('${USER_FILES_PATH}/$1') ORDER BY 1, 2"
+    $CLICKHOUSE_CLIENT --query "DESC file('${USER_FILES_PATH}/$1')"
+    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${USER_FILES_PATH}/$1')"
     $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table ORDER BY 1, 2"
     $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
 }
@@ -20,16 +20,14 @@ function run_archive_test() {
     extension_without_dot=$(echo $1 | sed -e 's/\.//g')
     FILE_PREFIX="02661_read_from_archive_${CLICKHOUSE_DATABASE}_$extension_without_dot"
-    user_files_path=$(clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -o "/[^[:space:]]*nonexist.txt" | awk '{gsub("/nonexist.txt","",$1); print $1}')
-
     touch ${FILE_PREFIX}_data0.csv
     echo -e "1,2\n3,4" > ${FILE_PREFIX}_data1.csv
     echo -e "5,6\n7,8" > ${FILE_PREFIX}_data2.csv
     echo -e "9,10\n11,12" > ${FILE_PREFIX}_data3.csv
-    eval "$2 ${user_files_path}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null"
-    eval "$2 ${user_files_path}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null"
-    eval "$2 ${user_files_path}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null"
+    eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1 ${FILE_PREFIX}_data0.csv ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data2.csv > /dev/null"
+    eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive2.$1 ${FILE_PREFIX}_data1.csv ${FILE_PREFIX}_data3.csv > /dev/null"
+    eval "$2 ${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1 ${FILE_PREFIX}_data2.csv ${FILE_PREFIX}_data3.csv > /dev/null"
     echo "archive1 data1.csv"
     read_archive_file "${FILE_PREFIX}_archive1.$1 :: ${FILE_PREFIX}_data1.csv"
@@ -44,10 +42,10 @@ function run_archive_test() {
     echo "archive* {2..3}.csv"
     read_archive_file "${FILE_PREFIX}_archive*.$1 :: ${FILE_PREFIX}_data{2..3}.csv"
-    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
-    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
+    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
+    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${USER_FILES_PATH}/${FILE_PREFIX}_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL"
-    rm ${user_files_path}/${FILE_PREFIX}_archive{1..3}.$1
+    rm ${USER_FILES_PATH}/${FILE_PREFIX}_archive{1..3}.$1
     rm ${FILE_PREFIX}_data{0..3}.csv
 }
diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh
index 17d1fa92377..3e629ece33f 100755 --- a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh +++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-ordinary-database, zookeeper, no-fasttest, no-parallel +# Tags: no-ordinary-database, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -37,7 +37,7 @@ function create_drop_loop() export -f create_drop_loop; THREADS=10 -TIMEOUT=30 +TIMEOUT=20 for i in `seq $THREADS` do diff --git a/tests/queries/0_stateless/02722_database_filesystem.sh b/tests/queries/0_stateless/02722_database_filesystem.sh index 374dd246c96..2d0ff256c95 100755 --- a/tests/queries/0_stateless/02722_database_filesystem.sh +++ b/tests/queries/0_stateless/02722_database_filesystem.sh @@ -5,12 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -user_files_tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +user_files_tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p ${user_files_tmp_dir}/tmp/ echo '"id","str","int","text"' > ${user_files_tmp_dir}/tmp.csv echo '1,"abc",123,"abacaba"' >> ${user_files_tmp_dir}/tmp.csv diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh index e413910b934..8b5a2f23aa9 100755 --- a/tests/queries/0_stateless/02724_decompress_filename_exception.sh +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" echo 'corrupted file' > $FILENAME; diff --git a/tests/queries/0_stateless/02732_rename_after_processing.sh b/tests/queries/0_stateless/02732_rename_after_processing.sh index 9d44ff9fc34..c3f2274570e 100755 --- a/tests/queries/0_stateless/02732_rename_after_processing.sh +++ b/tests/queries/0_stateless/02732_rename_after_processing.sh @@ -4,12 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -# see 01658_read_file_to_stringcolumn.sh -CLICKHOUSE_USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - # Prepare data unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME} -tmp_dir=${CLICKHOUSE_USER_FILES_PATH}/${unique_name} +tmp_dir=${USER_FILES_PATH}/${unique_name} mkdir -p $tmp_dir rm -rf ${tmp_dir:?}/* diff --git a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh index 932837b83db..60df3ed2762 100755 --- a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh +++ b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh @@ -7,27 +7,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by using the file function, we can also get it by this query: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ - -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta/non_existing.csv', CSV);" 2>&1 | grep -q "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo 'OK' || echo 'FAIL' # Create two files in different directories -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/ -echo 'This is file data1' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv -echo 'This is file data2' > ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv +echo 'This is file data1' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir1/subdir11/data1.csv +echo 'This is file data2' > ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir2/subdir22/data2.csv -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV);" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta2.csv', CSV);" -${CLICKHOUSE_CLIENT} 
--query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" -${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';" +${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';" -rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02889_file_log_save_errors.sh b/tests/queries/0_stateless/02889_file_log_save_errors.sh index 8ef7816d57d..cf7ced0bd08 100755 --- a/tests/queries/0_stateless/02889_file_log_save_errors.sh +++ b/tests/queries/0_stateless/02889_file_log_save_errors.sh @@ -4,27 +4,25 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "drop table if exists log_errors;" -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* for i in {0..9} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.jsonl done for i in {10..19} do - echo "{\"key\" : $i, \"value\" : $i}" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl - echo "Error $i" >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "{\"key\" : $i, \"value\" : $i}" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl + echo "Error $i" >> ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.jsonl done -${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" +${CLICKHOUSE_CLIENT} --query "create table file_log(key UInt8, value UInt8) engine=FileLog('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'JSONEachRow') settings handle_error_mode='stream';" ${CLICKHOUSE_CLIENT} --query "create Materialized View log_errors engine=MergeTree order by tuple() as select _error as error, _raw_record as record, _filename as file from file_log where not isNull(_error);" function count() @@ -42,4 +40,4 @@ ${CLICKHOUSE_CLIENT} --query "select * from log_errors order by file, record;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" ${CLICKHOUSE_CLIENT} --query "drop table log_errors;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} 
diff --git a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh index 42dde18de00..9f93396e368 100755 --- a/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh +++ b/tests/queries/0_stateless/02892_input_csv_cr_end_count_many_rows.sh @@ -6,11 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') - cp "$CURDIR"/data_csv/1m_rows_cr_end_of_line.csv.xz $USER_FILES_PATH/ $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=1" $CLICKHOUSE_CLIENT -q "SELECT count(1) from file('1m_rows_cr_end_of_line.csv.xz') settings input_format_csv_allow_cr_end_of_line=1, optimize_count_from_files=0" -rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz \ No newline at end of file +rm $USER_FILES_PATH/1m_rows_cr_end_of_line.csv.xz diff --git a/tests/queries/0_stateless/02895_npy_output_format.sh b/tests/queries/0_stateless/02895_npy_output_format.sh index 934c80830c5..a364e447062 100755 --- a/tests/queries/0_stateless/02895_npy_output_format.sh +++ b/tests/queries/0_stateless/02895_npy_output_format.sh @@ -5,10 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT_BINARY -q "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* -chmod 777 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/* +chmod 777 ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895; @@ -33,43 +32,43 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.data_types VALUES (1, 1, 1, 1, 1, 1, 1, 1, 0.1, 0.01, 'npy', 'npy'), (-1, -1, -1, -1, 0, 0, 0, 0, 0.2, 0.02, 'npy', 'npynpy'); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM 
npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy') SELECT i1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy') SELECT i2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy') SELECT i4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy') SELECT i8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy') SELECT u1 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy') SELECT u2 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy') SELECT u4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy') SELECT u8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy') SELECT f4 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy') SELECT f8 FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy') SELECT fs FROM npy_output_02895.data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy') SELECT s FROM npy_output_02895.data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - SELECT * FROM 
file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_int64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint8.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint16.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_uint64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_fixedstring.npy'); + DESC 
file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_dtype_string.npy'); SELECT '-- test nested data types --'; CREATE TABLE IF NOT EXISTS npy_output_02895.nested_data_types @@ -81,16 +80,16 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " INSERT INTO npy_output_02895.nested_data_types VALUES ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['a', 'bb']), ([[[1], [2]], [[3], [4]]], [[0.1], [0.2]], ['ccc', 'dddd']); - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; - INSERT INTO TABLE FUNCTION file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy') SELECT i4 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy') SELECT f8 FROM npy_output_02895.nested_data_types; + INSERT INTO TABLE FUNCTION file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy') SELECT s FROM npy_output_02895.nested_data_types; - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); - DESC file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + SELECT * FROM file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_int32.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_float64.npy'); + DESC file('${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/02895_nested_dtype_string.npy'); SELECT '-- test exceptions --'; CREATE TABLE IF NOT EXISTS npy_output_02895.exceptions @@ -115,4 +114,4 @@ ${CLICKHOUSE_CLIENT} -n -q --ignore-error " DROP DATABASE IF EXISTS npy_output_02895;" -rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} +rm -rf ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME:?} diff --git a/tests/queries/0_stateless/02931_file_cluster.sh b/tests/queries/0_stateless/02931_file_cluster.sh index 8566e2ab08e..ebd3792e1dc 100755 --- a/tests/queries/0_stateless/02931_file_cluster.sh +++ b/tests/queries/0_stateless/02931_file_cluster.sh @@ -4,8 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - mkdir -p "${USER_FILES_PATH}"/"${CLICKHOUSE_TEST_UNIQUE_NAME}"/ for i in {1..10} diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh index 76ada756f47..2e5a538007c 100755 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) disk_name="s3_cache_02933" $CLICKHOUSE_CLIENT --query "DESCRIBE FILESYSTEM CACHE '${disk_name}'" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml config_path_tmp=$config_path.tmp cat $config_path \ diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh index 6f454da40da..cb099bb59ae 100755 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" $CLICKHOUSE_CLIENT --query "SELECT sum(size) FROM system.filesystem_cache WHERE state = 'DOWNLOADED'" -config_path=/etc/clickhouse-server/config.d/storage_conf_02944.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02944.xml config_path_tmp=$config_path.tmp echo 'set max_size from 100 to 10' diff --git a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh index a02bdd0a1d2..3d2fe5d664d 100755 --- a/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh +++ b/tests/queries/0_stateless/02950_dictionary_ssd_cache_short_circuit.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - $CLICKHOUSE_CLIENT -n --query=" DROP DATABASE IF EXISTS 02950_database_for_ssd_cache_dictionary; CREATE DATABASE 02950_database_for_ssd_cache_dictionary; @@ -32,7 +30,7 @@ $CLICKHOUSE_CLIENT -n --query=" PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'source_table')) LIFETIME(MIN 1 MAX 1000) - LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$USER_FILES_PATH/0d')); + LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 8192 PATH '$CLICKHOUSE_USER_FILES/0d')); SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', ('v1', 'v2'), 0, (intDiv(1, id), intDiv(1, id))) FROM 02950_database_for_ssd_cache_dictionary.source_table; SELECT dictGetOrDefault('02950_database_for_ssd_cache_dictionary.ssd_cache_dictionary', 'v2', id+1, intDiv(NULL, id)) FROM 02950_database_for_ssd_cache_dictionary.source_table; diff --git a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh index 4e085541a8d..145b921a750 100755 --- a/tests/queries/0_stateless/02961_storage_config_volume_priority.sh +++ b/tests/queries/0_stateless/02961_storage_config_volume_priority.sh @@ -15,7 +15,7 @@ WHERE policy_name = 'policy_02961' ORDER BY volume_priority ASC; " -config_path=/etc/clickhouse-server/config.d/storage_conf_02961.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf_02961.xml config_path_tmp=$config_path.tmp echo 'check non-unique values dont work' diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh index b61be87411d..9ef271632d0 100755 --- a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh @@ -62,7 +62,7 @@ export -f sync_and_drop_replicas export -f optimize_thread export -f mutations_thread -TIMEOUT=60 +TIMEOUT=30 timeout $TIMEOUT bash -c insert_thread 2> /dev/null & timeout $TIMEOUT bash -c sync_and_drop_replicas 2> /dev/null & diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index 199893a9428..d9bae05270a 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -4,12 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation. -# Now we can get the user_files_path by use the table file function for trick. 
also we can get it by query as: -# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" -user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') - -logs_dir=${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME} +logs_dir=${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME} rm -rf ${logs_dir} diff --git a/tests/queries/0_stateless/02971_analyzer_remote_id.sh b/tests/queries/0_stateless/02971_analyzer_remote_id.sh index 463e4cc1f0c..ab3c5292529 100755 --- a/tests/queries/0_stateless/02971_analyzer_remote_id.sh +++ b/tests/queries/0_stateless/02971_analyzer_remote_id.sh @@ -1,15 +1,9 @@ #!/usr/bin/env bash -# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" -${CLICKHOUSE_CLIENT} --query="CREATE DATABASE test_02971" - -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test_02971.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" -${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', 'test_02971.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ +${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${CLICKHOUSE_DATABASE}.x ENGINE = MergeTree() ORDER BY number AS SELECT * FROM numbers(2)" +${CLICKHOUSE_LOCAL} --query="SELECT count() FROM remote('127.0.0.{2,3}', '${CLICKHOUSE_DATABASE}.x') SETTINGS allow_experimental_analyzer = 1" 2>&1 \ | grep -av "ASan doesn't fully support makecontext/swapcontext functions" - -${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS test_02971" diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 14f28f1ba4a..6edac86be5b 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -4,8 +4,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Data preparation step -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" diff --git a/tests/queries/0_stateless/02984_form_format.sh b/tests/queries/0_stateless/02984_form_format.sh index ce5feb60130..471b48e0f68 100755 --- a/tests/queries/0_stateless/02984_form_format.sh +++ b/tests/queries/0_stateless/02984_form_format.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -# Test setup -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME="data.tmp" FORM_DATA="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/${FILE_NAME}" mkdir -p ${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}/ @@ -30,4 +28,4 @@ touch $FORM_DATA echo -ne "c.e=ls7xfkpm&c.tti.m=raf&rt.start=navigation&rt.bmr=390%2C11%2C10&rt.tstart=1707076768666&rt.bstart=1707076769091&rt.blstart=1707076769056&rt.end=1707076769078&t_resp=296&t_page=116&t_done=412&t_other=boomerang%7C6%2Cboomr_fb%7C425%2Cboomr_ld%7C390%2Cboomr_lat%7C35&rt.tt=2685&rt.obo=0&pt.fcp=407&nt_nav_st=1707076768666&nt_dns_st=1707076768683&nt_dns_end=1707076768684&nt_con_st=1707076768684&nt_con_end=1707076768850&nt_req_st=1707076768850&nt_res_st=1707076768962&nt_res_end=1707076768962&nt_domloading=1707076769040&nt_domint=1707076769066&nt_domcontloaded_st=1707076769067&nt_domcontloaded_end=1707076769068&nt_domcomp=1707076769069&nt_load_st=1707076769069&nt_load_end=1707076769078&nt_unload_st=1707076769040&nt_unload_end=1707076769041&nt_ssl_st=1707076768788&nt_enc_size=3209&nt_dec_size=10093&nt_trn_size=3940&nt_protocol=h2&nt_red_cnt=0&nt_nav_type=1&restiming=%7B%22https%3A%2F%2Fwww.basicrum.com%2F%22%3A%7B%22publications%2F%22%3A%226%2C88%2C88%2C54%2C54%2C3e%2Ci%2Ci%2Ch*12h5%2Ckb%2C5b8%22%2C%22assets%2Fjs%2F%22%3A%7B%22just-the-docs.js%22%3A%223am%2Ce%2Ce*12pc%2C_%2C8oj*20%22%2C%22boomerang-1.737.60.cutting-edge.min.js%22%3A%222au%2Cb%2Ca*1pu3%2C_%2C1m19*21*42%22%2C%22vendor%2Flunr.min.js%22%3A%223am%2Cd%2C8*16t2%2C_%2Cfym*20%22%7D%7D%7D&u=https%3A%2F%2Fwww.basicrum.com%2Fpublications%2F&r=https%3A%2F%2Fwww.basicrum.com%2Fcost-analyses%2F&v=1.737.60&sv=14&sm=p&rt.si=dd0c542f-7adf-4310-830a-6c0a3d157c90-s8cjr1&rt.ss=1707075325294&rt.sl=4&vis.st=visible&ua.plt=Linux%20x86_64&ua.vnd=&pid=8fftz949&n=1&c.t.fps=07*4*65*j*61&c.t.busy=2*4*0034&c.tti.vr=408&c.tti=408&c.b=2&c.f=60&c.f.d=2511&c.f.m=1&c.f.s=ls7xfl1h&dom.res=5&dom.doms=1&mem.lsln=0&mem.ssln=0&mem.lssz=2&mem.sssz=2&scr.xy=1920x1200&scr.bpp=24%2F24&scr.orn=0%2Flandscape-primary&cpu.cnc=16&dom.ln=114&dom.sz=10438&dom.ck=157&dom.img=0&dom.script=6&dom.script.ext=3&dom.iframe=0&dom.link=4&dom.link.css=1&sb=1" > $FORM_DATA $CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FORM_DATA', Form) FORMAT Vertical" -rm $FORM_DATA \ No newline at end of file +rm $FORM_DATA diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh index 09bdd7f6b56..cba5317fcfa 100755 --- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh @@ -18,7 +18,7 @@ $CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") $CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" -config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path=${CLICKHOUSE_CONFIG_DIR}/config.d/storage_conf.xml new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path diff --git 
a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh index ea7bb8f7ad0..c27dfffcfc2 100755 --- a/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh +++ b/tests/queries/0_stateless/03144_parallel_alter_add_drop_column_zookeeper_on_steroids.sh @@ -85,7 +85,7 @@ export -f optimize_thread; export -f insert_thread; -TIMEOUT=30 +TIMEOUT=20 # Sometimes we detach and attach tables timeout $TIMEOUT bash -c alter_thread 2> /dev/null & diff --git a/tests/queries/0_stateless/03153_format_regexp_usability.sh b/tests/queries/0_stateless/03153_format_regexp_usability.sh index 03bed10dd17..561de3be893 100755 --- a/tests/queries/0_stateless/03153_format_regexp_usability.sh +++ b/tests/queries/0_stateless/03153_format_regexp_usability.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-ordinary-database, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 0365a82e538731bdb17407d0765d0ff81e722b54 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 10 Jul 2024 08:04:46 +0200 Subject: [PATCH 192/299] Fix test --- tests/queries/0_stateless/01268_procfs_metrics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index 4f09d197596..7d6389bb86e 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -15,7 +15,7 @@ tmp_path=$(mktemp "$CURDIR/01268_procfs_metrics.XXXXXX") trap 'rm -f $tmp_path' EXIT truncate -s1025 "$tmp_path" -$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars +$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events --storage_file_read_method=pread -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars # NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough. 
$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds exit 0 From bc02d8e66ecc82bee3c8d0402b01816c5005ece9 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 10 Jul 2024 08:01:36 +0000 Subject: [PATCH 193/299] Fix settings changelog --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..3ccc7321088 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,7 @@ static std::initializer_list Date: Wed, 10 Jul 2024 11:33:48 +0300 Subject: [PATCH 194/299] Remove typo from distinctive-features.md --- docs/ru/introduction/distinctive-features.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md index dafaf055980..da820c90a1e 100644 --- a/docs/ru/introduction/distinctive-features.md +++ b/docs/ru/introduction/distinctive-features.md @@ -12,7 +12,7 @@ sidebar_label: "Отличительные возможности ClickHouse" Этот пункт пришлось выделить, так как существуют системы, которые могут хранить значения отдельных столбцов по отдельности, но не могут эффективно выполнять аналитические запросы в силу оптимизации под другой сценарий работы. Примеры: HBase, BigTable, Cassandra, HyperTable. В этих системах вы получите пропускную способность в районе сотен тысяч строк в секунду, но не сотен миллионов строк в секунду. -Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базой данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. +Также стоит заметить, что ClickHouse является системой управления базами данных, а не системой для одной базы данных. То есть, ClickHouse позволяет создавать таблицы и базы данных во время выполнения (runtime), загружать данные и выполнять запросы без переконфигурирования и перезапуска сервера. 
## Сжатие данных {#szhatie-dannykh} From 513ce9fa2f3bb0d2cc1774a07272a249b40f475f Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 10 Jul 2024 08:40:09 +0000 Subject: [PATCH 195/299] Disable broken cases from 02911_join_on_nullsafe_optimization --- ...2911_join_on_nullsafe_optimization.reference | 17 ++++------------- .../02911_join_on_nullsafe_optimization.sql | 5 +++-- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index f0463509b80..4eb7e74446d 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -36,19 +36,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 3 3 3 33 \N \N \N \N -- aliases defined in the join condition are valid -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -1 42 \N \N \N 0 -2 2 2 2 1 1 -3 3 3 33 1 1 -\N \N 4 42 \N 0 -\N \N \N \N \N 1 -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -1 42 \N \N \N 0 -2 2 2 2 1 1 -3 3 3 33 1 1 -\N \N 4 42 \N 0 -\N \N \N \N \N 0 -\N \N \N \N \N 0 +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; + -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; 2 2 2 2 diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 67918f4302f..f7813e2a1b4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -36,8 +36,9 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- aliases defined in the join condition are valid -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- FIXME(@vdimir) broken query formatting for the following queries: +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; +-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( 
( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; -- check for non-nullable columns for which `is null` is replaced with constant SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; From 36b9a5641f614de1fba593d42ba7670614cde5aa Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 10 Jul 2024 11:56:43 +0200 Subject: [PATCH 196/299] Update run.sh --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 1c03f5107b0..8e66d2667f1 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -249,7 +249,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m -k 60m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + timeout -k 60m -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt From a32795d116903c66c18263f47e5d1e622d83a362 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 10 Jul 2024 10:07:02 +0000 Subject: [PATCH 197/299] Fix review comments --- src/Formats/JSONExtractTree.cpp | 174 ++++++++++++++++++++++++-------- src/Formats/JSONExtractTree.h | 6 ++ src/Functions/FunctionsJSON.cpp | 3 + 3 files changed, 139 insertions(+), 44 deletions(-) diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 9efb1392583..242d2dc9f80 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,9 +36,8 @@ #include #include #include -#include #include -#include +#include #include #include @@ -123,10 +123,7 @@ void jsonElementToString(const typename JSONParser::Element & element, WriteBuff template bool tryGetNumericValueFromJSONElement( - NumberType & value, - const typename JSONParser::Element & element, - bool convert_bool_to_integer, - String & error) + NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error) { switch (element.type()) { @@ -226,7 +223,11 @@ public: explicit NumericNode(bool is_bool_type_ = false) : is_bool_type(is_bool_type_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -270,7 +271,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & 
insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -309,7 +314,11 @@ class StringNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -349,7 +358,11 @@ public: explicit LowCardinalityStringNode(bool is_nullable_) : is_nullable(is_nullable_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -387,7 +400,11 @@ class FixedStringNode : public JSONExtractTreeNode public: explicit FixedStringNode(size_t fixed_length_) : fixed_length(fixed_length_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -431,7 +448,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -484,7 +505,11 @@ class UUIDNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -525,7 +550,11 @@ public: explicit LowCardinalityUUIDNode(bool is_nullable_) : is_nullable(is_nullable_) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && (is_nullable || format_settings.null_as_default)) { @@ -560,7 +589,11 @@ class DateNode : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const 
JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -595,7 +628,11 @@ public: explicit DateTimeNode(const DataTypeDateTime & datetime_type) : TimezoneMixin(datetime_type) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -656,7 +693,11 @@ public: explicit DecimalNode(const DataTypePtr & type) : scale(assert_cast &>(*type).getScale()) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { DecimalType value{}; @@ -688,7 +729,8 @@ public: } break; } - default: { + default: + { error = fmt::format("cannot read Decimal value from JSON element: {}", jsonElementToString(element, format_settings)); return false; } @@ -707,10 +749,16 @@ template class DateTime64Node : public JSONExtractTreeNode, public TimezoneMixin { public: - explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) { } + explicit DateTime64Node(const DataTypeDateTime64 & datetime64_type) : TimezoneMixin(datetime64_type), scale(datetime64_type.getScale()) + { + } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -790,7 +838,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -857,7 +909,11 @@ class IPv4Node : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -895,7 +951,11 @@ class IPv6Node : public JSONExtractTreeNode { public: bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings &, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const 
typename JSONParser::Element & element, + const JSONExtractInsertSettings &, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -936,7 +996,11 @@ public: explicit NullableNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull()) { @@ -945,7 +1009,7 @@ public: } auto & col_null = assert_cast(column); - if (!nested-> insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) + if (!nested->insertResultToColumn(col_null.getNestedColumn(), element, insert_settings, format_settings, error)) return false; col_null.getNullMapColumn().insertValue(0); return true; @@ -965,7 +1029,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && (is_nullable || format_settings.null_as_default)) { @@ -975,7 +1043,7 @@ public: auto & col_lc = assert_cast(column); auto tmp_nested = col_lc.getDictionary().getNestedColumn()->cloneEmpty(); - if (!nested-> insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) + if (!nested->insertResultToColumn(*tmp_nested, element, insert_settings, format_settings, error)) return false; col_lc.insertFromFullColumn(*tmp_nested, 0); @@ -994,7 +1062,11 @@ public: explicit ArrayNode(std::unique_ptr> nested_) : nested(std::move(nested_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (element.isNull() && format_settings.null_as_default) { @@ -1017,7 +1089,7 @@ public: for (auto value : array) { - if (nested-> insertResultToColumn(data, value, insert_settings, format_settings, error)) + if (nested->insertResultToColumn(data, value, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1058,7 +1130,11 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & tuple = assert_cast(column); size_t old_size = column.size(); @@ -1087,7 +1163,7 @@ public: for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) { - if (nested[index]-> 
insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) + if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1115,7 +1191,7 @@ public: auto it = object.begin(); for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) { - if (nested[index]-> insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) + if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1138,7 +1214,7 @@ public: auto index = name_to_index_map.find(key); if (index != name_to_index_map.end()) { - if (nested[index->second]-> insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) + if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value, insert_settings, format_settings, error)) { were_valid_elements = true; } @@ -1173,7 +1249,11 @@ public: explicit MapNode(std::unique_ptr> value_) : value(std::move(value_)) { } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { if (!element.isObject()) { @@ -1198,7 +1278,7 @@ public: key_col.insertData(pair.first.data(), pair.first.size()); /// Insert value - if (!value-> insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) + if (!value->insertResultToColumn(value_col, pair.second, insert_settings, format_settings, error)) { if (insert_settings.insert_default_on_invalid_elements_in_complex_types) { @@ -1232,13 +1312,17 @@ public: } bool insertResultToColumn( - IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & column_variant = assert_cast(column); for (size_t i : order) { auto & variant = column_variant.getVariantByGlobalDiscriminator(i); - if (variant_nodes[i]-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + if (variant_nodes[i]->insertResultToColumn(variant, element, insert_settings, format_settings, error)) { column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); column_variant.getOffsets().push_back(variant.size() - 1); @@ -1262,7 +1346,12 @@ template class DynamicNode : public JSONExtractTreeNode { public: - bool insertResultToColumn(IColumn & column, const typename JSONParser::Element & element, const JSONExtractInsertSettings & insert_settings, const FormatSettings & format_settings, String & error) const override + bool insertResultToColumn( + IColumn & column, + const typename JSONParser::Element & element, + const JSONExtractInsertSettings & insert_settings, + const FormatSettings & format_settings, + String & error) const override { auto & column_dynamic = assert_cast(column); /// First, check if element 
is NULL. @@ -1281,7 +1370,7 @@ public: auto node = buildJSONExtractTree(element_type, "Dynamic inference"); auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); - if (!node-> insertResultToColumn(variant, element, insert_settings, format_settings, error)) + if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) return false; variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discriminator)); variant_column.getOffsets().push_back(variant.size() - 1); @@ -1290,14 +1379,14 @@ public: /// We couldn't add new variant. Try to insert element into current variants. auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); - if (variant_node-> insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) + if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) return true; /// We couldn't insert element into any existing variant, add String variant and read value as String. column_dynamic.addStringVariant(); auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); - if (!getStringNode()-> insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) return false; variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); variant_column.getOffsets().push_back(string_column.size() - 1); @@ -1348,12 +1437,9 @@ private: if (format_settings.json.try_infer_numbers_from_strings) { - bool is_negative = false; if (auto type = tryInferJSONNumberFromString(data, format_settings, &json_inference_info)) { json_inference_info.numbers_parsed_from_json_strings.insert(type.get()); - if (is_negative) - json_inference_info.negative_integers.insert(type.get()); return type; } } diff --git a/src/Formats/JSONExtractTree.h b/src/Formats/JSONExtractTree.h index 4735f568b1c..b5e82506548 100644 --- a/src/Formats/JSONExtractTree.h +++ b/src/Formats/JSONExtractTree.h @@ -9,7 +9,13 @@ namespace DB struct JSONExtractInsertSettings { + /// If false, JSON boolean values won't be inserted into columns with integer types + /// It's used in JSONExtractInt64/JSONExtractUInt64/... functions. bool convert_bool_to_integer = true; + /// If true, when complex type like Array/Map has both valid and invalid elements, + /// the default value will be inserted on invalid elements. + /// For example, if we have [1, "hello", 2] and type Array(UInt32), + /// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions. 
bool insert_default_on_invalid_elements_in_complex_types = false; }; diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index ca233becb63..db1602b1939 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -354,7 +354,10 @@ public: explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_, const FormatSettings & format_settings_) : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_), format_settings(format_settings_) { + /// Don't escape forward slashes during converting JSON elements to raw string. format_settings.json.escape_forward_slashes = false; + /// Don't insert default values on null during traversing the JSON element. + /// We allow to insert null only to Nullable columns in JSONExtract functions. format_settings.null_as_default = false; } From 17d5ec28d4803f767f5c1f067116fe02147a1e1e Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 12:08:33 +0200 Subject: [PATCH 198/299] debug tests hung on gdb error --- tests/clickhouse-test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8dea6297a61..f60e285e771 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2190,6 +2190,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool test_result = test_case.run( args, test_suite, client_options, server_logs_level ) + print(test_result, test_result.case_name, test_result.need_retry) test_result = test_case.process_result(test_result, MESSAGES) if not test_result.need_retry: break @@ -2211,7 +2212,9 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool failures_total += 1 failures_chain += 1 if test_result.reason == FailureReason.SERVER_DIED: + stop_tests() server_died.set() + raise ServerDied("Server died") elif test_result.status == TestStatus.SKIPPED: skipped_total += 1 From 73c4eaa0f26c0e67f15b0e65ebeda93fd8214957 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 10 Jul 2024 12:09:32 +0200 Subject: [PATCH 199/299] Clarify ordered mode description for s3Queue --- docs/en/engines/table-engines/integrations/s3queue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 11181703645..c45de8760d6 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -75,7 +75,7 @@ SETTINGS Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. -- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. +- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed and some point and later on a file named 'AAAA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, the setting becomes required to be specified manually. 
For tables created on earlier versions the default value will remain `Ordered` for compatibility. From 56c751a10a0070d31e70313854cf1ade54f0c9a5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 10 Jul 2024 12:17:39 +0200 Subject: [PATCH 200/299] Update docs/en/engines/table-engines/integrations/s3queue.md Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- docs/en/engines/table-engines/integrations/s3queue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index c45de8760d6..dcce8c415db 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -75,7 +75,7 @@ SETTINGS Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. -- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed and some point and later on a file named 'AAAA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. +- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed at some point and later on a file named 'AAAA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, the setting becomes required to be specified manually. For tables created on earlier versions the default value will remain `Ordered` for compatibility. From 74cc20b2868ffa88656daf0d30a95287291319ed Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 10 Jul 2024 12:18:50 +0200 Subject: [PATCH 201/299] Make spellcheck happy --- docs/en/engines/table-engines/integrations/s3queue.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index dcce8c415db..06325fa15fb 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -75,7 +75,7 @@ SETTINGS Possible values: - unordered — With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper. -- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed at some point and later on a file named 'AAAA' is added to the bucket, it will be ignored. Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. +- ordered — With ordered mode, the files are processed in lexicographic order. It means that if file named 'BBB' was processed at some point and later on a file named 'AA' is added to the bucket, it will be ignored. 
Only the max name (in lexicographic sense) of the successfully consumed file, and the names of files that will be retried after unsuccessful loading attempt are being stored in ZooKeeper. Default value: `ordered` in versions before 24.6. Starting with 24.6 there is no default value, the setting becomes required to be specified manually. For tables created on earlier versions the default value will remain `Ordered` for compatibility. From ef08055db972581b3a5b24e99fa7fa6414436bc2 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 12:47:32 +0200 Subject: [PATCH 202/299] debug tests hung on gdb error 2 --- tests/clickhouse-test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f60e285e771..d1f7ace2376 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -361,9 +361,11 @@ def clickhouse_execute_json( def stop_tests(): # send signal to all processes in group to avoid hung check triggering # (to avoid terminating clickhouse-test itself, the signal should be ignored) + print("Sending signals") signal.signal(signal.SIGTERM, signal.SIG_IGN) os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) signal.signal(signal.SIGTERM, signal.SIG_DFL) + print("Sending signals DONE") def get_db_engine(args, database_name): @@ -2187,10 +2189,10 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool sys.stdout.flush() while True: + print(test_case.name) test_result = test_case.run( args, test_suite, client_options, server_logs_level ) - print(test_result, test_result.case_name, test_result.need_retry) test_result = test_case.process_result(test_result, MESSAGES) if not test_result.need_retry: break From ede43721b4d43131ff2d55337bfbcc396772db44 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 13:30:06 +0200 Subject: [PATCH 203/299] debug tests hung on gdb error 4 --- docker/test/stateless/run.sh | 2 +- tests/clickhouse-test | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 30079073ea2..b48b0731d92 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -311,7 +311,7 @@ function run_tests() set +e timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 01710_aggregate_projections 01035_avg_weighted_long 00735_long_conditional 02911_join_on_nullsafe_optimization 02911_backup_restore_keeper_map 02552_siphash128_reference 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e diff --git a/tests/clickhouse-test b/tests/clickhouse-test index d1f7ace2376..23273a0a28a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2189,7 +2189,10 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool sys.stdout.flush() while True: - print(test_case.name) + sys.stdout.flush() + sys.stdout.write(test_case.name) + sys.stdout.flush() + test_result = test_case.run( args, test_suite, client_options, server_logs_level ) @@ -2505,10 +2508,19 @@ def do_run_tests(jobs, test_suite: TestSuite): future_seq.wait() future.wait() + print("future wait DONE") finally: - pool.terminate() - pool.close() - pool.join() + + + print("pool.join") + pool.join(timeout=2200) + print("pool.join 
DONE") + + # print("pool.terminate") + # pool.terminate() + # print("pool.close") + # pool.close(force=True) + # print("pool.join") if not args.run_sequential_tests_in_parallel: run_tests_array( From f96d40d82806cb195ec519f9a92a859e3e4ef873 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 13:35:19 +0200 Subject: [PATCH 204/299] debug tests hung on gdb error 5 --- tests/clickhouse-test | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 23273a0a28a..a90ffb79344 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2510,18 +2510,14 @@ def do_run_tests(jobs, test_suite: TestSuite): future.wait() print("future wait DONE") finally: - - + print("pool.terminate") + pool.terminate() + print("pool.close") + pool.close() print("pool.join") - pool.join(timeout=2200) + pool.join() print("pool.join DONE") - # print("pool.terminate") - # pool.terminate() - # print("pool.close") - # pool.close(force=True) - # print("pool.join") - if not args.run_sequential_tests_in_parallel: run_tests_array( ( From bd62fecd317cf1e22ac51ddb45a771ff03754b2d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 10 Jul 2024 13:47:58 +0200 Subject: [PATCH 205/299] avoid lock-order-inversion --- src/Interpreters/DatabaseCatalog.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 889c6785217..2f83fe2ac6e 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1314,6 +1314,7 @@ void DatabaseCatalog::dropTablesParallel(std::vectortable_id.uuid); chassert(removed); + table_to_delete_without_lock = std::move(*table_iterator); tables_marked_dropped.erase(table_iterator); wait_table_finally_dropped.notify_all(); From bde1d2fe929b37ce22812346a7df8e76b5ce8733 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 10 Jul 2024 13:48:52 +0200 Subject: [PATCH 206/299] less noisy logs --- src/Interpreters/DatabaseCatalog.cpp | 5 ----- src/Interpreters/DatabaseCatalog.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 2f83fe2ac6e..964baea1891 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -838,7 +838,6 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(getLogger("DatabaseCatalog")) - , limitedLog(std::make_shared(log, 1, 20)) , first_async_drop_in_queue(tables_marked_dropped.end()) { } @@ -1274,10 +1273,6 @@ void DatabaseCatalog::rescheduleDropTableTask() if (first_async_drop_in_queue != tables_marked_dropped.begin()) { - LOG_TRACE( - limitedLog, - "Have {} tables in queue to drop. Some of them are being dropped in sync mode. 
Schedule background task ASAP", - tables_marked_dropped.size()); (*drop_task)->scheduleAfter(0); return; } diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 73e330dedbc..23e38a6445e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -326,7 +325,6 @@ private: TablesDependencyGraph view_dependencies TSA_GUARDED_BY(databases_mutex); LoggerPtr log; - LogSeriesLimiterPtr limitedLog; std::atomic_bool is_shutting_down = false; From 6ffac4034a4ca1b18a2ad5aa44f2c7cce696c246 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 10 Jul 2024 14:20:12 +0200 Subject: [PATCH 207/299] Enable checks in assert_cast under sanitizers --- src/Common/assert_cast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/assert_cast.h b/src/Common/assert_cast.h index 0b73ba1cc12..f9d0bf0e595 100644 --- a/src/Common/assert_cast.h +++ b/src/Common/assert_cast.h @@ -25,7 +25,7 @@ namespace DB template inline To assert_cast(From && from) { -#ifndef NDEBUG +#ifdef ABORT_ON_LOGICAL_ERROR try { if constexpr (std::is_pointer_v) From f8b9fe621a9b249764538d08dbf361e3ba4a1d49 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 10 Jul 2024 12:47:19 +0000 Subject: [PATCH 208/299] Fix test --- tests/integration/test_named_collections/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index 5d38047e885..32846c79d23 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -780,6 +780,9 @@ def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise): node.query("SYSTEM RELOAD CONFIG") with expected_raise: + node.query( + "DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`" + ) node.query( f"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE" ) From ba176a9c4aa420a824571e818f74d370e06ec1ba Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 14:51:27 +0200 Subject: [PATCH 209/299] debug tests hung on gdb error 6 --- docker/test/stateless/run.sh | 4 ++-- tests/clickhouse-test | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index b48b0731d92..a3f8f9eea60 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -310,7 +310,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + clickhouse-test --sequential --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 01710_aggregate_projections 01035_avg_weighted_long 00735_long_conditional 02911_join_on_nullsafe_optimization 02911_backup_restore_keeper_map 02552_siphash128_reference 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt @@ -321,7 +321,7 @@ export -f run_tests # This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 300)) +TIMEOUT=$((MAX_RUN_TIME - 600)) if [ "$NUM_TRIES" -gt "1" ]; then # We 
don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a90ffb79344..c735fc3bb04 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2507,7 +2507,15 @@ def do_run_tests(jobs, test_suite: TestSuite): ) future_seq.wait() - future.wait() + while not future.ready(): + sleep(0.1) + if server_died.is_set(): + print("== Server died") + sleep(5) + pool.terminate() + pool.close() + + # future.wait() print("future wait DONE") finally: print("pool.terminate") From 211994333f363597885e8ac4d083e07ade1ec79c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 14:56:28 +0200 Subject: [PATCH 210/299] Fix invalid XML --- tests/performance/replaceRegexp_fallback.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml index 926e66c702f..15dddf9cdac 100644 --- a/tests/performance/replaceRegexp_fallback.xml +++ b/tests/performance/replaceRegexp_fallback.xml @@ -1,12 +1,12 @@ -> -> + + - > + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null - > - > + + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null From ccffc056818c70cf16531f680ae7531bca1b376e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 10 Jul 2024 15:45:17 +0200 Subject: [PATCH 211/299] Fix asan --- docker/test/stateless/utils.lib | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 90efb5b1a8e..c3bb8ae9ea4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -2,6 +2,8 @@ # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' +# ASAN doesn't work with suid_dumpable=2 +sysctl fs.suid_dumpable=1 function run_with_retry() { From 83cfe164bdab9b73741cf3e1c72d727fe65c1f97 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 15:55:30 +0200 Subject: [PATCH 212/299] debug tests hung on gdb error 7 --- docker/test/stateless/run.sh | 4 ++-- tests/clickhouse-test | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index a3f8f9eea60..406ee75ff53 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -282,7 +282,7 @@ function run_tests() else # All other configurations are OK. 
ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('5') + ADDITIONAL_OPTIONS+=('1') fi if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then @@ -310,7 +310,7 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e - clickhouse-test --sequential --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 01710_aggregate_projections 01035_avg_weighted_long 00735_long_conditional 02911_join_on_nullsafe_optimization 02911_backup_restore_keeper_map 02552_siphash128_reference 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c735fc3bb04..27960b56ae3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2510,12 +2510,10 @@ def do_run_tests(jobs, test_suite: TestSuite): while not future.ready(): sleep(0.1) if server_died.is_set(): - print("== Server died") + print("== Server died ==") sleep(5) - pool.terminate() - pool.close() + break - # future.wait() print("future wait DONE") finally: print("pool.terminate") From e6e8b065bce976becee39e664b77f4b7ef20adf1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 Jul 2024 15:58:42 +0200 Subject: [PATCH 213/299] Fix possible deadlock for jemalloc with enabled profiler ClickHouse uses JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() for jemalloc, which calls _Unwind_Backtrace() during bootstrap of jemalloc, and if, dlsym() has allocations it will lead to deadlock: 0 __lll_lock_wait (futex=futex@entry=0x16516410 , private=0) at lowlevellock.c:52 1 0x00007ffff7f9d0a3 in __GI___pthread_mutex_lock (mutex=0x16516410 ) at ../nptl/pthread_mutex_lock.c:80 2 0x000000000ba6ec23 in pthread_mutex_lock (arg=arg@entry=0x16516410 ) at ./build/RelWithDebInfo/./src/Common/ThreadFuzzer.cpp:445 3 0x0000000014215fd4 in malloc_mutex_lock_final (mutex=0x165163d0 ) at ./contrib/jemalloc/include/jemalloc/internal/mutex.h:151 4 malloc_mutex_lock_slow (mutex=0x165163d0 ) at ./build/RelWithDebInfo/./contrib/jemalloc/src/mutex.c:90 5 0x00000000141defc6 in malloc_mutex_lock (tsdn=0x0, mutex=) at ./contrib/jemalloc/include/jemalloc/internal/mutex.h:217 6 malloc_init_hard () at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:2118 7 0x00000000141d05db in malloc_init () at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:298 8 imalloc_init_check (sopts=, dopts=) at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:2658 9 imalloc (sopts=, dopts=) at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:2689 10 calloc (num=num@entry=1, size=size@entry=32) at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:2852 11 0x00007ffff7c42c05 in _dlerror_run (operate=operate@entry=0x7ffff7c42490 , args=args@entry=0x7fffffffda40) at dlerror.c:148 12 0x00007ffff7c42525 in __dlsym (handle=, name=0xe7f568 "dl_iterate_phdr") at dlsym.c:70 13 0x000000001408e1a0 in (anonymous namespace)::getOriginalDLIteratePHDR () at ./build/RelWithDebInfo/./base/base/phdr_cache.cpp:44 14 dl_iterate_phdr (callback=0x16287a60 , data=0x7fffffffdb58) at ./build/RelWithDebInfo/./base/base/phdr_cache.cpp:64 15 0x0000000016288468 in libunwind::LocalAddressSpace::findUnwindSections (this=, targetAddr=371765377, info=...) 
at ./contrib/libunwind/src/AddressSpace.hpp:605 16 libunwind::UnwindCursor::setInfoBasedOnIPRegister (this=this@entry=0x7fffffffde70, isReturnAddress=false) at ./contrib/libunwind/src/UnwindCursor.hpp:2553 17 0x0000000016287617 in __unw_init_local (cursor=cursor@entry=0x7fffffffde70, context=context@entry=0x7fffffffddc8) at ./build/RelWithDebInfo/./contrib/libunwind/src/libunwind.cpp:91 18 0x000000001628b094 in _Unwind_Backtrace (callback=0x14226f40 , ref=0x0) at ./build/RelWithDebInfo/./contrib/libunwind/src/UnwindLevel1-gcc-ext.c:106 19 0x000000001421a7e0 in prof_boot2 (tsd=tsd@entry=0x7ffff7c357c0, base=) at ./build/RelWithDebInfo/./contrib/jemalloc/src/prof.c:693 20 0x00000000141df3e7 in malloc_init_hard () at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:2151 21 0x00000000141d96f1 in malloc_init () at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:298 22 nallocx (size=32, flags=0) at ./build/RelWithDebInfo/./contrib/jemalloc/src/jemalloc.c:4011 23 0x000000000b9fdd9b in _ZN6Memory23getActualAllocationSizeITpTkNSt3__17same_asISt11align_val_tEEJEQsr2DBE16OptionalArgumentIDpT_EEEmmS5_ (size=32) at ./src/Common/memory.h:197 24 _ZN6Memory11trackMemoryITpTkNSt3__17same_asISt11align_val_tEEJEQsr2DBE16OptionalArgumentIDpT_EEEmmR15AllocationTraceS5_ (size=32, trace=...) at ./src/Common/memory.h:208 25 operator new (size=size@entry=32) at ./build/RelWithDebInfo/./src/Common/new_delete.cpp:53 26 0x000000001540e0a6 in google::protobuf::internal::ShutdownData::get () at ./build/RelWithDebInfo/./contrib/google-protobuf/src/google/protobuf/message_lite.cc:679 27 google::protobuf::internal::OnShutdownRun (f=0x15407420 , arg=0x167a6368 ) at ./build/RelWithDebInfo/./contrib/google-protobuf/src/google/protobuf/message_lite.cc:697 28 0x0000000015407560 in global constructors keyed to 000101 () at ./contrib/google-protobuf/src/google/protobuf/generated_message_util.h:180 29 0x000000001629101d in __libc_csu_init () 30 0x00007ffff7dba010 in __libc_start_main (main=0x66e3b00 , argc=4, argv=0x7fffffffe258, init=0x16290fd0 <__libc_csu_init>, fini=, rtld_fini=, stack_end=0x7fffffffe248) at ../csu/libc-start.c:264 31 0x00000000066e302e in _start () And this is indeed what happens for glibc prio 2.34, since only in [2] the allocation had been removed [2]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9 But, jemalloc provides also JEMALLOC_PROF_LIBUNWIND, yes it is not llvm libunwind, but gnu one, but ClickHouse version of llvm libunwind provides unw_backtrace(), so we can use it, and it will not be called during jemalloc bootstrap, so deadlock should not be possible. Funny thing that for this deadlock the workaround is to enable background_thread, because a thread created for it will initialize TLS for __dlerror(). Signed-off-by: Azat Khuzhin --- contrib/jemalloc-cmake/CMakeLists.txt | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 38ebcc8f680..1fbfd29a3bd 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -179,12 +179,19 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) -# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. -# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. 
-# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking. +# jemalloc provides support two unwind flavors: +# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind) +# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++. # -# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). -target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +# But for JEMALLOC_PROF_LIBGCC it also calls _Unwind_Backtrace() during +# bootstraping of jemalloc, which may lead to deadlock, if the dlsym will do +# allocations somewhere (like glibc does prio 2.34, see [1]). +# +# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9 +# +# And since ClickHouse unwind already supports unw_backtrace() we can safely +# switch to it to avoid this deadlock. +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1) target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT From b7964bc13ea59b1d812b8a453ddaada4f453af0d Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 10 Jul 2024 14:34:54 +0000 Subject: [PATCH 214/299] Update kill.md Including extra verbiage for ON CLUSTER required in ClickHouse Cloud or self-managed clusters --- docs/en/sql-reference/statements/kill.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index b665ad85a09..6e18ace10c7 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' KILL QUERY WHERE user='username' SYNC ``` +:::tip If you are killing a query in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the query is killed on all replicas::: + Read-only users can only stop their own queries. By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped. @@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table' -- Cancel the specific mutation: KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt' ``` +:::tip If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the mutation is killed on all replicas::: The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table). From 26627605727518710cb6f1a345d3e4e8da2ac0c9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 16:38:54 +0200 Subject: [PATCH 215/299] debug tests hung on gdb error 8 --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 406ee75ff53..1e9eba94c4b 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -282,7 +282,7 @@ function run_tests() else # All other configurations are OK. 
ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('1') + ADDITIONAL_OPTIONS+=('5') fi if [[ "$RUN_SEQUENTIAL_TESTS_IN_PARALLEL" -eq 1 ]]; then @@ -311,7 +311,7 @@ function run_tests() set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 01710_aggregate_projections 01035_avg_weighted_long 00735_long_conditional 02911_join_on_nullsafe_optimization 02911_backup_restore_keeper_map 02552_siphash128_reference 2>&1 \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e From 7cff918a8348afdce60650655200bbee7a8dee00 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 10 Jul 2024 17:04:07 +0200 Subject: [PATCH 216/299] Fix test_parallel_replicas_custom_key --- tests/integration/test_parallel_replicas_custom_key/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index affa3f32cbe..375fe58d741 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -161,6 +161,9 @@ def test_parallel_replicas_custom_key_replicatedmergetree( insert_data("test_table_for_rmt", row_num, all_nodes=False) + for node in nodes: + node.query("SYSTEM SYNC REPLICA test_table_for_rmt LIGHTWEIGHT") + expected_result = "" for i in range(4): expected_result += f"{i}\t250\n" From e6b3368dc152059dd0b5367d1279585203c150f9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 17:09:50 +0200 Subject: [PATCH 217/299] debug tests hung on gdb error 9 --- tests/clickhouse-test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 27960b56ae3..af4f60a50c5 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2189,10 +2189,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool sys.stdout.flush() while True: - sys.stdout.flush() - sys.stdout.write(test_case.name) - sys.stdout.flush() - test_result = test_case.run( args, test_suite, client_options, server_logs_level ) @@ -2505,7 +2501,11 @@ def do_run_tests(jobs, test_suite: TestSuite): ) ], ) - future_seq.wait() + while not future_seq.ready(): + sleep(0.1) + if server_died.is_set(): + sleep(5) + break while not future.ready(): sleep(0.1) From e5804ae5cd64e5a2972d9d5039a8d2df0f9f70b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 17:28:10 +0200 Subject: [PATCH 218/299] Use `numbers_mt` to reduce duration of tests --- tests/performance/replaceRegexp_fallback.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml index 15dddf9cdac..509257efeb5 100644 --- a/tests/performance/replaceRegexp_fallback.xml +++ b/tests/performance/replaceRegexp_fallback.xml @@ -2,11 +2,11 @@ - WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' 
AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null - WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null - WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null - WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null From 481a1570aab5c2cfaf868011ab68d4d16fff9d4e Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 19:16:16 +0200 Subject: [PATCH 219/299] Run sequential tests in parallel to non-sequential --- docker/test/stateless/run.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 1e9eba94c4b..5747ead7986 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,8 +12,7 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 
10800 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} -# disable for now -RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 From 4729161f4187e887e10757731b2381673234b43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 18:41:48 +0000 Subject: [PATCH 220/299] Collect logs from minio --- docker/test/stateful/run.sh | 5 +++++ docker/test/stateless/run.sh | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 2215ac2b37c..ce637b9a146 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -20,7 +20,10 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test /usr/share/clickhouse-test/config/install.sh azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & + ./setup_minio.sh stateful +mc admin trace clickminio > /test_output/rubbish.log & +MC_ADMIN_PID=$! config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -251,6 +254,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst ||: diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 029c5a03151..a01abe5929f 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -46,6 +46,9 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless +mc admin trace clickminio > /test_output/rubbish.log & +MC_ADMIN_PID=$! 
+ ./setup_hdfs_minicluster.sh config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -325,6 +328,9 @@ if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: fi +# Kill minio admin client to stop collecting logs +kill $MC_ADMIN_PID + rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & From 9c0610ec2bf454ba4740a2117fb5b0d03510607f Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 21:27:15 +0200 Subject: [PATCH 221/299] add remaining window functions --- .../window-functions/first_value.md | 72 +++++++++++++++++++ .../sql-reference/window-functions/index.md | 4 +- .../window-functions/last_value.md | 72 +++++++++++++++++++ .../window-functions/leadInFrame.md | 2 +- .../window-functions/nth_value.md | 24 +++---- .../en/sql-reference/window-functions/rank.md | 2 +- .../window-functions/row_number.md | 2 +- 7 files changed, 161 insertions(+), 17 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/first_value.md create mode 100644 docs/en/sql-reference/window-functions/last_value.md diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md new file mode 100644 index 00000000000..575a6fc3f48 --- /dev/null +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -0,0 +1,72 @@ +--- +slug: /en/sql-reference/window-functions/first_value +sidebar_label: first_value +sidebar_position: 3 +--- + +# first_value + +Returns the first non-NULL value evaluated within its ordered frame. + +**Syntax** + +```sql +first_value (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The first non-NULL value evaluated within its ordered frame. + +**Example** + +In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─highest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Gary Chen │ +2. │ Robert George │ 195000 │ Gary Chen │ +3. │ Charles Juarez │ 190000 │ Gary Chen │ +4. │ Scott Harrison │ 180000 │ Gary Chen │ +5. │ Douglas Benson │ 150000 │ Gary Chen │ +6. 
│ James Henderson │ 140000 │ Gary Chen │ +7. │ Michael Stanley │ 100000 │ Gary Chen │ + └─────────────────┴────────┴─────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index ee54a679ba1..d18dbcc189d 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -76,8 +76,8 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- [`first_value(x)`](./first_value.md) - Return the first non-NULL value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last non-NULL value evaluated within its ordered frame. - [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. - [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md new file mode 100644 index 00000000000..098ee81ceb3 --- /dev/null +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -0,0 +1,72 @@ +--- +slug: /en/sql-reference/window-functions/lagInFrame +sidebar_label: lagInFrame +sidebar_position: 4 +--- + +# first_value + +Return the last non-NULL value evaluated within its ordered frame. + +**Syntax** + +```sql +first_value (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) +FROM table_name +WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) +``` + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Returned value** + +- The last non-NULL value evaluated within its ordered frame. + +**Example** + +In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players. + +Query: + +```sql +DROP TABLE IF EXISTS salaries; +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player +FROM salaries; +``` + +Result: + +```response + ┌─player──────────┬─salary─┬─lowest_paid_player─┐ +1. │ Gary Chen │ 196000 │ Michael Stanley │ +2. │ Robert George │ 195000 │ Michael Stanley │ +3. 
│ Charles Juarez │ 190000 │ Michael Stanley │ +4. │ Scott Harrison │ 180000 │ Michael Stanley │ +5. │ Douglas Benson │ 150000 │ Michael Stanley │ +6. │ James Henderson │ 140000 │ Michael Stanley │ +7. │ Michael Stanley │ 100000 │ Michael Stanley │ + └─────────────────┴────────┴────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index 0cb4eea52b2..33f69c0dcae 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 4 +sidebar_position: 5 --- # leadInFrame diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md index 26c90110aaa..5c430707009 100644 --- a/docs/en/sql-reference/window-functions/nth_value.md +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 5 +sidebar_position: 6 --- # nth_value @@ -51,7 +51,7 @@ Engine = Memory; INSERT INTO salaries FORMAT Values ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), - ('Port Elizabeth Barbarians', 'Michael Stanley', 10000, 'D'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'), ('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'), ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), @@ -59,19 +59,19 @@ INSERT INTO salaries FORMAT Values ``` ```sql -SELECT salary, nth_value(salary,3) OVER(ORDER BY salary DESC) FROM salaries GROUP BY salary; +SELECT player, salary, nth_value(player,3) OVER(ORDER BY salary DESC) AS third_highest_salary FROM salaries; ``` Result: ```response - ┌─player──────────┬─salary─┬─rank─┐ -1. │ Gary Chen │ 195000 │ 1 │ -2. │ Robert George │ 195000 │ 1 │ -3. │ Charles Juarez │ 190000 │ 3 │ -4. │ Douglas Benson │ 150000 │ 4 │ -5. │ Michael Stanley │ 150000 │ 4 │ -6. │ Scott Harrison │ 150000 │ 4 │ -7. │ James Henderson │ 140000 │ 7 │ - └─────────────────┴────────┴──────┘ + ┌─player──────────┬─salary─┬─third_highest_salary─┐ +1. │ Gary Chen │ 195000 │ │ +2. │ Robert George │ 195000 │ │ +3. │ Charles Juarez │ 190000 │ Charles Juarez │ +4. │ Scott Harrison │ 180000 │ Charles Juarez │ +5. │ Douglas Benson │ 150000 │ Charles Juarez │ +6. │ James Henderson │ 140000 │ Charles Juarez │ +7. 
│ Michael Stanley │ 100000 │ Charles Juarez │ + └─────────────────┴────────┴──────────────────────┘ ``` \ No newline at end of file diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index 9ac99dde6df..d7ed8d79c35 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 6 +sidebar_position: 7 --- # rank diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index e7165d60169..485ca355f12 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 7 +sidebar_position: 8 --- # row_number From 4d60ff6a91b9d17744a8522e3da0b850215a76d2 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 21:51:14 +0200 Subject: [PATCH 222/299] small updates --- docs/en/sql-reference/window-functions/dense_rank.md | 4 ++-- docs/en/sql-reference/window-functions/lagInFrame.md | 4 ++-- docs/en/sql-reference/window-functions/last_value.md | 10 +++++----- docs/en/sql-reference/window-functions/leadInFrame.md | 4 ++-- docs/en/sql-reference/window-functions/nth_value.md | 10 ++++------ docs/en/sql-reference/window-functions/rank.md | 4 ++-- docs/en/sql-reference/window-functions/row_number.md | 4 ++-- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/docs/en/sql-reference/window-functions/dense_rank.md b/docs/en/sql-reference/window-functions/dense_rank.md index 17ab894707e..d6445b68c55 100644 --- a/docs/en/sql-reference/window-functions/dense_rank.md +++ b/docs/en/sql-reference/window-functions/dense_rank.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/dense_rank sidebar_label: dense_rank -sidebar_position: 2 +sidebar_position: 7 --- # dense_rank -This window function ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. +Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking. The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking. diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index b67cf252283..049e095c10f 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/lagInFrame sidebar_label: lagInFrame -sidebar_position: 3 +sidebar_position: 8 --- # lagInFrame -Return a value evaluated at the row that is at a specified physical offset before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned. +Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. 
**Syntax** diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 098ee81ceb3..99b7ca4f75a 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -1,17 +1,17 @@ --- -slug: /en/sql-reference/window-functions/lagInFrame -sidebar_label: lagInFrame +slug: /en/sql-reference/window-functions/last_value +sidebar_label: last_value sidebar_position: 4 --- -# first_value +# last_value -Return the last non-NULL value evaluated within its ordered frame. +Returns the last non-NULL value evaluated within its ordered frame. **Syntax** ```sql -first_value (column_name) +last_value (column_name) OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index 33f69c0dcae..fc1b92cc266 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 5 +sidebar_position: 9 --- # leadInFrame -Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +Returns a value evaluated at the row that is offset rows after the current row within the ordered frame. **Syntax** diff --git a/docs/en/sql-reference/window-functions/nth_value.md b/docs/en/sql-reference/window-functions/nth_value.md index 5c430707009..aa5baf651a8 100644 --- a/docs/en/sql-reference/window-functions/nth_value.md +++ b/docs/en/sql-reference/window-functions/nth_value.md @@ -1,14 +1,12 @@ --- -slug: /en/sql-reference/window-functions/leadInFrame -sidebar_label: leadInFrame -sidebar_position: 6 +slug: /en/sql-reference/window-functions/nth_value +sidebar_label: nth_value +sidebar_position: 5 --- # nth_value -Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - -The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. +Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame. **Syntax** diff --git a/docs/en/sql-reference/window-functions/rank.md b/docs/en/sql-reference/window-functions/rank.md index d7ed8d79c35..dff5e154151 100644 --- a/docs/en/sql-reference/window-functions/rank.md +++ b/docs/en/sql-reference/window-functions/rank.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/rank sidebar_label: rank -sidebar_position: 7 +sidebar_position: 6 --- # rank -This window function ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. +Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row. The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given. The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking. 
diff --git a/docs/en/sql-reference/window-functions/row_number.md b/docs/en/sql-reference/window-functions/row_number.md index 485ca355f12..f1c331f89a3 100644 --- a/docs/en/sql-reference/window-functions/row_number.md +++ b/docs/en/sql-reference/window-functions/row_number.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/window-functions/row_number sidebar_label: row_number -sidebar_position: 8 +sidebar_position: 2 --- # row_number -Numbers the current row within its partition starting from 1 +Numbers the current row within its partition starting from 1. **Syntax** From 41633cabb2e055a42db9e8899947358111470cf3 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Jul 2024 22:16:03 +0200 Subject: [PATCH 223/299] Update first_value, last_value with possibility to use RESPECT NULLS --- docs/en/sql-reference/window-functions/first_value.md | 11 +++++++++-- docs/en/sql-reference/window-functions/index.md | 4 ++-- docs/en/sql-reference/window-functions/last_value.md | 11 +++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 575a6fc3f48..17ca1cacda8 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -6,18 +6,25 @@ sidebar_position: 3 # first_value -Returns the first non-NULL value evaluated within its ordered frame. +Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. **Syntax** ```sql -first_value (column_name) +first_value (column_name) [RESPECT NULLS] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) ``` +Alias: `any`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). **Returned value** diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 712b99992ea..0c3e2ea1cb6 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -77,8 +77,8 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] These functions can be used only as a window function. - [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1. -- [`first_value(x)`](./first_value.md) - Return the first non-NULL value evaluated within its ordered frame. -- [`last_value(x)`](./last_value.md) - Return the last non-NULL value evaluated within its ordered frame. +- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame. +- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame. - [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. - [`rank()`](./rank.md) - Rank the current row within its partition with gaps. 
- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps. diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 99b7ca4f75a..9d1ce81cc57 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -6,18 +6,25 @@ sidebar_position: 4 # last_value -Returns the last non-NULL value evaluated within its ordered frame. +Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour. **Syntax** ```sql -last_value (column_name) +last_value (column_name) [RESPECT NULLS] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) ``` +Alias: `anyLast`. + +:::note +Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped. +See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information. +::: + For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). **Returned value** From 6f3ae4a6de7762fae8dde45c0fc83ecc1296f271 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 10 Jul 2024 23:57:23 +0200 Subject: [PATCH 224/299] Fix path in test --- tests/clickhouse-test | 19 ++----------------- .../02240_tskv_schema_inference_bug.sh | 2 -- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index af4f60a50c5..958dde0606f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2501,28 +2501,13 @@ def do_run_tests(jobs, test_suite: TestSuite): ) ], ) - while not future_seq.ready(): - sleep(0.1) - if server_died.is_set(): - sleep(5) - break + future_seq.wait() - while not future.ready(): - sleep(0.1) - if server_died.is_set(): - print("== Server died ==") - sleep(5) - break - - print("future wait DONE") + future.wait() finally: - print("pool.terminate") pool.terminate() - print("pool.close") pool.close() - print("pool.join") pool.join() - print("pool.join DONE") if not args.run_sequential_tests_in_parallel: run_tests_array( diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh index ce545a27317..d4a4e54acbd 100755 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh - -USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') FILE_NAME=test_02240.data DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME From 22706b89b9927045e463286c53d82c6369f68bf2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 00:10:59 +0200 Subject: [PATCH 225/299] Try to fix links in docs --- docs/en/sql-reference/data-types/dynamic.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index e50f7e6ddaa..8b3c7479f4f 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -529,10 +529,10 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var ### Binary output format -In [RowBinary](../../interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format: +In [RowBinary](/docs/en/interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format: ```text ``` -See the [data types binary encoding specification](../../sql-reference/data-types/data-types-binary-encoding.md) +See the [data types binary encoding specification](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) From a9a227ddc614834eb2943a20bf0f2fa722549fec Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jul 2024 07:53:54 +0200 Subject: [PATCH 226/299] Fix "Sending a batch of X files to Y (0.00 rows, 0.00 B bytes)." in case of batch restoring Previously it was always zeros. 
Signed-off-by: Azat Khuzhin --- .../Distributed/DistributedAsyncInsertBatch.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 06d4c185840..e1facec5b40 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -196,6 +196,16 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in) UInt64 idx; in >> idx >> "\n"; files.push_back(std::filesystem::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string()); + + ReadBufferFromFile header_buffer(files.back()); + const DistributedAsyncInsertHeader & header = DistributedAsyncInsertHeader::read(header_buffer, parent.log); + total_bytes += total_bytes; + + if (header.rows) + { + total_rows += header.rows; + total_bytes += header.bytes; + } } recovered = true; From 37c66c8976da45e1984e22e55a3932e02e7937cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jul 2024 08:04:44 +0200 Subject: [PATCH 227/299] Fix 03030_system_flush_distributed_settings flakiness Signed-off-by: Azat Khuzhin --- .../03030_system_flush_distributed_settings.reference | 2 +- .../03030_system_flush_distributed_settings.sql | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference index 5caff40c4a0..3a05c8b3ee8 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.reference @@ -1 +1 @@ -10000 +30000 diff --git a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql index ac64135b593..fac673a4fe4 100644 --- a/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql +++ b/tests/queries/0_stateless/03030_system_flush_distributed_settings.sql @@ -6,15 +6,17 @@ drop table if exists dist_out; create table ephemeral (key Int, value Int) engine=Null(); create table dist_in as ephemeral engine=Distributed(test_shard_localhost, currentDatabase(), ephemeral, key) settings background_insert_batch=1; -create table data (key Int, uniq_values Int) engine=Memory(); -create materialized view mv to data as select key, uniqExact(value) uniq_values from ephemeral group by key; +create table data (key Int, uniq_values Int) engine=TinyLog(); +create materialized view mv to data as select key, uniqExact(value::String) uniq_values from ephemeral group by key; system stop distributed sends dist_in; create table dist_out as data engine=Distributed(test_shard_localhost, currentDatabase(), data); set prefer_localhost_replica=0; SET optimize_trivial_insert_select = 1; -insert into dist_in select number/100, number from system.numbers limit 1e6 settings max_memory_usage='20Mi'; +-- due to pushing to MV with aggregation the query needs ~300MiB +-- but it will be done in background via "system flush distributed" +insert into dist_in select number/100, number from system.numbers limit 3e6 settings max_block_size=3e6, max_memory_usage='100Mi'; system flush distributed dist_in; -- { serverError MEMORY_LIMIT_EXCEEDED } system flush distributed dist_in settings max_memory_usage=0; select count() from dist_out; From 3c52651b5580034e5d42433320c0d3de70a15b4e Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 10 Jul 
2024 11:58:45 +0000 Subject: [PATCH 228/299] s3_off_fix: initial (proper ifdef for registerStorageAzureQueue) --- src/Storages/registerStorages.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 9f849052071..adc1074b1fe 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -35,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory); void registerStorageS3(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); void registerStorageS3Queue(StorageFactory & factory); -void registerStorageAzureQueue(StorageFactory & factory); #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); @@ -45,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory); #endif #endif +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzureQueue(StorageFactory & factory); +#endif + #if USE_HDFS #if USE_HIVE void registerStorageHive(StorageFactory & factory); From 321a58c51931e8d39b8ba111108fed20ba0541cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 11 Jul 2024 08:33:10 +0000 Subject: [PATCH 229/299] Fix relative path to `mc` --- docker/test/stateful/run.sh | 2 +- docker/test/stateless/run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index ce637b9a146..2efbed6aa4f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -22,7 +22,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & ./setup_minio.sh stateful -mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/rubbish.log & MC_ADMIN_PID=$! config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index a01abe5929f..d47728cddb9 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -46,7 +46,7 @@ source /utils.lib /usr/share/clickhouse-test/config/install.sh ./setup_minio.sh stateless -mc admin trace clickminio > /test_output/rubbish.log & +./mc admin trace clickminio > /test_output/rubbish.log & MC_ADMIN_PID=$!
./setup_hdfs_minicluster.sh From 35850da12e0dc775f117ed060f839671306cc26b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:03:03 +0200 Subject: [PATCH 230/299] Update dynamic.md --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 8b3c7479f4f..b5781a7dd62 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/dynamic -sidebar_position: 56 +sidebar_position: 62 sidebar_label: Dynamic --- From 595bce4945cef4ef1822e610e5352a13e654e45f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:04 +0200 Subject: [PATCH 231/299] Update docs/en/sql-reference/window-functions/first_value.md --- docs/en/sql-reference/window-functions/first_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 17ca1cacda8..4f8a9d393b1 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -11,7 +11,7 @@ Returns the first value evaluated within its ordered frame. By default, NULL arg **Syntax** ```sql -first_value (column_name) [RESPECT NULLS] +first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name From 366ed8701e0e60bc6ca54258663987d3342d7763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:09 +0200 Subject: [PATCH 232/299] Update docs/en/sql-reference/window-functions/first_value.md --- docs/en/sql-reference/window-functions/first_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/first_value.md b/docs/en/sql-reference/window-functions/first_value.md index 4f8a9d393b1..30c3b1f99dc 100644 --- a/docs/en/sql-reference/window-functions/first_value.md +++ b/docs/en/sql-reference/window-functions/first_value.md @@ -29,7 +29,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Returned value** -- The first non-NULL value evaluated within its ordered frame. +- The first value evaluated within its ordered frame. **Example** From 3d96bf298ceaf030c3e863ea2fabd0a6ebe90e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:13 +0200 Subject: [PATCH 233/299] Update docs/en/sql-reference/window-functions/last_value.md --- docs/en/sql-reference/window-functions/last_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 9d1ce81cc57..34170226cdd 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -29,7 +29,7 @@ For more detail on window function syntax see: [Window Functions - Syntax](./ind **Returned value** -- The last non-NULL value evaluated within its ordered frame. +- The last value evaluated within its ordered frame. 
**Example** From 330082c3d4d1ff075e66f2eaf72c1515ad64ffdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 11 Jul 2024 12:06:33 +0200 Subject: [PATCH 234/299] Update docs/en/sql-reference/window-functions/last_value.md --- docs/en/sql-reference/window-functions/last_value.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/last_value.md b/docs/en/sql-reference/window-functions/last_value.md index 34170226cdd..dd7f5fa078a 100644 --- a/docs/en/sql-reference/window-functions/last_value.md +++ b/docs/en/sql-reference/window-functions/last_value.md @@ -11,7 +11,7 @@ Returns the last value evaluated within its ordered frame. By default, NULL argu **Syntax** ```sql -last_value (column_name) [RESPECT NULLS] +last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]] OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name From 050240d89071f750516f2a38fea1909d58095aaa Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jul 2024 12:08:16 +0200 Subject: [PATCH 235/299] Review fix --- src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index d37bffc42c4..c896a760597 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -635,7 +635,7 @@ struct DeltaLakeMetadataImpl } const auto value = tuple[1].safeGet(); auto field = getFieldValue(value, name_and_type->type); - current_partition_columns.emplace_back(*name_and_type, field); + current_partition_columns.emplace_back(std::move(name_and_type.value()), std::move(field)); LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename); } From 80ceb63f5f194c4c99aa7502f64f7770933ae18f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 10:45:36 +0000 Subject: [PATCH 236/299] Fixing build. --- src/Common/Exception.cpp | 2 +- src/Common/Exception.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 07bda6a75be..09ba664baef 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,7 +38,7 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr) +void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 87ef7101cdc..68cc305e67e 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -25,8 +25,6 @@ namespace DB class AtomicLogger; -[[noreturn]] void abortOnFailedAssertion(const String & description); - /// This flag can be set for testing purposes - to check that no exceptions are thrown. 
extern bool terminate_on_any_exception; @@ -167,6 +165,7 @@ protected: mutable std::vector capture_thread_frame_pointers; }; +[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr); std::string getExceptionStackTraceString(const std::exception & e); std::string getExceptionStackTraceString(std::exception_ptr e); From 1bc02fb71d6a27a2bc83484dec667edb48b0ab84 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 12:39:53 +0000 Subject: [PATCH 237/299] Ignore subquery for IN in DDLLoadingDependencyVisitor --- src/Databases/DDLLoadingDependencyVisitor.cpp | 8 +++++++ .../02841_not_ready_set_constraints.reference | 1 + .../02841_not_ready_set_constraints.sql | 24 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 40234abb20f..67bce915168 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction qualified_name.database = table_identifier->getDatabaseName(); qualified_name.table = table_identifier->shortName(); } + else if (arg->as()) + { + /// Allow IN subquery. + /// Do not add tables from the subquery into dependencies, + /// because CREATE will succeed anyway. + return; + } else { assert(false); diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference index d81cc0710eb..daaac9e3030 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.reference +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.reference @@ -1 +1,2 @@ 42 +42 diff --git a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql index ecdf4d50635..274940f50a3 100644 --- a/tests/queries/0_stateless/02841_not_ready_set_constraints.sql +++ b/tests/queries/0_stateless/02841_not_ready_set_constraints.sql @@ -17,3 +17,27 @@ ENGINE = MergeTree ORDER BY conversation; INSERT INTO t2(conversation) VALUES (42); select * from t2; + +drop table t1; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +drop table t2; + +CREATE TABLE t2 ( + `conversation` UInt64, + CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1) +) +ENGINE = MergeTree ORDER BY conversation; + +INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE } + +CREATE TABLE t1 ( + `id` UInt64 +) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t1(id) VALUES (42); + +INSERT INTO t2(conversation) VALUES (42); +select * from t2; From 355a56d1b0025b6ba85c7b63a4ce7356d5de792c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 8 Jul 2024 15:21:11 +0200 Subject: [PATCH 238/299] Add a stateless test for gRPC protocol. 
--- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/requirements.txt | 2 + tests/ci/functional_test_check.py | 1 + tests/config/config.d/grpc_protocol.xml | 3 ++ tests/config/install.sh | 1 + .../0_stateless/03203_grpc_protocol.reference | 1 + .../0_stateless/03203_grpc_protocol.sh | 14 +++++ utils/grpc-client/generate_pb2.py | 52 +++++++++++++++++++ utils/grpc-client/pb2/generate.py | 29 ----------- 9 files changed, 75 insertions(+), 29 deletions(-) create mode 100644 tests/config/config.d/grpc_protocol.xml create mode 100644 tests/queries/0_stateless/03203_grpc_protocol.reference create mode 100755 tests/queries/0_stateless/03203_grpc_protocol.sh create mode 100755 utils/grpc-client/generate_pb2.py delete mode 100755 utils/grpc-client/pb2/generate.py diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 5a655a3fd2b..a0e5513a3a2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -86,6 +86,7 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 +ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py" RUN npm install -g azurite@3.30.0 \ && npm install -g tslib && npm install -g node diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt index 3284107e24e..74860d5fec3 100644 --- a/docker/test/stateless/requirements.txt +++ b/docker/test/stateless/requirements.txt @@ -8,6 +8,7 @@ cryptography==3.4.8 dbus-python==1.2.18 distro==1.7.0 docutils==0.17.1 +grpcio==1.47.0 gyp==0.1 httplib2==0.20.2 idna==3.3 @@ -28,6 +29,7 @@ packaging==24.1 pandas==1.5.3 pip==24.1.1 pipdeptree==2.23.0 +protobuf==4.25.3 pyarrow==15.0.0 pyasn1==0.4.8 PyJWT==2.3.0 diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index d8e5a7fa27f..c48a5d91bf5 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -106,6 +106,7 @@ def get_run_command( f"docker run --volume={builds_path}:/package_folder " f"{ci_logs_args}" f"--volume={repo_path}/tests:/usr/share/clickhouse-test " + f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client " f"{volume_with_broken_test}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " diff --git a/tests/config/config.d/grpc_protocol.xml b/tests/config/config.d/grpc_protocol.xml new file mode 100644 index 00000000000..b957618120d --- /dev/null +++ b/tests/config/config.d/grpc_protocol.xml @@ -0,0 +1,3 @@ + + 9100 + diff --git a/tests/config/install.sh b/tests/config/install.sh index 08ee11a7407..9f8730bb91e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/grpc_protocol.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/03203_grpc_protocol.reference 
b/tests/queries/0_stateless/03203_grpc_protocol.reference new file mode 100644 index 00000000000..9766475a418 --- /dev/null +++ b/tests/queries/0_stateless/03203_grpc_protocol.reference @@ -0,0 +1 @@ +ok diff --git a/tests/queries/0_stateless/03203_grpc_protocol.sh b/tests/queries/0_stateless/03203_grpc_protocol.sh new file mode 100755 index 00000000000..d51d6382f67 --- /dev/null +++ b/tests/queries/0_stateless/03203_grpc_protocol.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: In fasttest, ENABLE_LIBRARIES=0, so the grpc library is not built + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +if [[ -z "$CLICKHOUSE_GRPC_CLIENT" ]]; then + CLICKHOUSE_GRPC_CLIENT="$CURDIR/../../../utils/grpc-client/clickhouse-grpc-client.py" +fi + +# Simple test. +$CLICKHOUSE_GRPC_CLIENT --query "SELECT 'ok'" diff --git a/utils/grpc-client/generate_pb2.py b/utils/grpc-client/generate_pb2.py new file mode 100755 index 00000000000..95a39023ed7 --- /dev/null +++ b/utils/grpc-client/generate_pb2.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +# This is a helper utility. +# It generates files in the "pb2" folder using the protocol buffer compiler. +# This script must be called manually after any change pf "clickhouse_grpc.proto" + +import grpc_tools # pip3 install grpcio-tools + +import os, shutil, subprocess + + +# Settings. +script_path = os.path.realpath(__file__) +script_name = os.path.basename(script_path) +script_dir = os.path.dirname(script_path) +root_dir = os.path.abspath(os.path.join(script_dir, "../..")) + +grpc_proto_dir = os.path.abspath(os.path.join(root_dir, "src/Server/grpc_protos")) +grpc_proto_filename = "clickhouse_grpc.proto" + +# Files in the "pb2" folder which will be generated by this script. +pb2_filenames = ["clickhouse_grpc_pb2.py", "clickhouse_grpc_pb2_grpc.py"] +pb2_dir = os.path.join(script_dir, "pb2") + + +# Processes the protobuf schema with the protocol buffer compiler and generates the "pb2" folder. +def generate_pb2(): + print(f"Generating files:") + for pb2_filename in pb2_filenames: + print(os.path.join(pb2_dir, pb2_filename)) + + os.makedirs(pb2_dir, exist_ok=True) + + cmd = [ + "python3", + "-m", + "grpc_tools.protoc", + "-I" + grpc_proto_dir, + "--python_out=" + pb2_dir, + "--grpc_python_out=" + pb2_dir, + os.path.join(grpc_proto_dir, grpc_proto_filename), + ] + subprocess.run(cmd) + + for pb2_filename in pb2_filenames: + assert os.path.exists(os.path.join(pb2_dir, pb2_filename)) + print("Done! 
(generate_pb2)") + + +# MAIN +if __name__ == "__main__": + generate_pb2() diff --git a/utils/grpc-client/pb2/generate.py b/utils/grpc-client/pb2/generate.py deleted file mode 100755 index 2f4b3bf5af7..00000000000 --- a/utils/grpc-client/pb2/generate.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 - -import grpc_tools # pip3 install grpcio-tools - -import os -import subprocess - - -script_dir = os.path.dirname(os.path.realpath(__file__)) -dest_dir = script_dir -src_dir = os.path.abspath(os.path.join(script_dir, "../../../src/Server/grpc_protos")) -src_filename = "clickhouse_grpc.proto" - - -def generate(): - cmd = [ - "python3", - "-m", - "grpc_tools.protoc", - "-I" + src_dir, - "--python_out=" + dest_dir, - "--grpc_python_out=" + dest_dir, - os.path.join(src_dir, src_filename), - ] - subprocess.run(cmd) - - -if __name__ == "__main__": - generate() From 3e9f6265195ee6952d4963409de3bd4a1f344730 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:59:34 +0200 Subject: [PATCH 239/299] Update test.py --- tests/integration/test_storage_rabbitmq/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3240039ee81..f885a3507ac 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): def kill_rabbitmq(rabbitmq_id): p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) return p.returncode == 0 def revive_rabbitmq(rabbitmq_id, cookie): p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE) - p.communicate() + p.wait(timeout=60) wait_rabbitmq_to_start(rabbitmq_id, cookie) From cdb3b3f2aaec64b8d5b3ec1ee6e38545e3a2b186 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jul 2024 16:42:29 +0200 Subject: [PATCH 240/299] Add query elapsed time for non-default format in play UI Signed-off-by: Azat Khuzhin --- programs/server/play.html | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/programs/server/play.html b/programs/server/play.html index 507a96382a7..b5bcc687c27 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -516,6 +516,9 @@ /// Save query in history only if it is different. let previous_query = ''; + /// Start of the last query + let last_query_start = 0; + const current_url = new URL(window.location); const opened_locally = location.protocol == 'file:'; @@ -567,6 +570,8 @@ '&password=' + encodeURIComponent(password) } + last_query_start = performance.now(); + const xhr = new XMLHttpRequest; xhr.open('POST', url, true); @@ -579,7 +584,8 @@ if (posted_request_num != request_num) { return; } else if (this.readyState === XMLHttpRequest.DONE) { - renderResponse(this.status, this.response); + const elapsed_msec = performance.now() - last_query_start; + renderResponse(this.status, this.response, elapsed_msec); /// The query is saved in browser history (in state JSON object) /// as well as in URL fragment identifier. @@ -587,7 +593,8 @@ const state = { query: query, status: this.status, - response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit. + response: this.response.length > 100000 ? null : this.response, /// Lower than the browser's limit. 
+ elapsed_msec: elapsed_msec, }; const title = "ClickHouse Query: " + query; @@ -617,7 +624,7 @@ xhr.send(query); } - function renderResponse(status, response) { + function renderResponse(status, response, elapsed_msec) { document.getElementById('hourglass').style.display = 'none'; if (status === 200) { @@ -632,6 +639,7 @@ renderChart(json); } else { renderUnparsedResult(response); + stats.innerText = `Elapsed: ${(elapsed_msec/1000).toFixed(3)} sec.`; } document.getElementById('check-mark').style.display = 'inline'; } else { @@ -651,7 +659,7 @@ clear(); return; } - renderResponse(event.state.status, event.state.response); + renderResponse(event.state.status, event.state.response, event.state.elapsed_msec); }; if (window.location.hash) { From 5f302e539dcf728174b30819594c69b4cc85543b Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 9 Jul 2024 22:49:41 +0200 Subject: [PATCH 241/299] Fix error reporting while copying to Azure Blob Storage. --- .../copyAzureBlobStorageFile.cpp | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 128df415197..c10a7cd017a 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -49,7 +49,7 @@ namespace const String & dest_blob_, std::shared_ptr settings_, ThreadPoolCallbackRunnerUnsafe schedule_, - const Poco::Logger * log_) + LoggerPtr log_) : create_read_buffer(create_read_buffer_) , client(client_) , offset (offset_) @@ -74,7 +74,7 @@ namespace const String & dest_blob; std::shared_ptr settings; ThreadPoolCallbackRunnerUnsafe schedule; - const Poco::Logger * log; + const LoggerPtr log; size_t max_single_part_upload_size; struct UploadPartTask @@ -83,7 +83,6 @@ namespace size_t part_size; std::vector block_ids; bool is_finished = false; - std::exception_ptr exception; }; size_t normal_part_size; @@ -92,6 +91,7 @@ namespace std::list TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks; int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + std::exception_ptr bg_exception TSA_GUARDED_BY(bg_tasks_mutex); std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -186,7 +186,7 @@ namespace } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, fmt::format("While performing multipart upload of blob {} in container {}", dest_blob, dest_container_for_logging)); waitForAllBackgroundTasks(); throw; } @@ -242,7 +242,12 @@ namespace } catch (...) { - task->exception = std::current_exception(); + std::lock_guard lock(bg_tasks_mutex); + if (!bg_exception) + { + tryLogCurrentException(log, "While writing part"); + bg_exception = std::current_exception(); /// The exception will be rethrown after all background tasks stop working. 
+ } } task_finish_notify(); }, Priority{}); @@ -299,13 +304,13 @@ namespace /// Suppress warnings because bg_tasks_mutex is actually hold, but tsa annotations do not understand std::unique_lock bg_tasks_condvar.wait(lock, [this]() {return TSA_SUPPRESS_WARNING_FOR_READ(num_added_bg_tasks) == TSA_SUPPRESS_WARNING_FOR_READ(num_finished_bg_tasks); }); - auto & tasks = TSA_SUPPRESS_WARNING_FOR_WRITE(bg_tasks); - for (auto & task : tasks) - { - if (task.exception) - std::rethrow_exception(task.exception); + auto exception = TSA_SUPPRESS_WARNING_FOR_READ(bg_exception); + if (exception) + std::rethrow_exception(exception); + + const auto & tasks = TSA_SUPPRESS_WARNING_FOR_READ(bg_tasks); + for (const auto & task : tasks) block_ids.insert(block_ids.end(),task.block_ids.begin(), task.block_ids.end()); - } } }; } @@ -321,7 +326,8 @@ void copyDataToAzureBlobStorageFile( std::shared_ptr settings, ThreadPoolCallbackRunnerUnsafe schedule) { - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyDataToAzureBlobStorageFile")}; + auto log = getLogger("copyDataToAzureBlobStorageFile"); + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } @@ -339,9 +345,11 @@ void copyAzureBlobStorageFile( const ReadSettings & read_settings, ThreadPoolCallbackRunnerUnsafe schedule) { + auto log = getLogger("copyAzureBlobStorageFile"); + if (settings->use_native_copy) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); + LOG_TRACE(log, "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); @@ -352,7 +360,7 @@ void copyAzureBlobStorageFile( if (size < settings->max_single_part_copy_size) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy blob sync {} -> {}", src_blob, dest_blob); + LOG_TRACE(log, "Copy blob sync {} -> {}", src_blob, dest_blob); block_blob_client_dest.CopyFromUri(source_uri); } else @@ -368,7 +376,7 @@ void copyAzureBlobStorageFile( if (copy_status.HasValue() && copy_status.Value() == Azure::Storage::Blobs::Models::CopyStatus::Success) { - LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); + LOG_TRACE(log, "Copy of {} to {} finished", properties_model.CopySource.Value(), dest_blob); } else { @@ -382,14 +390,14 @@ void copyAzureBlobStorageFile( } else { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); + LOG_TRACE(log, "Reading from Container: {}, Blob: {}", src_container_for_logging, src_blob); auto create_read_buffer = [&] { return std::make_unique( src_client, src_blob, read_settings, settings->max_single_read_retries, settings->max_single_download_retries); }; - UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, &Poco::Logger::get("copyAzureBlobStorageFile")}; + UploadHelper helper{create_read_buffer, dest_client, offset, size, dest_container_for_logging, dest_blob, settings, schedule, log}; helper.performCopy(); } } From 262c1f9e77add2349c76777d9ddabfe06895c6d3 
Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 17:03:13 +0200 Subject: [PATCH 242/299] Update dynamic.md --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index b5781a7dd62..d0116e7158c 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -529,7 +529,7 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var ### Binary output format -In [RowBinary](/docs/en/interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format: +In [RowBinary](/docs/en/interfaces/formats.md#rowbinary) format values of `Dynamic` type are serialized in the following format: ```text From 6868708a58f39fac83382695864459eb5fcffe5b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 11 Jul 2024 11:37:26 +0000 Subject: [PATCH 243/299] CI Buddy bot to notify about CI events --- pyproject.toml | 3 +- tests/ci/.mypy.ini | 1 + tests/ci/ci.py | 11 +++++- tests/ci/ci_buddy.py | 88 ++++++++++++++++++++++++++++++++++++++++++++ tests/ci/ci_utils.py | 41 +++++++++++++++++++++ 5 files changed, 141 insertions(+), 3 deletions(-) create mode 100644 tests/ci/ci_buddy.py diff --git a/pyproject.toml b/pyproject.toml index 279d077a695..90f089afa41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,10 +35,9 @@ disable = ''' broad-except, bare-except, no-else-return, - global-statement + global-statement, ''' [tool.pylint.SIMILARITIES] # due to SQL min-similarity-lines=1000 - diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index 9bc44025826..f12d27979ce 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,3 +15,4 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True +ignore_missing_imports = True \ No newline at end of file diff --git a/tests/ci/ci.py b/tests/ci/ci.py index af2f4c0a1fc..b4a3c7ec849 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string +from ci_utils import GHActions, normalize_string, Shell from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -53,6 +53,7 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings +from ci_buddy import CIBuddy from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -262,6 +263,8 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): + print("Clear dmesg") + Shell.run("sudo dmesg --clear ||:") CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1118,6 +1121,12 @@ def main() -> int: ### POST action: start elif args.post: + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + print("WARNING: OOM while job execution") + CIBuddy().post_error("Out Of Memory") + job_report = JobReport.load() if JobReport.exist() else None if job_report: ch_helper = ClickHouseHelper() diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py new file mode 100644 index 00000000000..d03f5d819ec --- /dev/null +++ b/tests/ci/ci_buddy.py @@ -0,0 +1,88 @@ 
+import json +import os + +import boto3 +import requests +from botocore.exceptions import ClientError + +from pr_info import PRInfo +from ci_utils import Shell + + +class CIBuddy: + _HEADERS = {"Content-Type": "application/json"} + + def __init__(self, dry_run=False): + self.repo = os.getenv("GITHUB_REPOSITORY", "") + self.dry_run = dry_run + res = self._get_webhooks() + self.test_channel = "" + self.dev_ci_channel = "" + if res: + self.test_channel = json.loads(res)["test_channel"] + self.dev_ci_channel = json.loads(res)["ci_channel"] + self.job_name = os.getenv("CHECK_NAME", "unknown") + pr_info = PRInfo() + self.pr_number = pr_info.number + self.head_ref = pr_info.head_ref + self.commit_url = pr_info.commit_html_url + + @staticmethod + def _get_webhooks(): + name = "ci_buddy_web_hooks" + + session = boto3.Session(region_name="us-east-1") # Replace with your region + ssm_client = session.client("ssm") + json_string = None + try: + response = ssm_client.get_parameter( + Name=name, + WithDecryption=True, # Set to True if the parameter is a SecureString + ) + json_string = response["Parameter"]["Value"] + except ClientError as e: + print(f"An error occurred: {e}") + + return json_string + + def post(self, message, dry_run=None): + if dry_run is None: + dry_run = self.dry_run + print(f"Posting slack message, dry_run [{dry_run}]") + if dry_run: + url = self.test_channel + else: + url = self.dev_ci_channel + data = {"text": message} + try: + requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10) + except Exception as e: + print(f"ERROR: Failed to post message, ex {e}") + + def post_error(self, error_description, job_name="", with_instance_info=True): + instance_id, instance_type = "unknown", "unknown" + if with_instance_info: + instance_id = Shell.run("ec2metadata --instance-id") or instance_id + instance_type = Shell.run("ec2metadata --instance-type") or instance_type + if not job_name: + job_name = os.getenv("CHECK_NAME", "unknown") + line_err = f":red_circle: {error_description} :red_circle:\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + message = line_err + message += line_job + if with_instance_info: + message += line_ghr + if self.pr_number > 0: + message += line_pr_ + else: + message += line_br_ + self.post(message) + + +if __name__ == "__main__": + # test + buddy = CIBuddy(dry_run=True) + buddy.post_error("Out of memory") diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index e7034d0b104..629f37289a9 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,4 +1,5 @@ import os +import subprocess from contextlib import contextmanager from pathlib import Path from typing import Any, Iterator, List, Union @@ -42,3 +43,43 @@ class GHActions: for line in lines: print(line) print("::endgroup::") + + +class Shell: + @classmethod + def run_strict(cls, command): + subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + + @classmethod + def run(cls, command): + res = "" + result = subprocess.run( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + ) + if result.returncode == 0: + res = result.stdout + return res.strip() + + @classmethod + def check(cls, command): + result = subprocess.run( + command + " 2>&1", + shell=True, + stdout=subprocess.PIPE, + 
stderr=subprocess.PIPE, + text=True, + check=False, + ) + return result.returncode == 0 From 808d875a760d81792691f4cd7c465ec2823aefa9 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 11 Jul 2024 18:57:54 +0200 Subject: [PATCH 244/299] Remove links at all --- docs/en/sql-reference/data-types/dynamic.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index d0116e7158c..8be81471377 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -529,10 +529,8 @@ SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Var ### Binary output format -In [RowBinary](/docs/en/interfaces/formats.md#rowbinary) format values of `Dynamic` type are serialized in the following format: +In RowBinary format values of `Dynamic` type are serialized in the following format: ```text ``` - -See the [data types binary encoding specification](/docs/en/sql-reference/data-types/data-types-binary-encoding.md) From fe451ec25a3baa269cb47722e38dfd90f01b3734 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 17:34:33 +0000 Subject: [PATCH 245/299] Fixing build. --- src/Common/Exception.cpp | 12 +++++++++--- src/Common/Exception.h | 3 ++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 09ba664baef..111280074dd 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -38,15 +38,21 @@ namespace ErrorCodes extern const int CANNOT_MREMAP; } -void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace) +void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size) { auto & logger = Poco::Logger::root(); LOG_FATAL(&logger, "Logical error: '{}'.", description); if (trace) - LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace->data(), 0, trace->size())); + LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size)); abort(); } +void abortOnFailedAssertion(const String & description) +{ + StackTrace st; + abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize()); +} + bool terminate_on_any_exception = false; static int terminate_status_code = 128 + SIGABRT; thread_local bool update_error_statistics = true; @@ -61,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc #ifdef ABORT_ON_LOGICAL_ERROR if (code == ErrorCodes::LOGICAL_ERROR) { - abortOnFailedAssertion(msg, &trace); + abortOnFailedAssertion(msg, trace.data(), 0, trace.size()); } #endif diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 68cc305e67e..a4774a89f6a 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -165,7 +165,8 @@ protected: mutable std::vector capture_thread_frame_pointers; }; -[[noreturn]] void abortOnFailedAssertion(const String & description, const Exception::FramePointers * trace = nullptr); +[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size); +[[noreturn]] void abortOnFailedAssertion(const String & description); std::string getExceptionStackTraceString(const std::exception & e); 
std::string getExceptionStackTraceString(std::exception_ptr e); From 966e4d2d6327987b05668ce625e9f145c28e264a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jul 2024 21:46:08 +0200 Subject: [PATCH 246/299] Remove noisy message --- src/Interpreters/DatabaseCatalog.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 964baea1891..bb2dd158710 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1281,10 +1281,6 @@ void DatabaseCatalog::rescheduleDropTableTask() auto min_drop_time = getMinDropTime(); time_t schedule_after_ms = min_drop_time > current_time ? (min_drop_time - current_time) * 1000 : 0; - LOG_TRACE( - log, - "Have {} tables in queue to drop. Schedule background task in {} seconds", - tables_marked_dropped.size(), schedule_after_ms / 1000); (*drop_task)->scheduleAfter(schedule_after_ms); } From 364686731f1437e708f12d6e0f44d371b977dc9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jul 2024 21:51:09 +0200 Subject: [PATCH 247/299] Fix test --- tests/queries/0_stateless/03201_local_named_collections.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03201_local_named_collections.sh b/tests/queries/0_stateless/03201_local_named_collections.sh index 2054a09df06..809b4b52f41 100755 --- a/tests/queries/0_stateless/03201_local_named_collections.sh +++ b/tests/queries/0_stateless/03201_local_named_collections.sh @@ -13,7 +13,7 @@ INSERT INTO test VALUES ('Hello, world!'); ${CLICKHOUSE_LOCAL} --multiquery " CREATE NAMED COLLECTION mydb AS host = '${CLICKHOUSE_HOST}', port = ${CLICKHOUSE_PORT_TCP}, user = 'default', password = '', db = '${CLICKHOUSE_DATABASE}'; SELECT * FROM remote(mydb, table = 'test'); -" | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" +" 2>&1 | grep --text -F -v "ASan doesn't fully support makecontext/swapcontext functions" ${CLICKHOUSE_CLIENT} --multiquery " DROP TABLE test; From 4a56c601b2b4ec364f808b25d6e9d9adfd4d3ce2 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 01:11:13 +0200 Subject: [PATCH 248/299] Stateless tests: decrease CI timeout --- docker/test/stateless/run.sh | 6 +++--- tests/ci/ci_definitions.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5747ead7986..cb699926cbb 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,8 +6,8 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-10800} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) +MAX_RUN_TIME=${MAX_RUN_TIME:-7200} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -320,7 +320,7 @@ export -f run_tests # This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 600)) +TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
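For orientation, the timeout arithmetic behind this patch works out roughly as follows (a sketch using only the values visible in the diffs above; the exact reserve may differ elsewhere in the script):

```bash
# Assumed values from this patch: the functional-test job is capped at 7200s,
# and ~700s are kept in reserve for job setup and artifact collection.
MAX_RUN_TIME=${MAX_RUN_TIME:-7200}
TIMEOUT=$((MAX_RUN_TIME - 700))   # 6500s left for running the tests themselves
echo "test runner budget: ${TIMEOUT}s"
```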
diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48e1280d939..4ae252560e9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -378,7 +378,7 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=10800, + timeout=7200, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", From 8ef3fbf32333dea0be9ee3ebbd9a3c9529cb9fb6 Mon Sep 17 00:00:00 2001 From: Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> Date: Fri, 12 Jul 2024 07:50:35 +0700 Subject: [PATCH 249/299] docs(clickhouse-local): intended section link --- docs/en/operations/utilities/clickhouse-local.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f19643a3fa5..c20e4fc3b09 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -16,7 +16,7 @@ sidebar_label: clickhouse-local While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. -Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). 
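As a quick illustration of the "query data in a file" use case referenced above (a minimal sketch; the file name is assumed and not part of the patch):

```bash
# Query a local CSV file directly, without a running ClickHouse server
clickhouse-local --query "SELECT count() FROM file('trips.csv', CSVWithNames)"
```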
## Download clickhouse-local From 08b6dd604a4673628d0496808a7109f87897d1b5 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 01:24:07 +0200 Subject: [PATCH 250/299] Stateless tests: deal with hang-ups more roughly --- tests/clickhouse-test | 123 +++++++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 31 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 958dde0606f..ffb3dcf4d9e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1750,7 +1750,7 @@ class TestCase: return TestResult( self.name, TestStatus.FAIL, - FailureReason.INTERNAL_QUERY_FAIL, + FailureReason.TIMEOUT, total_time, self.add_info_about_settings( self.get_description_from_exception_info(sys.exc_info()) @@ -2189,11 +2189,26 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool sys.stdout.flush() while True: - test_result = test_case.run( - args, test_suite, client_options, server_logs_level - ) - test_result = test_case.process_result(test_result, MESSAGES) - if not test_result.need_retry: + # This is the upper level timeout + # This helps with completely frozen processes, like in case of gdb errors + def timeout_handler(signum, frame): + raise TimeoutError("Test execution timed out") + + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(int(args.timeout * 1.1)) + test_result = None + try: + test_result = test_case.run( + args, test_suite, client_options, server_logs_level + ) + test_result = test_case.process_result(test_result, MESSAGES) + break + except TimeoutError: + break + finally: + signal.alarm(0) + + if not test_result or not test_result.need_retry: break restarted_tests.append(test_result) @@ -2452,6 +2467,10 @@ def override_envs(*args_, **kwargs): run_tests_array(*args_, **kwargs) +def run_tests_process(*args, **kwargs): + return run_tests_array(*args, **kwargs) + + def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( @@ -2475,39 +2494,70 @@ def do_run_tests(jobs, test_suite: TestSuite): # of failures will be nearly the same for all tests from the group. 
random.shuffle(test_suite.parallel_tests) - batch_size = max(1, len(test_suite.parallel_tests) // jobs) + batch_size = max(1, (len(test_suite.parallel_tests) // jobs) + 1) parallel_tests_array = [] for job in range(jobs): range_ = job * batch_size, job * batch_size + batch_size batch = test_suite.parallel_tests[range_[0] : range_[1]] parallel_tests_array.append((batch, batch_size, test_suite, True)) - try: - with multiprocessing.Pool(processes=jobs + 1) as pool: - future = pool.map_async(run_tests_array, parallel_tests_array) + processes = [] - if args.run_sequential_tests_in_parallel: - # Run parallel tests and sequential tests at the same time - # Sequential tests will use different ClickHouse instance - # In this process we can safely override values in `args` and `os.environ` - future_seq = pool.map_async( - override_envs, - [ - ( - test_suite.sequential_tests, - len(test_suite.sequential_tests), - test_suite, - False, - ) - ], - ) - future_seq.wait() + for test_batch in parallel_tests_array: + process = multiprocessing.Process( + target=run_tests_process, args=(test_batch,) + ) + processes.append(process) + process.start() - future.wait() - finally: - pool.terminate() - pool.close() - pool.join() + if args.run_sequential_tests_in_parallel: + # Run parallel tests and sequential tests at the same time + # Sequential tests will use different ClickHouse instance + # In this process we can safely override values in `args` and `os.environ` + process = multiprocessing.Process( + target=override_envs, + args=( + ( + test_suite.sequential_tests, + len(test_suite.sequential_tests), + test_suite, + False, + ), + ), + ) + processes.append(process) + process.start() + + while processes: + sys.stdout.flush() + # Periodically check the server for hangs + # and stop all processes in this case + try: + clickhouse_execute( + args, + query="SELECT 1 /*hang up check*/", + max_http_retries=5, + timeout=20, + ) + except Exception: + print("Hang up check failed") + server_died.set() + + if server_died.is_set(): + print("Server died, terminating all processes...") + kill_gdb_if_any() + # Wait for test results + sleep(args.timeout) + for p in processes: + if p.is_alive(): + p.terminate() + break + + for p in processes[:]: + if not p.is_alive(): + processes.remove(p) + + sleep(5) if not args.run_sequential_tests_in_parallel: run_tests_array( @@ -3358,6 +3408,14 @@ def parse_args(): return parser.parse_args() +class Terminated(KeyboardInterrupt): + pass + + +def signal_handler(sig, frame): + raise Terminated(f"Terminated with {sig} signal") + + if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) @@ -3369,6 +3427,9 @@ if __name__ == "__main__": # infinite tests processes left # (new process group is required to avoid killing some parent processes) os.setpgid(0, 0) + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGHUP, signal_handler) try: args = parse_args() From 5f8358942c9de0380728a0e1e7a4ba749e8d7856 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 03:06:07 +0200 Subject: [PATCH 251/299] Stateless tests: push CI --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ffb3dcf4d9e..79f6b5d71d3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2190,7 +2190,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool while True: # This is the upper level timeout 
- # This helps with completely frozen processes, like in case of gdb errors + # It helps with completely frozen processes, like in case of gdb errors def timeout_handler(signum, frame): raise TimeoutError("Test execution timed out") From f30cd1243495265f54bd6cbcbd721c4f77cebe37 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 08:38:22 +0200 Subject: [PATCH 252/299] Stateless tests: add "Server died" check --- docker/test/util/process_functional_tests_result.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index fd4cc9f4bf7..8b2fd46c973 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -11,6 +11,7 @@ TIMEOUT_SIGN = "[ Timeout! " UNKNOWN_SIGN = "[ UNKNOWN " SKIPPED_SIGN = "[ SKIPPED " HUNG_SIGN = "Found hung queries in processlist" +SERVER_DIED_SIGN = "Server died, terminating all processes" DATABASE_SIGN = "Database: " SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] @@ -25,6 +26,7 @@ def process_test_log(log_path, broken_tests): failed = 0 success = 0 hung = False + server_died = False retries = False success_finish = False test_results = [] @@ -41,6 +43,8 @@ def process_test_log(log_path, broken_tests): if HUNG_SIGN in line: hung = True break + if SERVER_DIED_SIGN in line: + server_died = True if RETRIES_SIGN in line: retries = True if any( @@ -123,6 +127,7 @@ def process_test_log(log_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -150,6 +155,7 @@ def process_result(result_path, broken_tests): failed, success, hung, + server_died, success_finish, retries, test_results, @@ -165,6 +171,10 @@ def process_result(result_path, broken_tests): description = "Some queries hung, " state = "failure" test_results.append(("Some queries hung", "FAIL", "0", "")) + elif server_died: + description = "Server died, " + state = "failure" + test_results.append(("Server died", "FAIL", "0", "")) elif not success_finish: description = "Tests are not finished, " state = "failure" From b024bb736f6008de741bf6392d0b8432e8cdf16c Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 10:14:41 +0200 Subject: [PATCH 253/299] CI: CiBuddy to post to salck channel from release branches only --- tests/ci/ci.py | 4 +++- tests/ci/ci_buddy.py | 10 +++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index b4a3c7ec849..fac50d30022 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1125,7 +1125,9 @@ def main() -> int: "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" ): print("WARNING: OOM while job execution") - CIBuddy().post_error("Out Of Memory") + CIBuddy(dry_run=not pr_info.is_release).post_error( + "Out Of Memory", job_name=_get_ext_check_name(args.job_name) + ) job_report = JobReport.load() if JobReport.exist() else None if job_report: diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py index d03f5d819ec..ea690bb602c 100644 --- a/tests/ci/ci_buddy.py +++ b/tests/ci/ci_buddy.py @@ -66,11 +66,11 @@ class CIBuddy: instance_type = Shell.run("ec2metadata --instance-type") or instance_type if not job_name: job_name = os.getenv("CHECK_NAME", "unknown") - line_err = f":red_circle: {error_description} :red_circle:\n\n" - line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" - line_job = 
f" *Job:* `{job_name}`\n" - line_pr_ = f" *PR:* \n" - line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" + line_err = f":red_circle: *Error: {error_description}*\n\n" + line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n" + line_job = f" *Job:* `{job_name}`\n" + line_pr_ = f" *PR:* \n" + line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n" message = line_err message += line_job if with_instance_info: From 7d9e1700d2a85330380c19777b788ce5a6c2f605 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:24:24 +0200 Subject: [PATCH 254/299] update intHash32, intHash64 --- .../functions/array-functions.md | 40 ++++++++++++ .../sql-reference/functions/hash-functions.md | 63 ++++++++++++++++++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d87ca4a0fe7..4080dce883f 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3081,3 +3081,43 @@ Result: ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). + +## kql_array_sort_asc + +Sorts an array from lowest to highest value. For use with [Kusto Query Language (KQL)](https://clickhouse.com/docs/en/guides/developer/alternative-query-languages#kusto-query-language-kql). + +:::note +For this function to work you should have Kusto enabled. To enable Kusto: + +```sql +SET dialect = 'kusto' +``` + +::: + +**Syntax** + +``` sql +kql_array_sort_asc(arr1 [, arr2 ... arrN]) +``` + +**Arguments** + +- `arr1` — [Array](../data-types/array.md) of numeric values. +- `arr1` — [Array](../data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). + +**Example** + +``` sql +SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res +``` + +``` text +┌─res───────┐ +│ [1,2,0,1] │ +└───────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e431ed75465..d2ed4516fce 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table Calculates a 32-bit hash code from any type of integer. This is a relatively fast non-cryptographic hash function of average quality for numbers. +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). + +**Returned value** + +- 32-bit hash code. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash32(42); +``` + +Result: + +```response +┌─intHash32(42)─┐ +│ 1228623923 │ +└───────────────┘ +``` + ## intHash64 Calculates a 64-bit hash code from any type of integer. -It works faster than intHash32. Average quality. +This is a relatively fast non-cryptographic hash function of average quality for numbers. +It works faster than [intHash32](#inthash32). + +**Syntax** + +```sql +intHash32(int) +``` + +**Arguments** + +- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md). 
+ +**Returned value** + +- 64-bit hash code. [UInt64](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT intHash64(42); +``` + +Result: + +```response +┌────────intHash64(42)─┐ +│ 11490350930367293593 │ +└──────────────────────┘ +``` ## SHA1, SHA224, SHA256, SHA512, SHA512_256 From 18e411d35366a82e3a2c9a725ccfabfa6a9170b6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:28:41 +0200 Subject: [PATCH 255/299] remove unwanted change --- .../functions/array-functions.md | 42 +------------------ 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 4080dce883f..1b52440903d 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -3080,44 +3080,4 @@ Result: ## Distance functions -All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). - -## kql_array_sort_asc - -Sorts an array from lowest to highest value. For use with [Kusto Query Language (KQL)](https://clickhouse.com/docs/en/guides/developer/alternative-query-languages#kusto-query-language-kql). - -:::note -For this function to work you should have Kusto enabled. To enable Kusto: - -```sql -SET dialect = 'kusto' -``` - -::: - -**Syntax** - -``` sql -kql_array_sort_asc(arr1 [, arr2 ... arrN]) -``` - -**Arguments** - -- `arr1` — [Array](../data-types/array.md) of numeric values. -- `arr1` — [Array](../data-types/array.md) of numeric values. - -**Returned value** - -- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). - -**Example** - -``` sql -SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res -``` - -``` text -┌─res───────┐ -│ [1,2,0,1] │ -└───────────┘ -``` \ No newline at end of file +All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). 
\ No newline at end of file From 7c6db58eec7d06ae216774b957df992c05e94454 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:34:04 +0200 Subject: [PATCH 256/299] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 78c4b6bde95..ca2c4ec4192 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1900,11 +1900,13 @@ kurtosis kurtpop kurtsamp laion +lagInFrame lang laravel largestTriangleThreeBuckets latencies ldap +leadInFrame leftPad leftPadUTF leftUTF From d9a05bca89f3578bb2cf965b1ce373657de0474a Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 11:43:04 +0200 Subject: [PATCH 257/299] add alias to anyLast_respect_nulls --- .../aggregate-functions/reference/anylast_respect_nulls.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md index 8f093cfdb61..a28b965f7ea 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -13,6 +13,8 @@ Selects the last value encountered, irregardless of whether it is `NULL` or not. anyLast_respect_nulls(column) ``` +Alias: `last_value_respect_nulls`. + **Parameters** - `column`: The column name. From 713546e5102829f71b2c2f27021478c63da0ea3e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jul 2024 11:44:34 +0200 Subject: [PATCH 258/299] Update src/Common/ProfileEvents.cpp --- src/Common/ProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index e80afc95e8d..ba9c4cbfdb2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,7 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ - M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized" ) \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ From b4959b25dfc1a0cf590734bf2b9bc16bc5728188 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jul 2024 11:44:44 +0200 Subject: [PATCH 259/299] Update src/Common/ProfileEvents.cpp --- src/Common/ProfileEvents.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp 
b/src/Common/ProfileEvents.cpp index ba9c4cbfdb2..e80afc95e8d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -508,7 +508,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ - M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized" ) \ + M(FilesystemCacheFailToReserveSpaceBecauseOfCacheResize, "Number of times space reservation was skipped due to the cache is being resized") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ M(FilesystemCacheFreeSpaceKeepingThreadRun, "Number of times background thread executed free space keeping job") \ From dd6dac6c5a0d5057e1927e3230d887af86f1d9c3 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 12:15:59 +0200 Subject: [PATCH 260/299] Stateless tests: better sort checks in test report --- .../test/util/process_functional_tests_result.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 8b2fd46c973..4442c9d7d9e 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -228,5 +228,20 @@ if __name__ == "__main__": state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) + + def test_result_comparator(item): + # sort by status then by check name + order = { + "FAIL": 0, + "Timeout": 1, + "NOT_FAILED": 2, + "BROKEN": 3, + "OK": 4, + "SKIPPED": 5, + } + return order.get(item[1], 10), str(item[0]), item[1] + + test_results.sort(key=test_result_comparator) + write_results(args.out_results_file, args.out_status_file, test_results, status) logging.info("Result written") From 3806ab7ef1ed8f298cbe0a1d3b186a0d29e7d3a6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 12 Jul 2024 12:50:19 +0200 Subject: [PATCH 261/299] remove *_respect_nulls and modify any, anyLast to reflect that they can use modifier RESPECT NULLS --- .../aggregate-functions/index.md | 2 +- .../aggregate-functions/reference/any.md | 8 ++-- .../reference/any_respect_nulls.md | 44 ------------------- .../aggregate-functions/reference/anylast.md | 8 +++- .../reference/anylast_respect_nulls.md | 41 ----------------- .../aggregate-functions/reference/index.md | 3 +- 6 files changed, 13 insertions(+), 93 deletions(-) delete mode 100644 docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md delete mode 100644 docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 96bf0c5d93b..5056ef2c7aa 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -18,7 +18,7 @@ ClickHouse also 
supports: During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. -There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`. +There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`. **Examples:** diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index cdff7dde4a9..972263585f2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -5,12 +5,12 @@ sidebar_position: 102 # any -Selects the first encountered value of a column. +Selects the first encountered value of a column, ignoring any `NULL` values. **Syntax** ```sql -any(column) +any(column) [RESPECT NULLS] ``` Aliases: `any_value`, [`first_value`](../reference/first_value.md). @@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md). **Returned value** -By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: :::note The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour. diff --git a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md deleted file mode 100644 index 99104a9b8c7..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls -sidebar_position: 103 ---- - -# any_respect_nulls - -Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not. - -Alias: `any_value_respect_nulls`, `first_value_repect_nulls`. - -**Syntax** - -```sql -any_respect_nulls(column) -``` - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is a `NULL` value or not. 
- -**Example** - -Query: - -```sql -CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL); - -SELECT any(city), any_respect_nulls(city) FROM any_nulls; -``` - -```response -┌─any(city)─┬─any_respect_nulls(city)─┐ -│ Amsterdam │ ᴺᵁᴸᴸ │ -└───────────┴─────────────────────────┘ -``` - -**See Also** -- [any](../reference/any.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index e43bc07fbdc..202d2e9fb10 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -5,17 +5,21 @@ sidebar_position: 105 # anyLast -Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. +Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function. **Syntax** ```sql -anyLast(column) +anyLast(column) [RESPECT NULLS] ``` **Parameters** - `column`: The column name. +:::note +Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not. +::: + **Returned value** - The last value encountered. diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md deleted file mode 100644 index a28b965f7ea..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 106 ---- - -# anyLast_respect_nulls - -Selects the last value encountered, irregardless of whether it is `NULL` or not. - -**Syntax** - -```sql -anyLast_respect_nulls(column) -``` - -Alias: `last_value_respect_nulls`. - -**Parameters** -- `column`: The column name. - -**Returned value** - -- The last value encountered, irregardless of whether it is `NULL` or not. 
- -**Example** - -Query: - -```sql -CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log; - -INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL); - -SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls; -``` - -```response -┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐ -│ Valencia │ ᴺᵁᴸᴸ │ -└───────────────┴─────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..323a99d276f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -44,10 +44,9 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: - [analysisOfVariance](../reference/analysis_of_variance.md) -- [any](../reference/any_respect_nulls.md) +- [any](../reference/any.md) - [anyHeavy](../reference/anyheavy.md) - [anyLast](../reference/anylast.md) -- [anyLast](../reference/anylast_respect_nulls.md) - [boundingRatio](../reference/boundrat.md) - [first_value](../reference/first_value.md) - [last_value](../reference/last_value.md) From 633db10d397a030b9e0f5aa4435fe5d6c002b54f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 12 Jul 2024 12:58:14 +0200 Subject: [PATCH 262/299] Update docs/en/sql-reference/functions/hash-functions.md --- docs/en/sql-reference/functions/hash-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index d2ed4516fce..7c977e7d6dc 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -353,7 +353,7 @@ It works faster than [intHash32](#inthash32). 
**Syntax** ```sql -intHash32(int) +intHash64(int) ``` **Arguments** From c7180e67bde8fa7924c77b999a0dafdc2b4b283d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 12 Jul 2024 13:08:53 +0100 Subject: [PATCH 263/299] impl --- cmake/limit_jobs.cmake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 8e48fc9b9d8..3a759b90fe3 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,9 +42,14 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) + if (ARCH_AARCH64) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") + set (PARALLEL_LINK_JOBS 1) + elseif (PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) + endif () endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") From a4591a4dc44a4d8488721125e107ddbe03384c95 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 11 Jul 2024 22:19:50 +0200 Subject: [PATCH 264/299] CI: Skip pending and not affected jobs from PR workflow run --- tests/ci/ci.py | 3 +++ tests/ci/ci_cache.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index fac50d30022..4774f65b062 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -997,6 +997,9 @@ def main() -> int: ) ci_cache.print_status() + if IS_CI and pr_info.is_pr: + ci_cache.filter_out_not_affected_jobs() + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 8ee0ae54385..594654ce168 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -674,6 +674,47 @@ class CiCache: bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path ) + def filter_out_not_affected_jobs(self): + """ + removes the following jobs from to_do and to_wait lists: + test jobs - as not affected by the change + build jobs which are not required by left test jobs + :return: + """ + remove_from_await_list = [] + for job_name, job_config in self.jobs_to_wait.items(): + if CI.is_test_job(job_name): + remove_from_await_list.append(job_name) + for job in remove_from_await_list: + print(f"Filter job [{job}] - test job and not affected by the change") + del self.jobs_to_wait[job] + del self.jobs_to_do[job] + + required_builds = list() + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_test_job(job_name) and job_config.required_builds: + required_builds += job_config.required_builds + required_builds = list(set(required_builds)) + + remove_builds = [] + has_builds_to_do = False + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_build_job(job_name): + if job_name not in required_builds: + remove_builds += job_name + else: + 
has_builds_to_do = True + + for build_job in remove_builds: + print(f"Filter build job [{build_job}] - not affected and not required by test jobs") + del self.jobs_to_do[build_job] + if build_job in self.jobs_to_wait: + del self.jobs_to_wait[build_job] + + if not has_builds_to_do and CI.JobNames.BUILD_CHECK in self.jobs_to_do: + print(f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds to do") + del self.jobs_to_do[CI.JobNames.BUILD_CHECK] + def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ await pending jobs to be finished From f9eb0f9efd6d5ad8cb80831a960d7cd313a71d24 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 12:29:34 +0200 Subject: [PATCH 265/299] ci unit test --- tests/ci/ci.py | 3 +- tests/ci/ci_cache.py | 47 ++++++++-- .../lambda_shared/token.py | 1 + tests/ci/ssh.py | 6 +- tests/ci/test_ci_config.py | 86 ++++++++++++++++++- tests/ci/test_ci_options.py | 14 ++- 6 files changed, 135 insertions(+), 22 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 4774f65b062..32b87698395 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -995,11 +995,12 @@ def main() -> int: ci_settings, args.skip_jobs, ) - ci_cache.print_status() if IS_CI and pr_info.is_pr: ci_cache.filter_out_not_affected_jobs() + ci_cache.print_status() + if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call ci_cache.await_pending_jobs(pr_info.is_release) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 594654ce168..07dc362428c 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -676,37 +676,68 @@ class CiCache: def filter_out_not_affected_jobs(self): """ - removes the following jobs from to_do and to_wait lists: - test jobs - as not affected by the change - build jobs which are not required by left test jobs + Filter is to be applied in PRs to remove jobs that are not affected by the change + It removes jobs from @jobs_to_do if it is a: + 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) + 2. test job and it has finished on release branch (even if failed) + 2. build job which is not required by any test job that is left in @jobs_to_do + :return: """ + # 1. remove_from_await_list = [] for job_name, job_config in self.jobs_to_wait.items(): - if CI.is_test_job(job_name): + if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: remove_from_await_list.append(job_name) for job in remove_from_await_list: print(f"Filter job [{job}] - test job and not affected by the change") del self.jobs_to_wait[job] del self.jobs_to_do[job] - required_builds = list() + # 2. + remove_from_to_do = [] + for job_name, job_config in self.jobs_to_do.items(): + if CI.is_test_job(job_name): + batches_to_remove = [] + if job_config.batches is not None: + for batch in job_config.batches: + if self.is_failed( + job_name, batch, job_config.num_batches, release_branch=True + ): + print( + f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" + ) + batches_to_remove.append(batch) + for batch in batches_to_remove: + job_config.batches.remove(batch) + if not job_config.batches: + print( + f"Filter [{job_name}] - not affected by the change (failed on release branch)" + ) + remove_from_to_do.append(job_name) + for job in remove_from_to_do: + del self.jobs_to_do[job] + + # 3. 
+ required_builds = [] # type: List[str] for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_config.required_builds: required_builds += job_config.required_builds required_builds = list(set(required_builds)) - remove_builds = [] + remove_builds = [] # type: List[str] has_builds_to_do = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_build_job(job_name): if job_name not in required_builds: - remove_builds += job_name + remove_builds.append(job_name) else: has_builds_to_do = True for build_job in remove_builds: - print(f"Filter build job [{build_job}] - not affected and not required by test jobs") + print( + f"Filter build job [{build_job}] - not affected and not required by test jobs" + ) del self.jobs_to_do[build_job] if build_job in self.jobs_to_wait: del self.jobs_to_wait[build_job] diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index 9749122bd39..3fb8f10c0e2 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -1,4 +1,5 @@ """Module to get the token for GitHub""" + from dataclasses import dataclass import json import time diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 321826fcf44..89d90d724d2 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ[ - "SSH_OPTIONS" - ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + os.environ["SSH_OPTIONS"] = ( + f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + ) def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 47247b91858..558faca915e 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -417,7 +417,7 @@ class TestCIConfig(unittest.TestCase): assert not ci_cache.jobs_to_skip assert not ci_cache.jobs_to_wait - # pretend there are pending jobs that we neet to wait + # pretend there are pending jobs that we need to wait ci_cache.jobs_to_wait = dict(ci_cache.jobs_to_do) for job, config in ci_cache.jobs_to_wait.items(): assert not config.pending_batches @@ -489,3 +489,87 @@ class TestCIConfig(unittest.TestCase): self.assertCountEqual( list(ci_cache.jobs_to_do) + ci_cache.jobs_to_skip, all_jobs_in_wf ) + + def test_ci_py_filters_not_affected_jobs_in_prs(self): + """ + checks ci.py filters not affected jobs in PRs + """ + settings = CiSettings() + settings.no_ci_cache = True + pr_info = PRInfo(github_event=_TEST_EVENT_JSON) + pr_info.event_type = EventType.PUSH + pr_info.number = 0 + assert pr_info.is_release and not pr_info.is_merge_queue + ci_cache = CIPY._configure_jobs( + S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True + ) + self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") + all_jobs_in_wf = list(ci_cache.jobs_to_do) + assert not ci_cache.jobs_to_wait + assert not ci_cache.jobs_to_skip + + # pretend there are pending jobs that we need to wait + for job, job_config in ci_cache.jobs_to_do.items(): + ci_cache.jobs_to_wait[job] = job_config + + # remove couple tests from to_wait and + # expect they are preserved in @jobs_to_to along with required package_asan + del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_ASAN] + del ci_cache.jobs_to_wait[CI.JobNames.INTEGRATION_TEST_TSAN] + del 
ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_MSAN] + + # pretend we have some batches failed for one of the job from the to_do list + failed_job = CI.JobNames.INTEGRATION_TEST_TSAN + failed_job_config = ci_cache.jobs_to_do[failed_job] + FAILED_BATCHES = [0, 3] + for batch in FAILED_BATCHES: + assert batch < failed_job_config.num_batches + record = CiCache.Record( + record_type=CiCache.RecordType.FAILED, + job_name=failed_job, + job_digest=ci_cache.job_digests[failed_job], + batch=batch, + num_batches=failed_job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + # pretend we have all batches failed for one of the job from the to_do list + failed_job = CI.JobNames.STATELESS_TEST_MSAN + failed_job_config = ci_cache.jobs_to_do[failed_job] + assert failed_job_config.num_batches > 1 + for batch in range(failed_job_config.num_batches): + record = CiCache.Record( + record_type=CiCache.RecordType.FAILED, + job_name=failed_job, + job_digest=ci_cache.job_digests[failed_job], + batch=batch, + num_batches=failed_job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record + + ci_cache.filter_out_not_affected_jobs() + expected_to_do = [ + CI.JobNames.STATELESS_TEST_ASAN, + CI.BuildNames.PACKAGE_ASAN, + CI.JobNames.INTEGRATION_TEST_TSAN, + CI.BuildNames.PACKAGE_TSAN, + CI.JobNames.BUILD_CHECK, + ] + self.assertCountEqual( + list(ci_cache.jobs_to_wait), + [ + CI.BuildNames.PACKAGE_ASAN, + CI.BuildNames.PACKAGE_TSAN, + CI.JobNames.BUILD_CHECK, + ], + ) + self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) + self.assertTrue(ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches) + for batch in ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches: + self.assertTrue(batch not in FAILED_BATCHES) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 3f158e79f30..f4d14a17512 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appears in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( From ed693da2b0937f708fe3c37d73821e49e8f2314f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 12 Jul 2024 12:34:16 +0000 Subject: [PATCH 266/299] Automatic style fix --- tests/ci/ssh.py | 6 +++--- tests/ci/test_ci_options.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py 
index 89d90d724d2..321826fcf44 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ["SSH_OPTIONS"] = ( - f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" - ) + os.environ[ + "SSH_OPTIONS" + ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index f4d14a17512..3f158e79f30 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,10 +172,14 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs["fuzzers"].run_by_label = ( + jobs_configs[ + "fuzzers" + ].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs["Integration tests (asan)"].release_only = ( + jobs_configs[ + "Integration tests (asan)" + ].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -307,9 +311,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs["fuzzers"].run_by_label = ( - "TEST_LABEL" # check "fuzzers" does not appears in the result - ) + jobs_configs[ + "fuzzers" + ].run_by_label = "TEST_LABEL" # check "fuzzers" does not appears in the result jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( From a3ab1ab5ca707c0e40b4ad738413dd868f2db606 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 12 Jul 2024 13:10:13 +0000 Subject: [PATCH 267/299] CI: Do not block on few number of test failures --- tests/ci/ci_config.py | 3 ++ tests/ci/ci_utils.py | 15 ++++++++- tests/ci/merge_pr.py | 78 ++++++++++++++++++++++++++++++++++--------- 3 files changed, 80 insertions(+), 16 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8eda6e6b96f..9a9aa553e1b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,6 +13,9 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel from ci_definitions import BuildConfig as BuildConfig diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index 629f37289a9..abc4a88989d 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -1,8 +1,9 @@ import os +import re import subprocess from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union +from typing import Any, Iterator, List, Union, Optional class WithIter(type): @@ -83,3 +84,15 @@ class Shell: check=False, ) return result.returncode == 0 + + +class Utils: + @staticmethod + def get_failed_tests_number(description: str) -> Optional[int]: + description = description.lower() + + pattern = r"fail:\s*(\d+)\s*(?=,|$)" + match = re.search(pattern, description) + if match: + return int(match.group(1)) + return None diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 37c08fc4efe..6b437731561 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -26,6 +26,8 @@ from pr_info import PRInfo from 
report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX +from ci_config import CI +from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -251,23 +253,69 @@ def main(): # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check( - commit, - statuses, - workflow_failed=(args.wf_status != "success"), - ) - # Process upstream StatusNames.SYNC - pr_info = PRInfo() - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - print("Updating upstream statuses") - update_upstream_sync_status(pr_info, state) + max_failed_tests_per_job = 0 + job_name_with_max_failures = None + total_failed_tests = 0 + failed_to_get_info = False + has_failed_statuses = False + for status in statuses: + if not CI.is_required(status.context): + continue + if status.state == FAILURE: + has_failed_statuses = True + failed_cnt = Utils.get_failed_tests_number(status.description) + if failed_cnt is None: + failed_to_get_info = True + else: + if failed_cnt > max_failed_tests_per_job: + job_name_with_max_failures = status.context + max_failed_tests_per_job = failed_cnt + total_failed_tests += failed_cnt + elif status.state != SUCCESS: + has_failed_statuses = True + print( + f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" + ) + failed_to_get_info = True - if args.wf_status != "success": - # exit with 1 to rerun on workflow failed job restart + can_continue = True + if total_failed_tests > CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: + print( + f"Required check has [{total_failed_tests}] failed - block further testing" + ) + can_continue = False + if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: + print( + f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" + ) + can_continue = False + if failed_to_get_info: + print(f"Unexpected commit status state - block further testing") + can_continue = False + if args.wf_status != SUCCESS: + can_continue = False + print("Workflow has failures - block further testing") + + if args.wf_status == "success" or has_failed_statuses: + state = trigger_mergeable_check( + commit, + statuses, + ) + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + else: + print( + "Workflow failed but no failed statuses found (died runner?) 
- cannot set Mergeable Check status" + ) + + if not can_continue: sys.exit(1) sys.exit(0) From 666f5ffaf9591ae70484930cf6e381a7ab812381 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 15:17:51 +0200 Subject: [PATCH 268/299] mypy fix --- tests/ci/ci_cache.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 07dc362428c..fe54634039d 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -699,15 +699,15 @@ class CiCache: for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name): batches_to_remove = [] - if job_config.batches is not None: - for batch in job_config.batches: - if self.is_failed( - job_name, batch, job_config.num_batches, release_branch=True - ): - print( - f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" - ) - batches_to_remove.append(batch) + assert job_config.batches is not None + for batch in job_config.batches: + if self.is_failed( + job_name, batch, job_config.num_batches, release_branch=True + ): + print( + f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" + ) + batches_to_remove.append(batch) for batch in batches_to_remove: job_config.batches.remove(batch) if not job_config.batches: From 05810ec76fc8e811296daabee97cccc625204941 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 15:40:06 +0200 Subject: [PATCH 269/299] do not skip Build_report --- tests/ci/ci_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index fe54634039d..291ed56aeea 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -697,7 +697,7 @@ class CiCache: # 2. remove_from_to_do = [] for job_name, job_config in self.jobs_to_do.items(): - if CI.is_test_job(job_name): + if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: batches_to_remove = [] assert job_config.batches is not None for batch in job_config.batches: From c06589392b866bf4c799b1f5053197f7027f3db3 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 17:21:11 +0200 Subject: [PATCH 270/299] Stateless tests: fix flaky tests --- .../01037_polygon_dicts_correctness_all.sh | 14 +- .../01037_polygon_dicts_correctness_fast.sh | 14 +- .../01037_polygon_dicts_simple_functions.ans | 208 +++++++++--------- .../01037_polygon_dicts_simple_functions.sh | 127 +++++------ 4 files changed, 181 insertions(+), 182 deletions(-) diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index fff786d6c06..39f235d9966 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -5,20 +5,22 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${CURDIR}" +DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} +tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS points; CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS polygons_array; @@ -32,9 +34,9 @@ CREATE TABLE polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index c9cd151a2d9..3e461abcefe 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -5,19 +5,21 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${CURDIR}" +DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} +tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" CREATE TABLE points (x Float64, y Float64) ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_point_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO points FORMAT TSV" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_point_data" -rm "${CURDIR}"/01037_point_data +rm "${DATA_DIR}"/01037_point_data $CLICKHOUSE_CLIENT -n --query=" DROP TABLE IF EXISTS polygons_array; @@ -31,9 +33,9 @@ CREATE TABLE polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${DATA_DIR}/01037_polygon_data" -rm "${CURDIR}"/01037_polygon_data +rm "${DATA_DIR}"/01037_polygon_data for type in "${SearchTypes[@]}"; do diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans index dfad14fb113..937539643ec 100644 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.ans @@ -1,104 +1,104 @@ -dictGet test_01037.dict_array (-100,-42) qqq 101 -dictGet test_01037.dict_array (-1,0) Click South 423 -dictGet test_01037.dict_array (-0.1,0) Click South 423 -dictGet test_01037.dict_array (0,-2) Click West 424 -dictGet test_01037.dict_array (0,-1.1) Click West 424 -dictGet test_01037.dict_array (0,1.1) Click North 422 -dictGet test_01037.dict_array (0,2) Click North 422 -dictGet test_01037.dict_array (0.1,0) Click East 421 -dictGet test_01037.dict_array (0.99,2.99) Click North 422 -dictGet test_01037.dict_array (1,0) Click East 421 -dictGet test_01037.dict_array (3,3) House 314159 -dictGet test_01037.dict_array (5,6) Click 42 -dictGet test_01037.dict_array (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_array (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_array (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_array (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_array (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_array 
(0,-2) Click West 424 -dictGetOrDefault test_01037.dict_array (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_array (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_array (0,2) Click North 422 -dictGetOrDefault test_01037.dict_array (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_array (1,0) Click East 421 -dictGetOrDefault test_01037.dict_array (3,3) House 314159 -dictGetOrDefault test_01037.dict_array (5,6) Click 42 -dictGetOrDefault test_01037.dict_array (7.01,7.01) ee 55 -dictGet test_01037.dict_tuple (-100,-42) qqq 101 -dictGet test_01037.dict_tuple (-1,0) Click South 423 -dictGet test_01037.dict_tuple (-0.1,0) Click South 423 -dictGet test_01037.dict_tuple (0,-2) Click West 424 -dictGet test_01037.dict_tuple (0,-1.1) Click West 424 -dictGet test_01037.dict_tuple (0,1.1) Click North 422 -dictGet test_01037.dict_tuple (0,2) Click North 422 -dictGet test_01037.dict_tuple (0.1,0) Click East 421 -dictGet test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGet test_01037.dict_tuple (1,0) Click East 421 -dictGet test_01037.dict_tuple (3,3) House 314159 -dictGet test_01037.dict_tuple (5,6) Click 42 -dictGet test_01037.dict_tuple (7.01,7.01) qqq 101 -dictGetOrDefault test_01037.dict_tuple (-100,-42) www 1234 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) www 1234 -dictGetOrDefault test_01037.dict_tuple (-100,-42) dd 44 -dictGetOrDefault test_01037.dict_tuple (-1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (-0.1,0) Click South 423 -dictGetOrDefault test_01037.dict_tuple (0,-2) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,-1.1) Click West 424 -dictGetOrDefault test_01037.dict_tuple (0,1.1) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0,2) Click North 422 -dictGetOrDefault test_01037.dict_tuple (0.1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (0.99,2.99) Click North 422 -dictGetOrDefault test_01037.dict_tuple (1,0) Click East 421 -dictGetOrDefault test_01037.dict_tuple (3,3) House 314159 -dictGetOrDefault test_01037.dict_tuple (5,6) Click 42 -dictGetOrDefault test_01037.dict_tuple (7.01,7.01) ee 55 -dictHas test_01037.dict_array (-100,-42) 0 -dictHas test_01037.dict_array (-1,0) 1 -dictHas test_01037.dict_array (-0.1,0) 1 -dictHas test_01037.dict_array (0,-2) 1 -dictHas test_01037.dict_array (0,-1.1) 1 -dictHas test_01037.dict_array (0,1.1) 1 -dictHas test_01037.dict_array (0,2) 1 -dictHas test_01037.dict_array (0.1,0) 1 -dictHas test_01037.dict_array (0.99,2.99) 1 -dictHas test_01037.dict_array (1,0) 1 -dictHas test_01037.dict_array (3,3) 1 -dictHas test_01037.dict_array (5,6) 1 -dictHas test_01037.dict_array (7.01,7.01) 0 -dictHas test_01037.dict_tuple (-100,-42) 0 -dictHas test_01037.dict_tuple (-1,0) 1 -dictHas test_01037.dict_tuple (-0.1,0) 1 -dictHas 
test_01037.dict_tuple (0,-2) 1 -dictHas test_01037.dict_tuple (0,-1.1) 1 -dictHas test_01037.dict_tuple (0,1.1) 1 -dictHas test_01037.dict_tuple (0,2) 1 -dictHas test_01037.dict_tuple (0.1,0) 1 -dictHas test_01037.dict_tuple (0.99,2.99) 1 -dictHas test_01037.dict_tuple (1,0) 1 -dictHas test_01037.dict_tuple (3,3) 1 -dictHas test_01037.dict_tuple (5,6) 1 -dictHas test_01037.dict_tuple (7.01,7.01) 0 +dictGet dict_array (-100,-42) qqq 101 +dictGet dict_array (-1,0) Click South 423 +dictGet dict_array (-0.1,0) Click South 423 +dictGet dict_array (0,-2) Click West 424 +dictGet dict_array (0,-1.1) Click West 424 +dictGet dict_array (0,1.1) Click North 422 +dictGet dict_array (0,2) Click North 422 +dictGet dict_array (0.1,0) Click East 421 +dictGet dict_array (0.99,2.99) Click North 422 +dictGet dict_array (1,0) Click East 421 +dictGet dict_array (3,3) House 314159 +dictGet dict_array (5,6) Click 42 +dictGet dict_array (7.01,7.01) qqq 101 +dictGetOrDefault dict_array (-100,-42) www 1234 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) www 1234 +dictGetOrDefault dict_array (-100,-42) dd 44 +dictGetOrDefault dict_array (-1,0) Click South 423 +dictGetOrDefault dict_array (-0.1,0) Click South 423 +dictGetOrDefault dict_array (0,-2) Click West 424 +dictGetOrDefault dict_array (0,-1.1) Click West 424 +dictGetOrDefault dict_array (0,1.1) Click North 422 +dictGetOrDefault dict_array (0,2) Click North 422 +dictGetOrDefault dict_array (0.1,0) Click East 421 +dictGetOrDefault dict_array (0.99,2.99) Click North 422 +dictGetOrDefault dict_array (1,0) Click East 421 +dictGetOrDefault dict_array (3,3) House 314159 +dictGetOrDefault dict_array (5,6) Click 42 +dictGetOrDefault dict_array (7.01,7.01) ee 55 +dictGet dict_tuple (-100,-42) qqq 101 +dictGet dict_tuple (-1,0) Click South 423 +dictGet dict_tuple (-0.1,0) Click South 423 +dictGet dict_tuple (0,-2) Click West 424 +dictGet dict_tuple (0,-1.1) Click West 424 +dictGet dict_tuple (0,1.1) Click North 422 +dictGet dict_tuple (0,2) Click North 422 +dictGet dict_tuple (0.1,0) Click East 421 +dictGet dict_tuple (0.99,2.99) Click North 422 +dictGet dict_tuple (1,0) Click East 421 +dictGet dict_tuple (3,3) House 314159 +dictGet dict_tuple (5,6) Click 42 +dictGet dict_tuple (7.01,7.01) qqq 101 +dictGetOrDefault dict_tuple (-100,-42) www 1234 +dictGetOrDefault dict_tuple (-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) www 1234 +dictGetOrDefault dict_tuple (-100,-42) dd 44 +dictGetOrDefault dict_tuple 
(-1,0) Click South 423 +dictGetOrDefault dict_tuple (-0.1,0) Click South 423 +dictGetOrDefault dict_tuple (0,-2) Click West 424 +dictGetOrDefault dict_tuple (0,-1.1) Click West 424 +dictGetOrDefault dict_tuple (0,1.1) Click North 422 +dictGetOrDefault dict_tuple (0,2) Click North 422 +dictGetOrDefault dict_tuple (0.1,0) Click East 421 +dictGetOrDefault dict_tuple (0.99,2.99) Click North 422 +dictGetOrDefault dict_tuple (1,0) Click East 421 +dictGetOrDefault dict_tuple (3,3) House 314159 +dictGetOrDefault dict_tuple (5,6) Click 42 +dictGetOrDefault dict_tuple (7.01,7.01) ee 55 +dictHas dict_array (-100,-42) 0 +dictHas dict_array (-1,0) 1 +dictHas dict_array (-0.1,0) 1 +dictHas dict_array (0,-2) 1 +dictHas dict_array (0,-1.1) 1 +dictHas dict_array (0,1.1) 1 +dictHas dict_array (0,2) 1 +dictHas dict_array (0.1,0) 1 +dictHas dict_array (0.99,2.99) 1 +dictHas dict_array (1,0) 1 +dictHas dict_array (3,3) 1 +dictHas dict_array (5,6) 1 +dictHas dict_array (7.01,7.01) 0 +dictHas dict_tuple (-100,-42) 0 +dictHas dict_tuple (-1,0) 1 +dictHas dict_tuple (-0.1,0) 1 +dictHas dict_tuple (0,-2) 1 +dictHas dict_tuple (0,-1.1) 1 +dictHas dict_tuple (0,1.1) 1 +dictHas dict_tuple (0,2) 1 +dictHas dict_tuple (0.1,0) 1 +dictHas dict_tuple (0.99,2.99) 1 +dictHas dict_tuple (1,0) 1 +dictHas dict_tuple (3,3) 1 +dictHas dict_tuple (5,6) 1 +dictHas dict_tuple (7.01,7.01) 0 diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index be983ec1be4..efc66783d62 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -1,56 +1,52 @@ #!/usr/bin/env bash -# Tags: no-debug, no-parallel +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR="/tmp" +TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +mkdir -p $TMP_DIR $CLICKHOUSE_CLIENT -n --query=" -DROP DATABASE IF EXISTS test_01037; +DROP TABLE IF EXISTS polygons_array; -CREATE DATABASE test_01037; +CREATE TABLE polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); +INSERT INTO polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); +INSERT INTO polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); +INSERT INTO polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); +INSERT INTO polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); +INSERT INTO polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_array; +DROP TABLE IF EXISTS polygons_tuple; -CREATE TABLE test_01037.polygons_array (key Array(Array(Array(Array(Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_array VALUES ([[[[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]], [[[5, 5], [5, 1], [7, 1], [7, 7], [1, 7], [1, 5]]]], 'Click', 42); -INSERT INTO test_01037.polygons_array VALUES ([[[[5, 5], [5, -5], [-5, -5], [-5, 5]], [[1, 3], [1, 1], [3, 1], [3, -1], [1, -1], [1, -3], [-1, -3], [-1, -1], [-3, -1], [-3, 1], [-1, 1], [-1, 3]]]], 'House', 314159); -INSERT INTO test_01037.polygons_array VALUES ([[[[3, 1], [0, 1], [0, -1], [3, -1]]]], 'Click East', 421); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, 1], [1, 1], [1, 3], [-1, 3]]]], 'Click North', 422); -INSERT INTO test_01037.polygons_array VALUES ([[[[-3, 1], [-3, -1], [0, -1], [0, 1]]]], 'Click South', 423); -INSERT INTO test_01037.polygons_array VALUES ([[[[-1, -1], [1, -1], [1, -3], [-1, -3]]]], 'Click West', 424); +CREATE TABLE polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; +INSERT INTO polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); +INSERT INTO polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); +INSERT INTO polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); +INSERT INTO polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); +INSERT INTO polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); +INSERT INTO polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); -DROP TABLE IF EXISTS test_01037.polygons_tuple; +DROP TABLE IF EXISTS points; -CREATE TABLE test_01037.polygons_tuple (key Array(Array(Array(Tuple(Float64, Float64)))), name String, value UInt64) ENGINE = Memory; -INSERT INTO test_01037.polygons_tuple VALUES ([[[(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), 
(-3, 1), (-1, 1), (-1, 3)]], [[(5, 5), (5, 1), (7, 1), (7, 7), (1, 7), (1, 5)]]], 'Click', 42); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(5, 5), (5, -5), (-5, -5), (-5, 5)], [(1, 3), (1, 1), (3, 1), (3, -1), (1, -1), (1, -3), (-1, -3), (-1, -1), (-3, -1), (-3, 1), (-1, 1), (-1, 3)]]], 'House', 314159); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Click East', 421); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, 1), (1, 1), (1, 3), (-1, 3)]]], 'Click North', 422); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-3, 1), (-3, -1), (0, -1), (0, 1)]]], 'Click South', 423); -INSERT INTO test_01037.polygons_tuple VALUES ([[[(-1, -1), (1, -1), (1, -3), (-1, -3)]]], 'Click West', 424); - -DROP TABLE IF EXISTS test_01037.points; - -CREATE TABLE test_01037.points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; -INSERT INTO test_01037.points VALUES (0.1, 0.0, 112, 'aax'); -INSERT INTO test_01037.points VALUES (-0.1, 0.0, 113, 'aay'); -INSERT INTO test_01037.points VALUES (0.0, 1.1, 114, 'aaz'); -INSERT INTO test_01037.points VALUES (0.0, -1.1, 115, 'aat'); -INSERT INTO test_01037.points VALUES (3.0, 3.0, 22, 'bb'); -INSERT INTO test_01037.points VALUES (5.0, 6.0, 33, 'cc'); -INSERT INTO test_01037.points VALUES (-100.0, -42.0, 44, 'dd'); -INSERT INTO test_01037.points VALUES (7.01, 7.01, 55, 'ee'); -INSERT INTO test_01037.points VALUES (0.99, 2.99, 66, 'ee'); -INSERT INTO test_01037.points VALUES (1.0, 0.0, 771, 'ffa'); -INSERT INTO test_01037.points VALUES (-1.0, 0.0, 772, 'ffb'); -INSERT INTO test_01037.points VALUES (0.0, 2.0, 773, 'ffc'); -INSERT INTO test_01037.points VALUES (0.0, -2.0, 774, 'ffd'); +CREATE TABLE points (x Float64, y Float64, def_i UInt64, def_s String) ENGINE = Memory; +INSERT INTO points VALUES (0.1, 0.0, 112, 'aax'); +INSERT INTO points VALUES (-0.1, 0.0, 113, 'aay'); +INSERT INTO points VALUES (0.0, 1.1, 114, 'aaz'); +INSERT INTO points VALUES (0.0, -1.1, 115, 'aat'); +INSERT INTO points VALUES (3.0, 3.0, 22, 'bb'); +INSERT INTO points VALUES (5.0, 6.0, 33, 'cc'); +INSERT INTO points VALUES (-100.0, -42.0, 44, 'dd'); +INSERT INTO points VALUES (7.01, 7.01, 55, 'ee'); +INSERT INTO points VALUES (0.99, 2.99, 66, 'ee'); +INSERT INTO points VALUES (1.0, 0.0, 771, 'ffa'); +INSERT INTO points VALUES (-1.0, 0.0, 772, 'ffb'); +INSERT INTO points VALUES (0.0, 2.0, 773, 'ffc'); +INSERT INTO points VALUES (0.0, -2.0, 774, 'ffd'); " - declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") for type in "${SearchTypes[@]}"; @@ -58,63 +54,62 @@ do outputFile="${TMP_DIR}/results${type}.out" $CLICKHOUSE_CLIENT -n --query=" - DROP DICTIONARY IF EXISTS test_01037.dict_array; - CREATE DICTIONARY test_01037.dict_array + DROP DICTIONARY IF EXISTS dict_array; + CREATE DICTIONARY dict_array ( key Array(Array(Array(Array(Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_array' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - DROP DICTIONARY IF EXISTS test_01037.dict_tuple; + DROP DICTIONARY IF EXISTS dict_tuple; - CREATE DICTIONARY test_01037.dict_tuple + CREATE DICTIONARY dict_tuple ( key Array(Array(Array(Tuple(Float64, Float64)))), name String DEFAULT 'qqq', value UInt64 DEFAULT 101 ) PRIMARY KEY key - SOURCE(CLICKHOUSE(HOST 
'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB 'test_01037')) + SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'polygons_tuple' PASSWORD '' DB currentDatabase())) LIFETIME(0) LAYOUT($type()); - select 'dictGet', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + select 'dictGet', 'dict_array' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_array' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictGet', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictGet', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGet(dict_name, 'name', key), - dictGet(dict_name, 'value', key) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGet(dict_name, 'value', key) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, 'www'), - dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from test_01037.points order by x, y; - select 'dictGetOrDefault', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, + dictGetOrDefault(dict_name, 'value', key, toUInt64(1234)) from points order by x, y; + select 'dictGetOrDefault', 'dict_tuple' as dict_name, tuple(x, y) as key, dictGetOrDefault(dict_name, 'name', key, def_s), - dictGetOrDefault(dict_name, 'value', key, def_i) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_array' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; - select 'dictHas', 'test_01037.dict_tuple' as dict_name, tuple(x, y) as key, - dictHas(dict_name, key) from test_01037.points order by x, y; + dictGetOrDefault(dict_name, 'value', key, def_i) from points order by x, y; + select 'dictHas', 'dict_array' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; + select 'dictHas', 'dict_tuple' as dict_name, tuple(x, y) as key, + dictHas(dict_name, key) from points order by x, y; " > "$outputFile" diff -q "${CURDIR}/01037_polygon_dicts_simple_functions.ans" "$outputFile" done $CLICKHOUSE_CLIENT -n --query=" -DROP DICTIONARY test_01037.dict_array; -DROP DICTIONARY test_01037.dict_tuple; -DROP TABLE test_01037.polygons_array; -DROP TABLE test_01037.polygons_tuple; -DROP TABLE test_01037.points; -DROP DATABASE test_01037; +DROP DICTIONARY dict_array; +DROP DICTIONARY dict_tuple; +DROP TABLE polygons_array; +DROP TABLE polygons_tuple; +DROP TABLE points; " From b38928b09f75bc88c722c10ba401623a880a7b70 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 12 Jul 2024 
19:25:32 +0200 Subject: [PATCH 271/299] Update 00992_system_parts_race_condition_zookeeper_long.sh --- .../00992_system_parts_race_condition_zookeeper_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index d45cc3a6871..4887c409844 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -41,7 +41,7 @@ function thread3() function thread4() { - while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done + while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done } function thread5() From 8d07c522f155b3b6ec9760b9ecef28c90598dd1f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 12 Jul 2024 17:13:33 +0100 Subject: [PATCH 272/299] reduce amount of parallel linker jobs further --- cmake/limit_jobs.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 3a759b90fe3..17d8dd42a2c 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -44,8 +44,13 @@ endif () # - and I've verified that lld-11 does not use all available CPU time (in peak) while linking one binary if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) if (ARCH_AARCH64) + # aarch64 builds have started to fail with OOMs quite often (reason not yet clear), so for now let's limit the concurrency message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") set (PARALLEL_LINK_JOBS 1) + if (LINKER_NAME MATCHES "lld") + math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) + set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") + endif() elseif (PARALLEL_LINK_JOBS GREATER 2) message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") set (PARALLEL_LINK_JOBS 2) From 0bb3d07e8ed0f5e7005186e47446d57f8fea0e9f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jul 2024 20:12:39 +0200 Subject: [PATCH 273/299] fix --- src/Processors/Formats/Impl/NpyRowInputFormat.cpp | 3 +++ tests/queries/0_stateless/02895_npy_format.reference | 12 ++++++------ tests/queries/0_stateless/02895_npy_format.sh | 12 ++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp index 65e0f9dd192..773cbc9268e 100644 --- a/src/Processors/Formats/Impl/NpyRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/NpyRowInputFormat.cpp @@ -445,6 +445,9 @@ bool NpyRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & /* elements_in_current_column *= header.shape[i]; } + if (typeid_cast(current_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected nesting level of column '{}', expected {}", column->getName(), header.shape.size() - 1); + for (size_t i = 0; i != elements_in_current_column; ++i) readValue(current_column); diff --git a/tests/queries/0_stateless/02895_npy_format.reference b/tests/queries/0_stateless/02895_npy_format.reference index f9e77644a35..52972f0acbd 100644 --- a/tests/queries/0_stateless/02895_npy_format.reference +++ 
b/tests/queries/0_stateless/02895_npy_format.reference @@ -85,12 +85,12 @@ c [4,5,6] [[1,2],[3,4]] [[5,6],[7,8]] -0 -0 -0 -0 -0 -0 +1 +1 +1 +1 +1 +1 1 [2.199219,1.099609,3.300781] [4.25,3.34961,6.628906] diff --git a/tests/queries/0_stateless/02895_npy_format.sh b/tests/queries/0_stateless/02895_npy_format.sh index 9d05303a091..194b2bc1fe4 100755 --- a/tests/queries/0_stateless/02895_npy_format.sh +++ b/tests/queries/0_stateless/02895_npy_format.sh @@ -52,14 +52,14 @@ $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/two_dim.npy', Npy, 'v $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/three_dim.npy', Npy, 'value Array(Array(Int8))')" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Array(Float32)')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value UUID')" 2>&1 | grep -c "UNKNOWN_TYPE" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Tuple(UInt8)')" 2>&1 | grep -c "UNKNOWN_TYPE" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "BAD_ARGUMENTS" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_float.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_str.npy', Npy, 'value Int8')" 2>&1 | grep -c "ILLEGAL_COLUMN" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/one_dim_unicode.npy', Npy, 'value Float32')" 2>&1 | grep -c "ILLEGAL_COLUMN" -$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "BAD_ARGUMENTS" +$CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/complex.npy')" 2>&1 | grep -c "CANNOT_EXTRACT_TABLE_STRUCTURE" $CLICKHOUSE_LOCAL -q "select * from file('$CURDIR/data_npy/float_16.npy')" From 5b1e9bebe47e6b6971e6432f788e9ad9ce1c5f2b Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 18:19:30 +0200 Subject: [PATCH 274/299] change thresholds --- tests/ci/ci_config.py | 4 ++-- tests/ci/merge_pr.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9a9aa553e1b..d9f8e7d3afd 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -13,8 +13,8 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 2 - MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 1 + MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5 + MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 # reimport types to CI class so that they visible as CI.* and mypy is happy # pylint:disable=useless-import-alias,reimported,import-outside-toplevel diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6b437731561..061376fc856 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -291,13 +291,14 @@ def main(): ) can_continue = False if 
failed_to_get_info: - print(f"Unexpected commit status state - block further testing") + print("Unexpected commit status state - block further testing") can_continue = False if args.wf_status != SUCCESS: can_continue = False print("Workflow has failures - block further testing") if args.wf_status == "success" or has_failed_statuses: + # do not set mergeable check status if args.wf_status == failure, apparently it has dead runners and needs to be restarted state = trigger_mergeable_check( commit, statuses, From 1495ef32180625b743f9f01cd86b4a257ae96ff0 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 12 Jul 2024 20:32:01 +0200 Subject: [PATCH 275/299] CI: Set error status for job with OOM --- pyproject.toml | 1 + tests/ci/ci.py | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 90f089afa41..39511e1a0d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ disable = ''' bare-except, no-else-return, global-statement, + f-string-without-interpolation, ''' [tool.pylint.SIMILARITIES] diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 32b87698395..9f4a98114c5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1125,6 +1125,7 @@ def main() -> int: ### POST action: start elif args.post: + has_oom_error = False if Shell.check( "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" ): @@ -1132,6 +1133,7 @@ def main() -> int: CIBuddy(dry_run=not pr_info.is_release).post_error( "Out Of Memory", job_name=_get_ext_check_name(args.job_name) ) + has_oom_error = True job_report = JobReport.load() if JobReport.exist() else None if job_report: @@ -1235,8 +1237,25 @@ def main() -> int: ch_helper, ) else: - # no job report - print(f"No job report for {[args.job_name]} - do nothing") + if CI.is_test_job(args.job_name): + if has_oom_error: + description = "ERROR: Out Of Memory" + else: + description = "ERROR: Unknown job status" + gh = GitHub(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + post_commit_status( + commit, + ERROR, + "", + description, + job_report.check_name or _get_ext_check_name(args.job_name), + pr_info, + dump_to_file=True, + ) + else: + # no job report + print(f"No job report for {[args.job_name]} - do nothing") ### POST action: end ### MARK SUCCESS action: start From 2dc7d1f510dfc7a4719835a31acf9a9a47a8c1dc Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 12 Jul 2024 21:09:10 +0200 Subject: [PATCH 276/299] Stateless tests: fix flaky tests 2 --- .../0_stateless/01037_polygon_dicts_correctness_all.sh | 5 +++-- .../0_stateless/01037_polygon_dicts_correctness_fast.sh | 5 +++-- .../0_stateless/01037_polygon_dicts_simple_functions.sh | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index 39f235d9966..9a26f78a8ee 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -5,12 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON" "POLYGON_SIMPLE" "POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} tar -xf "${CURDIR}"/01037_test_data_search.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh index 3e461abcefe..47f7a5c1c4f 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_fast.sh @@ -5,12 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp +DATA_DIR=${CLICKHOUSE_TMP}/data mkdir -p $TMP_DIR +mkdir -p $DATA_DIR declare -a SearchTypes=("POLYGON_INDEX_EACH" "POLYGON_INDEX_CELL") -DATA_DIR=${CURDIR}/${CLICKHOUSE_DATABASE} tar -xf "${CURDIR}"/01037_test_data_perf.tar.gz -C "${DATA_DIR}" $CLICKHOUSE_CLIENT -n --query=" diff --git a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh index efc66783d62..d1ee3f283bc 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_simple_functions.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -TMP_DIR=${CLICKHOUSE_TMP}${CLICKHOUSE_DATABASE} +TMP_DIR=${CLICKHOUSE_TMP}/tmp mkdir -p $TMP_DIR $CLICKHOUSE_CLIENT -n --query=" From cc2cce97177552dcf82682f06418ffa3388760c1 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Sat, 13 Jul 2024 00:34:54 +0200 Subject: [PATCH 277/299] Stateless tests: fix flaky tests 3 --- tests/clickhouse-test | 2 +- tests/queries/0_stateless/02834_apache_arrow_abort.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 79f6b5d71d3..bc30b3c21b7 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -711,7 +711,7 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: 32 + "max_insert_threads": lambda: 12 if random.random() < 0.03 else random.randint(1, 3), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index bd29e95db9a..47e1c5d3951 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ -- Tags: no-fasttest -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. 
- +SET optimize_trivial_insert_select=1; INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } From 8295a8e9b8360eec0128078fb05f6c3d50ce3b97 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Sat, 13 Jul 2024 00:39:53 +0200 Subject: [PATCH 278/299] Stateless tests: fix flaky tests 4 --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bc30b3c21b7..0cf46732354 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -729,7 +729,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_joined_block_size_rows": lambda: random.randint(8000, 100000), - "max_threads": lambda: 64 if random.random() < 0.03 else random.randint(1, 3), + "max_threads": lambda: 32 if random.random() < 0.03 else random.randint(1, 3), "optimize_append_index": lambda: random.randint(0, 1), "optimize_if_chain_to_multiif": lambda: random.randint(0, 1), "optimize_if_transform_strings_to_enum": lambda: random.randint(0, 1), From 04525888f5db6f2c0e61e170cab5ad57626fbf17 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 11:55:25 +0200 Subject: [PATCH 279/299] fix for failed workflow status --- tests/ci/merge_pr.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 061376fc856..6fb6821ede4 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -293,11 +293,14 @@ def main(): if failed_to_get_info: print("Unexpected commit status state - block further testing") can_continue = False - if args.wf_status != SUCCESS: + if args.wf_status != SUCCESS and not has_failed_statuses: + # workflow failed but reason is unknown as no failed statuses present can_continue = False - print("Workflow has failures - block further testing") + print( + "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." 
+ ) - if args.wf_status == "success" or has_failed_statuses: + if args.wf_status == SUCCESS or has_failed_statuses: # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted state = trigger_mergeable_check( commit, From 8706145c467852e7d4b84e5a9823050b8de3e085 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 12:17:03 +0200 Subject: [PATCH 280/299] fix for not success status in Sync --- tests/ci/merge_pr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 6fb6821ede4..59749abb4fa 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -272,7 +272,11 @@ def main(): job_name_with_max_failures = status.context max_failed_tests_per_job = failed_cnt total_failed_tests += failed_cnt - elif status.state != SUCCESS: + elif status.state != SUCCESS and status.context not in ( + CI.StatusNames.SYNC, + CI.StatusNames.PR_CHECK, + ): + # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) has_failed_statuses = True print( f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" From 11f3e406c6ab040cc42d209ac2471406367f577c Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 12:48:48 +0200 Subject: [PATCH 281/299] CI: Cache AST fuzzers (run always) jobs in CI --- tests/ci/ci.py | 4 ++-- tests/ci/ci_cache.py | 4 ++-- tests/ci/ci_definitions.py | 9 ++++++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 32b87698395..57552985f62 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -325,8 +325,8 @@ def _mark_success_action( # do nothing, exit without failure print(f"ERROR: no status file for job [{job}]") - if job_config.run_always or job_config.run_by_label: - print(f"Job [{job}] runs always or by label in CI - do not cache") + if job_config.run_by_label or not job_config.has_digest(): + print(f"Job [{job}] has no digest or run by label in CI - do not cache") else: if pr_info.is_master: pass diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 291ed56aeea..bc6761959b4 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -609,7 +609,7 @@ class CiCache: pushes pending records for all jobs that supposed to be run """ for job, job_config in self.jobs_to_do.items(): - if job_config.run_always: + if not job_config.has_digest(): continue pending_state = PendingState(time.time(), run_url=GITHUB_RUN_URL) assert job_config.batches @@ -680,7 +680,7 @@ class CiCache: It removes jobs from @jobs_to_do if it is a: 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) 2. test job and it has finished on release branch (even if failed) - 2. build job which is not required by any test job that is left in @jobs_to_do + 3. 
build job which is not required by any test job that is left in @jobs_to_do :return: """ diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 4ae252560e9..a79097d8b55 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -327,6 +327,9 @@ class JobConfig: assert self.required_builds return self.required_builds[0] + def has_digest(self) -> bool: + return self.digest != DigestConfig() + class CommonJobConfigs: """ @@ -440,7 +443,11 @@ class CommonJobConfigs: ) ASTFUZZER_TEST = JobConfig( job_name_keyword="ast", - digest=DigestConfig(), + digest=DigestConfig( + include_paths=[ + "./tests/ci/ast_fuzzer_check.py", + ], + docker=["clickhouse/fuzzer"]), run_command="ast_fuzzer_check.py", run_always=True, runner_type=Runners.FUZZER_UNIT_TESTER, From fd9f91c796227a4b9d7273f812c626c2053b098a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 13 Jul 2024 11:07:52 +0000 Subject: [PATCH 282/299] Automatic style fix --- tests/ci/ci_definitions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index a79097d8b55..d2da73f4e46 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -447,7 +447,8 @@ class CommonJobConfigs: include_paths=[ "./tests/ci/ast_fuzzer_check.py", ], - docker=["clickhouse/fuzzer"]), + docker=["clickhouse/fuzzer"], + ), run_command="ast_fuzzer_check.py", run_always=True, runner_type=Runners.FUZZER_UNIT_TESTER, From 99cd83da1f6bf71545337fdd72ffd2ed7cf68bb2 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 27 Jun 2024 19:43:10 +0200 Subject: [PATCH 283/299] New Create Release workflow --- .github/workflows/create_release.yml | 138 +++++- tests/ci/artifactory.py | 356 ++++++++++++++ tests/ci/create_release.py | 663 +++++++++++++++++++++++++++ tests/ci/docker_server.py | 24 +- tests/ci/version_helper.py | 6 + 5 files changed, 1175 insertions(+), 12 deletions(-) create mode 100644 tests/ci/artifactory.py create mode 100755 tests/ci/create_release.py diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 3988df3b2b1..e2ad16a05a4 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -6,8 +6,8 @@ concurrency: 'on': workflow_dispatch: inputs: - sha: - description: 'The SHA hash of the commit from which to create the release' + ref: + description: 'Git reference (branch or commit sha) from which to create the release' required: true type: string type: @@ -15,15 +15,139 @@ concurrency: required: true type: choice options: - - new + # TODO: + #- new - patch + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean jobs: - Release: - runs-on: [self-hosted, style-checker-aarch64] + CreateRelease: + env: + GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + RELEASE_TYPE: + runs-on: [self-hosted, release-maker] steps: + - name: DebugInfo + uses: hmarr/debug-action@f7318c783045ac39ed9bb497e22ce835fdafbfe6 + - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings + run: | + cat >> "$GITHUB_ENV" << 'EOF' + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" + - name: Download All Release Artifacts + run: | + python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push Git Tag for the Release + run: | + python3 ./tests/ci/create_release.py 
--push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Bump CH Version and Update Contributors' List + run: | + python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Checkout master + run: | + git checkout master + - name: Bump Docker versions, Changelog, Security + run: | + [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 + echo "List versions" + ./utils/list-versions/list-versions.sh > ./utils/list-versions/version_date.tsv + echo "Update docker version" + ./utils/list-versions/update-docker-version.sh + echo "Generate ChangeLog" + export CI=1 + docker run -u "${UID}:${GID}" -e PYTHONUNBUFFERED=1 -e CI=1 --network=host \ + --volume=".:/ClickHouse" clickhouse/style-test \ + /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ + --gh-user-or-token="$GH_TOKEN" --jobs=5 \ + --output="/ClickHouse/docs/changelogs/${RELEASE_TAG}.md" "${RELEASE_TAG}" + git add ./docs/changelogs/${RELEASE_TAG}.md + echo "Generate Security" + python3 ./utils/security-generator/generate_security.py > SECURITY.md + git diff HEAD + - name: Create ChangeLog Pull Request + if: ${{ ! inputs.dry-run }} + uses: peter-evans/create-pull-request@v6 + with: + author: "robot-clickhouse " + token: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} + committer: "robot-clickhouse " + commit-message: Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + branch: auto/${{ env.RELEASE_TAG }} + assignees: ${{ github.event.sender.login }} # assign the PR to the tag pusher + delete-branch: true + title: Update version_date.tsv and changelog after ${{ env.RELEASE_TAG }} + labels: do not test + body: | + Update version_date.tsv and changelogs after ${{ env.RELEASE_TAG }} + ### Changelog category (leave one): + - Not for changelog (changelog entry is not required) + - name: Reset changes if Dry-run + if: ${{ inputs.dry-run }} + run: | + git reset --hard HEAD + - name: Checkout back to GITHUB_REF + run: | + git checkout "$GITHUB_REF_NAME" + - name: Create GH Release + run: | + python3 ./tests/ci/create_release.py --create-gh-release \ + --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + + - name: Export TGZ Packages + run: | + python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test TGZ Packages + run: | + python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export RPM Packages + run: | + python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test RPM Packages + run: | + python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Export Debian Packages + run: | + python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Test Debian Packages + run: | + python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Docker clickhouse/clickhouse-server building + run: | + cd "./tests/ci" + export CHECK_NAME="Docker server image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ 
env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Docker clickhouse/clickhouse-keeper building + run: | + cd "./tests/ci" + export CHECK_NAME="Docker keeper image" + python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} + - name: Post Slack Message + if: failure() + run: | + echo Slack Message \ No newline at end of file diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py new file mode 100644 index 00000000000..2e18316cb78 --- /dev/null +++ b/tests/ci/artifactory.py @@ -0,0 +1,356 @@ +import argparse +import time +from pathlib import Path +from typing import Optional + +from create_release import PackageDownloader, ReleaseInfo, ShellRunner +from ci_utils import WithIter +from shutil import copy2 + + +class MountPointApp(metaclass=WithIter): + RCLONE = "rclone" + S3FS = "s3fs" + + +class R2MountPoint: + _TEST_BUCKET_NAME = "repo-test" + _PROD_BUCKET_NAME = "packages" + _CACHE_MAX_SIZE_GB = 20 + MOUNT_POINT = "/home/ubuntu/mountpoint" + API_ENDPOINT = "https://d4fd593eebab2e3a58a599400c4cd64d.r2.cloudflarestorage.com" + LOG_FILE = "/home/ubuntu/fuse_mount.log" + # mod time is not required by reprepro and createrepo - disable to simplify bucket's mount sync (applicable for rclone) + NOMODTIME = True + # enable debug messages in mount log + DEBUG = True + # enable cache for mountpoint + CACHE_ENABLED = False + # TODO: which mode is better: minimal/writes/full/off + _RCLONE_CACHE_MODE = "minimal" + UMASK = "0000" + + def __init__(self, app: str, dry_run: bool) -> None: + assert app in MountPointApp + self.app = app + if dry_run: + self.bucket_name = self._TEST_BUCKET_NAME + else: + self.bucket_name = self._PROD_BUCKET_NAME + + self.aux_mount_options = "" + self.async_mount = False + if self.app == MountPointApp.S3FS: + self.cache_dir = "/home/ubuntu/s3fs_cache" + # self.aux_mount_options += "-o nomodtime " if self.NOMODTIME else "" not for s3fs + self.aux_mount_options += "--debug " if self.DEBUG else "" + self.aux_mount_options += ( + f"-o use_cache={self.cache_dir} -o cache_size_mb={self._CACHE_MAX_SIZE_GB * 1024} " + if self.CACHE_ENABLED + else "" + ) + # without -o nomultipart there are errors like "Error 5 writing to /home/ubuntu/***.deb: Input/output error" + self.mount_cmd = f"s3fs {self.bucket_name} {self.MOUNT_POINT} -o url={self.API_ENDPOINT} -o use_path_request_style -o umask=0000 -o nomultipart -o logfile={self.LOG_FILE} {self.aux_mount_options}" + elif self.app == MountPointApp.RCLONE: + # run rclone mount process asynchronously, otherwise subprocess.run(daemonized command) will not return + self.async_mount = True + self.cache_dir = "/home/ubuntu/rclone_cache" + self.aux_mount_options += "--no-modtime " if self.NOMODTIME else "" + self.aux_mount_options += "-v " if self.DEBUG else "" # -vv too verbose + self.aux_mount_options += ( + f"--vfs-cache-mode {self._RCLONE_CACHE_MODE} --vfs-cache-max-size {self._CACHE_MAX_SIZE_GB}G" + if self.CACHE_ENABLED + else "--vfs-cache-mode off" + ) + # Use --no-modtime to try to avoid: ERROR : rpm/lts/clickhouse-client-24.3.6.5.x86_64.rpm: Failed to apply pending mod time + self.mount_cmd = f"rclone mount remote:{self.bucket_name} {self.MOUNT_POINT} --daemon --cache-dir {self.cache_dir} --umask 0000 --log-file {self.LOG_FILE} {self.aux_mount_options}" + else: + assert False + + def init(self): + print(f"Mount bucket [{self.bucket_name}] to [{self.MOUNT_POINT}]") + _CLEAN_LOG_FILE_CMD = f"tail -n 
1000 {self.LOG_FILE} > {self.LOG_FILE}_tmp && mv {self.LOG_FILE}_tmp {self.LOG_FILE} ||:" + _MKDIR_CMD = f"mkdir -p {self.MOUNT_POINT}" + _MKDIR_FOR_CACHE = f"mkdir -p {self.cache_dir}" + _UNMOUNT_CMD = ( + f"mount | grep -q {self.MOUNT_POINT} && umount {self.MOUNT_POINT} ||:" + ) + + _TEST_MOUNT_CMD = f"mount | grep -q {self.MOUNT_POINT}" + ShellRunner.run(_CLEAN_LOG_FILE_CMD) + ShellRunner.run(_UNMOUNT_CMD) + ShellRunner.run(_MKDIR_CMD) + ShellRunner.run(_MKDIR_FOR_CACHE) + ShellRunner.run(self.mount_cmd, async_=self.async_mount) + if self.async_mount: + time.sleep(3) + ShellRunner.run(_TEST_MOUNT_CMD) + + @classmethod + def teardown(cls): + print(f"Unmount [{cls.MOUNT_POINT}]") + ShellRunner.run(f"umount {cls.MOUNT_POINT}") + + +class RepoCodenames(metaclass=WithIter): + LTS = "lts" + STABLE = "stable" + + +class DebianArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/deb" + _PROD_REPO_URL = "https://packages.clickhouse.com/deb" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self): + assert self.pd.local_deb_packages_ready(), "BUG: Packages are not downloaded" + print("Start adding packages") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_deb_packages_files() + ] + REPREPRO_CMD_PREFIX = f"reprepro --basedir {R2MountPoint.MOUNT_POINT}/configs/deb --outdir {R2MountPoint.MOUNT_POINT}/deb --verbose" + cmd = f"{REPREPRO_CMD_PREFIX} includedeb {self.codename} {' '.join(paths)}" + print("Running export command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + if self.codename == RepoCodenames.LTS: + packages_with_version = [ + package + "=" + self.version for package in self.pd.get_packages_names() + ] + print( + f"Copy packages from {RepoCodenames.LTS} to {RepoCodenames.STABLE} repository" + ) + cmd = f"{REPREPRO_CMD_PREFIX} copy {RepoCodenames.STABLE} {RepoCodenames.LTS} {' '.join(packages_with_version)}" + print("Running copy command:") + print(f" {cmd}") + ShellRunner.run(cmd) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull ubuntu:latest") + print(f"Test packages installation, version [{self.version}]") + cmd = f"docker run --rm ubuntu:latest bash -c \"apt update -y; apt install -y sudo gnupg ca-certificates; apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754; echo 'deb {self.repo_url} stable main' | tee /etc/apt/sources.list.d/clickhouse.list; apt update -y; apt-get install -y clickhouse-client={self.version}\"" + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +def _copy_if_not_exists(src: Path, dst: Path) -> Path: + if dst.is_dir(): + dst = dst / src.name + if not dst.exists(): + return copy2(src, dst) # type: ignore + if src.stat().st_size == dst.stat().st_size: + return dst + return copy2(src, dst) # type: ignore + + +class RpmArtifactory: + _TEST_REPO_URL = ( + "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/rpm/clickhouse.repo" + ) + _PROD_REPO_URL = "https://packages.clickhouse.com/rpm/clickhouse.repo" + _SIGN_KEY = "885E2BDCF96B0B45ABF058453E4AD4719DDE9A38" + + def __init__(self, release_info: ReleaseInfo, 
dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_rpm_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + print(f"Start adding packages to [{codename}]") + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_rpm_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "rpm" / codename + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + commands = ( + f"createrepo_c --local-sqlite --workers=2 --update --verbose {dest_dir}", + f"gpg --sign-with {self._SIGN_KEY} --detach-sign --batch --yes --armor {dest_dir / 'repodata' / 'repomd.xml'}", + ) + print(f"Exporting RPM packages into [{codename}]") + + for command in commands: + print(f"Running command:") + print(f" {command}") + ShellRunner.run(command) + + update_public_key = f"gpg --armor --export {self._SIGN_KEY}" + pub_key_path = dest_dir / "repodata" / "repomd.xml.key" + print("Updating repomd.xml.key") + pub_key_path.write_text(ShellRunner.run(update_public_key)[1]) + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + ShellRunner.run("docker pull fedora:latest") + print(f"Test package installation, version [{self.version}]") + cmd = f'docker run --rm fedora:latest /bin/bash -c "dnf -y install dnf-plugins-core && dnf config-manager --add-repo={self.repo_url} && dnf makecache && dnf -y install clickhouse-client-{self.version}-1"' + print("Running test command:") + print(f" {cmd}") + ShellRunner.run(cmd) + + +class TgzArtifactory: + _TEST_REPO_URL = "https://pub-73dd1910f4284a81a02a67018967e028.r2.dev/tgz" + _PROD_REPO_URL = "https://packages.clickhouse.com/tgz" + + def __init__(self, release_info: ReleaseInfo, dry_run: bool): + self.codename = release_info.codename + self.version = release_info.version + if dry_run: + self.repo_url = self._TEST_REPO_URL + else: + self.repo_url = self._PROD_REPO_URL + assert self.codename in RepoCodenames + self.pd = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + + def export_packages(self, codename: Optional[str] = None) -> None: + assert self.pd.local_tgz_packages_ready(), "BUG: Packages are not downloaded" + codename = codename or self.codename + + paths = [ + self.pd.LOCAL_DIR + "/" + file for file in self.pd.get_tgz_packages_files() + ] + + dest_dir = Path(R2MountPoint.MOUNT_POINT) / "tgz" / codename + + print(f"Exporting TGZ packages into [{codename}]") + + for package in paths: + _copy_if_not_exists(Path(package), dest_dir) + + if codename == RepoCodenames.LTS: + self.export_packages(RepoCodenames.STABLE) + ShellRunner.run("sync") + + def test_packages(self): + tgz_file = "/tmp/tmp.tgz" + tgz_sha_file = "/tmp/tmp.tgz.sha512" + ShellRunner.run( + f"curl -o {tgz_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz" + ) + ShellRunner.run( + f"curl -o {tgz_sha_file} -f0 {self.repo_url}/stable/clickhouse-client-{self.version}-arm64.tgz.sha512" + ) + expected_checksum = ShellRunner.run(f"cut -d ' ' -f 1 
{tgz_sha_file}") + actual_checksum = ShellRunner.run(f"sha512sum {tgz_file} | cut -d ' ' -f 1") + assert ( + expected_checksum == actual_checksum + ), f"[{actual_checksum} != {expected_checksum}]" + ShellRunner.run("rm /tmp/tmp.tgz*") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Adds release packages to the repository", + ) + parser.add_argument( + "--infile", + type=str, + required=True, + help="input file with release info", + ) + parser.add_argument( + "--export-debian", + action="store_true", + help="Export debian packages to repository", + ) + parser.add_argument( + "--export-rpm", + action="store_true", + help="Export rpm packages to repository", + ) + parser.add_argument( + "--export-tgz", + action="store_true", + help="Export tgz packages to repository", + ) + parser.add_argument( + "--test-debian", + action="store_true", + help="Test debian packages installation", + ) + parser.add_argument( + "--test-rpm", + action="store_true", + help="Test rpm packages installation", + ) + parser.add_argument( + "--test-tgz", + action="store_true", + help="Test tgz packages installation", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Dry run mode", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + release_info = ReleaseInfo.from_file(args.infile) + """ + Use S3FS. RCLONE has some errors with r2 remote which I didn't figure out how to resolve: + ERROR : IO error: NotImplemented: versionId not implemented + Failed to copy: NotImplemented: versionId not implemented + """ + mp = R2MountPoint(MountPointApp.S3FS, dry_run=args.dry_run) + if args.export_debian: + mp.init() + DebianArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_rpm: + mp.init() + RpmArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.export_tgz: + mp.init() + TgzArtifactory(release_info, dry_run=args.dry_run).export_packages() + mp.teardown() + if args.test_debian: + DebianArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_tgz: + TgzArtifactory(release_info, dry_run=args.dry_run).test_packages() + if args.test_rpm: + RpmArtifactory(release_info, dry_run=args.dry_run).test_packages() diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py new file mode 100755 index 00000000000..d749c85994b --- /dev/null +++ b/tests/ci/create_release.py @@ -0,0 +1,663 @@ +import argparse +import dataclasses +import json +import os +import subprocess + +from contextlib import contextmanager +from pathlib import Path +from typing import Iterator, List + +from git_helper import Git, GIT_PREFIX, Runner +from ssh import SSHAgent +from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET +from s3_helper import S3Helper +from version_helper import ( + FILE_WITH_VERSION_PATH, + GENERATED_CONTRIBUTORS, + get_abs_path, + get_version_from_repo, + update_cmake_version, + update_contributors, +) +from git_helper import git_runner as runner +from ci_config import CI + +CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) +CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) + + +class ShellRunner: + + @classmethod + def run(cls, command, check_retcode=True, print_output=True, async_=False): + print(f"Running shell command: [{command}]") + if async_: + subprocess.Popen(command.split(" ")) + return 0, "" + result = subprocess.run( + command + " 
2>&1", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if print_output: + print(result.stdout) + if check_retcode: + assert result.returncode == 0, f"Return code [{result.returncode}]" + return result.returncode, result.stdout + + +@dataclasses.dataclass +class ReleaseInfo: + version: str + release_tag: str + release_branch: str + commit_sha: str + # lts or stable + codename: str + + @staticmethod + def from_file(file_path: str) -> "ReleaseInfo": + with open(file_path, "r", encoding="utf-8") as json_file: + res = json.load(json_file) + return ReleaseInfo(**res) + + @staticmethod + def prepare(commit_ref: str, release_type: str, outfile: str) -> None: + dir = Path(outfile).parent + dir.mkdir(parents=True, exist_ok=True) + Path(outfile).unlink(missing_ok=True) + version = None + release_branch = None + release_tag = None + codename = None + assert release_type in ("patch",) + # if release_type == "new": + # assert False, "TODO" + # git = Git() + # version = get_version_from_repo(git=git) + # assert runner.check_command( + # f"git merge-base --is-ancestor {commit_ref} origin/master" + # ) + # expected_tag = f"v{version.major}.{version.minor}-new" + # assert ( + # git.latest_tag == expected_tag + # ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_tag}]" + # release_branch = "master" + if release_type == "patch": + with checkout(commit_ref): + # Git() must be inside "with checkout" contextmanager + commit_sha = Runner().run(f"git rev-parse {commit_ref}") + git = Git() + version = get_version_from_repo(git=git) + codename = version.get_stable_release_type() + version.with_description(codename) + release_branch = f"{version.major}.{version.minor}" + release_tag = version.describe + runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + assert runner.check_command( + f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" + ) + if version.patch == 1: + expected_tag_prefix = f"v{version.major}.{version.minor+1}-" + expected_tag_suffix = "-new" + else: + expected_tag_prefix = ( + f"v{version.major}.{version.minor}.{version.patch-1}." 
+ ) + expected_tag_suffix = f"-{version.get_stable_release_type()}" + if git.latest_tag.startswith( + expected_tag_prefix + ) and git.latest_tag.endswith(expected_tag_suffix): + pass + else: + assert ( + False + ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]" + + assert ( + release_branch + and commit_sha + and release_tag + and version.string + and codename in ("lts", "stable") + ) + res = ReleaseInfo( + release_branch=release_branch, + commit_sha=commit_sha, + release_tag=release_tag, + version=version.string, + codename=codename, + ) + with open(outfile, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(res), indent=2), file=f) + + def push_release_tag(self, dry_run: bool) -> None: + if dry_run: + # remove locally created tag from prev run + ShellRunner.run( + f"{GIT_PREFIX} tag -l | grep -q {self.release_tag} && git tag -d {self.release_tag} ||:" + ) + # Create release tag + print( + f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" + ) + tag_message = f"Release {self.release_tag}" + runner.run( + f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" + ) + cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" + if not dry_run: + # TODO: cannot push - workflow will start + # runner.run(cmd_commit_version_upd) + pass + else: + print("Dry run, would execute:") + print(f"* {cmd_push_tag}") + + def update_version_and_contributors_list(self, dry_run: bool) -> None: + # Bump version, update contributors list, create PR + branch_upd_version_contributors = f"bump_version_{self.version}" + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) + version.with_description(version.get_stable_release_type()) + assert ( + version.string == self.version + ), "BUG: version in release info does not match version in git commit" + with checkout(self.release_branch): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = ( + f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'", + ) + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") + actor = os.getenv("GITHUB_ACTOR", "") or "me" + cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" + if not dry_run: + runner.run(cmd_commit_version_upd) + runner.run(cmd_push_branch) + runner.run(cmd_create_pr) + else: + print("Dry run, would execute:") + print(f"* {cmd_commit_version_upd}") + print(f"* {cmd_push_branch}") + print(f"* {cmd_create_pr}") + print("Dry run, diff:") + print( + runner.run( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + ) + runner.run( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" + ) + + def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: + repo = os.getenv("GITHUB_REPOSITORY") + assert repo + cmds = [] + cmds.append( + f"gh release create --repo {repo} --title 'Release {self.release_tag}' {self.release_tag}" + ) + for file in packages_files: + cmds.append(f"gh release upload {self.release_tag} {file}") + if not dry_run: + for cmd in cmds: 
+ ShellRunner.run(cmd) + else: + print("Dry-run, would run commands:") + print("\n * ".join(cmds)) + + +class RepoTypes: + RPM = "rpm" + DEBIAN = "deb" + TGZ = "tgz" + + +class PackageDownloader: + PACKAGES = ( + "clickhouse-client", + "clickhouse-common-static", + "clickhouse-common-static-dbg", + "clickhouse-keeper", + "clickhouse-keeper-dbg", + "clickhouse-server", + ) + + EXTRA_PACKAGES = ( + "clickhouse-library-bridge", + "clickhouse-odbc-bridge", + ) + PACKAGE_TYPES = (CI.BuildNames.PACKAGE_RELEASE, CI.BuildNames.PACKAGE_AARCH64) + MACOS_PACKAGE_TO_BIN_SUFFIX = { + CI.BuildNames.BINARY_DARWIN: "macos", + CI.BuildNames.BINARY_DARWIN_AARCH64: "macos-aarch64", + } + LOCAL_DIR = "/tmp/packages" + + @classmethod + def _get_arch_suffix(cls, package_arch, repo_type): + if package_arch == CI.BuildNames.PACKAGE_RELEASE: + return ( + "amd64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "x86_64" + ) + elif package_arch == CI.BuildNames.PACKAGE_AARCH64: + return ( + "arm64" if repo_type in (RepoTypes.DEBIAN, RepoTypes.TGZ) else "aarch64" + ) + else: + assert False, "BUG" + + def __init__(self, release, commit_sha, version: str): + assert version.startswith(release), "Invalid release branch or version" + major, minor = map(int, release.split(".")) + self.package_names = self.PACKAGES + if major > 24 or (major == 24 and minor > 3): + self.package_names += self.EXTRA_PACKAGES + self.release = release + self.commit_sha = commit_sha + self.version = version + self.s3 = S3Helper() + self.deb_package_files = [] + self.rpm_package_files = [] + self.tgz_package_files = [] + # just binaries for macos + self.macos_package_files = ["clickhouse-macos", "clickhouse-macos-aarch64"] + self.file_to_type = {} + + ShellRunner.run(f"mkdir -p {self.LOCAL_DIR}") + + for package_type in self.PACKAGE_TYPES: + for package in self.package_names: + package_file_name = ( + package + + "_" + + self.version + + "_" + + self._get_arch_suffix(package_type, RepoTypes.DEBIAN) + + ".deb" + ) + self.deb_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + package_file_name = ( + package + + "-" + + self.version + + "." 
+ + self._get_arch_suffix(package_type, RepoTypes.RPM) + + ".rpm" + ) + self.rpm_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + package_file_name = ( + package + + "-" + + self.version + + "-" + + self._get_arch_suffix(package_type, RepoTypes.TGZ) + + ".tgz" + ) + self.tgz_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + package_file_name += ".sha512" + self.tgz_package_files.append(package_file_name) + self.file_to_type[package_file_name] = package_type + + def get_deb_packages_files(self): + return self.deb_package_files + + def get_rpm_packages_files(self): + return self.rpm_package_files + + def get_tgz_packages_files(self): + return self.tgz_package_files + + def get_macos_packages_files(self): + return self.macos_package_files + + def get_packages_names(self): + return self.package_names + + def get_all_packages_files(self): + assert self.local_tgz_packages_ready() + assert self.local_deb_packages_ready() + assert self.local_rpm_packages_ready() + assert self.local_macos_packages_ready() + res = [] + for package_file in ( + self.deb_package_files + + self.rpm_package_files + + self.tgz_package_files + + self.macos_package_files + ): + res.append(self.LOCAL_DIR + "/" + package_file) + return res + + def run(self): + ShellRunner.run(f"rm -rf {self.LOCAL_DIR}/*") + for package_file in ( + self.deb_package_files + self.rpm_package_files + self.tgz_package_files + ): + print(f"Downloading: [{package_file}]") + s3_path = "/".join( + [ + self.release, + self.commit_sha, + self.file_to_type[package_file], + package_file, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, package_file]), + ) + + for macos_package, bin_suffix in self.MACOS_PACKAGE_TO_BIN_SUFFIX.items(): + binary_name = "clickhouse" + destination_binary_name = f"{binary_name}-{bin_suffix}" + assert destination_binary_name in self.macos_package_files + print( + f"Downloading: [{macos_package}] binary to [{destination_binary_name}]" + ) + s3_path = "/".join( + [ + self.release, + self.commit_sha, + macos_package, + binary_name, + ] + ) + self.s3.download_file( + bucket=S3_BUILDS_BUCKET, + s3_path=s3_path, + local_file_path="/".join([self.LOCAL_DIR, destination_binary_name]), + ) + + def local_deb_packages_ready(self) -> bool: + assert self.deb_package_files + for package_file in self.deb_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_rpm_packages_ready(self) -> bool: + assert self.rpm_package_files + for package_file in self.rpm_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_tgz_packages_ready(self) -> bool: + assert self.tgz_package_files + for package_file in self.tgz_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + def local_macos_packages_ready(self) -> bool: + assert self.macos_package_files + for package_file in self.macos_package_files: + print(f"Check package is downloaded [{package_file}]") + if not Path(self.LOCAL_DIR + "/" + package_file).is_file(): + return False + return True + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Creates release", + ) + parser.add_argument( + "--prepare-release-info", + action="store_true", + help="Initial step to prepare info like release branch, release tag, etc.", + ) + parser.add_argument( + "--push-release-tag", + action="store_true", + help="Creates and pushes git tag", + ) + parser.add_argument( + "--create-bump-version-pr", + action="store_true", + help="Updates version, contributors' list and creates PR", + ) + parser.add_argument( + "--download-packages", + action="store_true", + help="Downloads all required packages from s3", + ) + parser.add_argument( + "--create-gh-release", + action="store_true", + help="Create GH Release object and attach all packages", + ) + parser.add_argument( + "--ref", + type=str, + help="the commit hash or branch", + ) + parser.add_argument( + "--release-type", + choices=("new", "patch"), + # dest="release_type", + help="a release type to bump the major.minor.patch version part, " + "new branch is created only for the value 'new'", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="do not make any actual changes in the repo, just show what will be done", + ) + parser.add_argument( + "--outfile", + default="", + type=str, + help="output file to write json result to, if not set - stdout", + ) + parser.add_argument( + "--infile", + default="", + type=str, + help="input file with release info", + ) + + return parser.parse_args() + + +@contextmanager +def checkout(ref: str) -> Iterator[None]: + orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + if ref not in (orig_ref,): + runner.run(f"{GIT_PREFIX} checkout {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + runner.run(rollback_cmd) + raise + runner.run(rollback_cmd) + + +@contextmanager +def checkout_new(ref: str) -> Iterator[None]: + orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" + assert orig_ref + runner.run(f"{GIT_PREFIX} checkout -b {ref}") + try: + yield + except (Exception, KeyboardInterrupt) as e: + print(f"ERROR: Exception [{e}]") + runner.run(rollback_cmd) + raise + runner.run(rollback_cmd) + + +if __name__ == "__main__": + args = parse_args() + assert args.dry_run + + # prepare ssh for git if needed + _ssh_agent = None + _key_pub = None + if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): + _key = os.getenv("ROBOT_CLICKHOUSE_SSH_KEY") + _ssh_agent = SSHAgent() + _key_pub = _ssh_agent.add(_key) + _ssh_agent.print_keys() + + if args.prepare_release_info: + assert ( + args.ref and args.release_type and args.outfile + ), "--ref, --release-type and --outfile must be provided with --prepare-release-info" + ReleaseInfo.prepare( + commit_ref=args.ref, release_type=args.release_type, outfile=args.outfile + ) + if args.push_release_tag: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_release_tag(dry_run=args.dry_run) + if args.create_bump_version_pr: + # TODO: store link to PR in release info + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.update_version_and_contributors_list(dry_run=args.dry_run) + if args.download_packages: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + 
release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + p.run() + if args.create_gh_release: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + p = PackageDownloader( + release=release_info.release_branch, + commit_sha=release_info.commit_sha, + version=release_info.version, + ) + release_info.create_gh_release(p.get_all_packages_files(), args.dry_run) + + # tear down ssh + if _ssh_agent and _key_pub: + _ssh_agent.remove(_key_pub) + + +""" +Prepare release machine (for arm machine): + +### INSTALL PACKAGES +sudo apt update +sudo apt install --yes --no-install-recommends python3-dev python3-pip gh unzip +sudo apt install --yes python3-boto3 +sudo apt install --yes python3-github +sudo apt install --yes python3-unidiff +sudo apt install --yes s3fs + +### INSTALL AWS CLI +cd /tmp +curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" +unzip awscliv2.zip +sudo ./aws/install +rm -rf aws* +cd - + +### INSTALL GH ACTIONS RUNNER: +# Create a folder +RUNNER_VERSION=2.317.0 +cd ~ +mkdir actions-runner && cd actions-runner +# Download the latest runner package +runner_arch() { + case $(uname -m) in + x86_64 ) + echo x64;; + aarch64 ) + echo arm64;; + esac +} +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +# Extract the installer +tar xzf ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz +rm ./actions-runner-linux-$(runner_arch)-$RUNNER_VERSION.tar.gz + +### Install reprepro: +cd ~ +sudo apt install dpkg-dev libgpgme-dev libdb-dev libbz2-dev liblzma-dev libarchive-dev shunit2 db-util debhelper +git clone https://salsa.debian.org/debian/reprepro.git +cd reprepro +dpkg-buildpackage -b --no-sign && sudo dpkg -i ../reprepro_$(dpkg-parsechangelog --show-field Version)_$(dpkg-architecture -q DEB_HOST_ARCH).deb + +### Install createrepo-c: +sudo apt install createrepo-c +createrepo_c --version +#Version: 0.17.3 (Features: DeltaRPM LegacyWeakdeps ) + +### Import gpg sign key +gpg --import key.pgp +gpg --list-secret-keys + +### Install docker +sudo su; cd ~ + +deb_arch() { + case $(uname -m) in + x86_64 ) + echo amd64;; + aarch64 ) + echo arm64;; + esac +} +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=$(deb_arch) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +sudo apt-get update +sudo apt-get install --yes --no-install-recommends docker-ce docker-buildx-plugin docker-ce-cli containerd.io + +sudo usermod -aG docker ubuntu + +# enable ipv6 in containers (fixed-cidr-v6 is some random network mask) +cat < /etc/docker/daemon.json +{ + "ipv6": true, + "fixed-cidr-v6": "2001:db8:1::/64", + "log-driver": "json-file", + "log-opts": { + "max-file": "5", + "max-size": "1000m" + }, + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] +} +EOT + +# if docker build does not work: + sudo systemctl restart docker + docker buildx rm mybuilder + docker buildx create --name mybuilder --driver docker-container --use + docker buildx inspect mybuilder --bootstrap + +### Install tailscale + +### Configure GH runner +""" diff --git a/tests/ci/docker_server.py 
b/tests/ci/docker_server.py index 151cc5a4c02..2f556e3ed57 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -11,7 +11,6 @@ from os import path as p from pathlib import Path from typing import Dict, List -from build_check import get_release_or_pr from build_download_helper import read_build_urls from docker_images_helper import DockerImageData, docker_login from env_helper import ( @@ -22,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo +from pr_info import PRInfo, EventType from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -63,6 +62,12 @@ def parse_args() -> argparse.Namespace: help="a version to build, automaticaly got from version_helper, accepts either " "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) + parser.add_argument( + "--sha", + type=str, + default="", + help="sha of the commit to use packages from", + ) parser.add_argument( "--release-type", type=str, @@ -122,7 +127,7 @@ def parse_args() -> argparse.Namespace: def retry_popen(cmd: str, log_file: Path) -> int: - max_retries = 5 + max_retries = 2 for retry in range(max_retries): # From time to time docker build may failed. Curl issues, or even push # It will sleep progressively 5, 15, 30 and 50 seconds between retries @@ -370,13 +375,22 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - release_or_pr, _ = get_release_or_pr(pr_info, args.version) + if pr_info.event_type == EventType.PULL_REQUEST: + release_or_pr = pr_info.number + sha = pr_info.sha + elif pr_info.event_type == EventType.PUSH and pr_info.is_master: + release_or_pr = 0 + sha = pr_info.sha + else: + release_or_pr = f"{args.version.major}.{args.version.minor}" + sha = args.sha + assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): if not args.bucket_prefix: repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{pr_info.sha}/{build_name}" + f"{release_or_pr}/{sha}/{build_name}" ) else: repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 50263f6ebb6..d48dd36b16a 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -148,6 +148,11 @@ class ClickHouseVersion: """our X.3 and X.8 are LTS""" return self.minor % 5 == 3 + def get_stable_release_type(self) -> str: + if self.is_lts: + return VersionType.LTS + return VersionType.STABLE + def as_dict(self) -> VERSIONS: return { "revision": self.revision, @@ -168,6 +173,7 @@ class ClickHouseVersion: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") self._description = version_type self._describe = f"v{self.string}-{version_type}" + return self def copy(self) -> "ClickHouseVersion": copy = ClickHouseVersion( From f8e71707f234c10dd3567e9087a16e6cc2e21801 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 14:50:55 +0200 Subject: [PATCH 284/299] update black --- docker/test/style/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt index bb0cd55dd1a..ed73d0d3636 100644 --- a/docker/test/style/requirements.txt +++ b/docker/test/style/requirements.txt @@ -3,7 +3,7 @@ aiosignal==1.3.1 astroid==3.1.0 async-timeout==4.0.3 attrs==23.2.0 -black==23.12.0 +black==24.4.2 boto3==1.34.131 
botocore==1.34.131 certifi==2024.6.2 From 10502174452c7b1adf623b113e145f1bad24c1ea Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 16:19:06 +0200 Subject: [PATCH 285/299] add support for new release branch Automatic style fix --- .github/workflows/create_release.yml | 27 +++- docker/test/libfuzzer/run_libfuzzer.py | 18 +-- tests/ci/create_release.py | 125 +++++++++++++++--- tests/ci/ssh.py | 6 +- tests/ci/test_ci_options.py | 14 +- tests/ci/version_helper.py | 13 ++ tests/integration/helpers/cluster.py | 18 +-- tests/integration/test_disk_types/test.py | 6 +- .../test_distributed_default_database/test.py | 1 + .../test_interserver_dns_retires/test.py | 1 + tests/integration/test_storage_s3/test.py | 4 +- .../test_case.py | 1 + .../test_case.py | 1 + ...411_long_accurate_number_comparison.python | 2 +- 14 files changed, 175 insertions(+), 62 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index e2ad16a05a4..26adc3adff9 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -15,9 +15,8 @@ concurrency: required: true type: choice options: - # TODO: - #- new - patch + - new dry-run: description: 'Dry run' required: false @@ -28,7 +27,6 @@ jobs: CreateRelease: env: GH_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - RELEASE_TYPE: runs-on: [self-hosted, release-maker] steps: - name: DebugInfo @@ -61,11 +59,16 @@ jobs: echo "RELEASE_TAG=$release_tag" >> "$GITHUB_ENV" echo "COMMIT_SHA=$commit_sha" >> "$GITHUB_ENV" - name: Download All Release Artifacts + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/create_release.py --infile "$RELEASE_INFO_FILE" --download-packages ${{ inputs.dry-run && '--dry-run' || '' }} - name: Push Git Tag for the Release run: | python3 ./tests/ci/create_release.py --push-release-tag --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} + - name: Push New Release Branch + if: ${{ inputs.type == 'new' }} + run: | + python3 ./tests/ci/create_release.py --push-new-release-branch --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} - name: Bump CH Version and Update Contributors' List run: | python3 ./tests/ci/create_release.py --create-bump-version-pr --infile "$RELEASE_INFO_FILE" ${{ inputs.dry-run && '--dry-run' || '' }} @@ -73,6 +76,7 @@ jobs: run: | git checkout master - name: Bump Docker versions, Changelog, Security + if: ${{ inputs.type == 'patch' }} run: | [ "$(git branch --show-current)" != "master" ] && echo "not on the master" && exit 1 echo "List versions" @@ -90,8 +94,8 @@ jobs: echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - - name: Create ChangeLog Pull Request - if: ${{ ! inputs.dry-run }} + - name: Generate ChangeLog + if: ${{ inputs.type == 'patch' && ! 
inputs.dry-run }} uses: peter-evans/create-pull-request@v6 with: author: "robot-clickhouse " @@ -115,39 +119,48 @@ jobs: run: | git checkout "$GITHUB_REF_NAME" - name: Create GH Release + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/create_release.py --create-gh-release \ --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export TGZ Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test TGZ Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-tgz --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export RPM Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test RPM Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-rpm --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Export Debian Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --export-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Test Debian Packages + if: ${{ inputs.type == 'patch' }} run: | python3 ./tests/ci/artifactory.py --test-debian --infile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} - name: Docker clickhouse/clickhouse-server building + if: ${{ inputs.type == 'patch' }} run: | cd "./tests/ci" export CHECK_NAME="Docker server image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! inputs.dry-run && '--push' || '' }} - name: Docker clickhouse/clickhouse-keeper building + if: ${{ inputs.type == 'patch' }} run: | cd "./tests/ci" export CHECK_NAME="Docker keeper image" python3 docker_server.py --release-type auto --version ${{ env.RELEASE_TAG }} --check-name "$CHECK_NAME" --sha ${{ env.COMMIT_SHA }} ${{ ! 
inputs.dry-run && '--push' || '' }} - name: Post Slack Message - if: failure() + if: always() run: | - echo Slack Message \ No newline at end of file + echo Slack Message diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 5ed019490d5..fa67805dfa5 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -27,19 +27,19 @@ def run_fuzzer(fuzzer: str): parser.read(path) if parser.has_section("asan"): - os.environ[ - "ASAN_OPTIONS" - ] = f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + os.environ["ASAN_OPTIONS"] = ( + f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + ) if parser.has_section("msan"): - os.environ[ - "MSAN_OPTIONS" - ] = f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + os.environ["MSAN_OPTIONS"] = ( + f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + ) if parser.has_section("ubsan"): - os.environ[ - "UBSAN_OPTIONS" - ] = f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + os.environ["UBSAN_OPTIONS"] = ( + f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + ) if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index d749c85994b..65ab48865ef 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -5,6 +5,7 @@ import os import subprocess from contextlib import contextmanager +from copy import copy from pathlib import Path from typing import Iterator, List @@ -12,6 +13,7 @@ from git_helper import Git, GIT_PREFIX, Runner from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper +from autoscale_runners_lambda.lambda_shared.pr import Labels from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -19,6 +21,7 @@ from version_helper import ( get_version_from_repo, update_cmake_version, update_contributors, + VersionType, ) from git_helper import git_runner as runner from ci_config import CI @@ -30,7 +33,12 @@ CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) class ShellRunner: @classmethod - def run(cls, command, check_retcode=True, print_output=True, async_=False): + def run( + cls, command, check_retcode=True, print_output=True, async_=False, dry_run=False + ): + if dry_run: + print(f"Dry-run: Would run shell command: [{command}]") + return 0, "" print(f"Running shell command: [{command}]") if async_: subprocess.Popen(command.split(" ")) @@ -73,23 +81,31 @@ class ReleaseInfo: release_branch = None release_tag = None codename = None - assert release_type in ("patch",) - # if release_type == "new": - # assert False, "TODO" - # git = Git() - # version = get_version_from_repo(git=git) - # assert runner.check_command( - # f"git merge-base --is-ancestor {commit_ref} origin/master" - # ) - # expected_tag = f"v{version.major}.{version.minor}-new" - # assert ( - # git.latest_tag == expected_tag - # ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_tag}]" - # release_branch = "master" + assert release_type in ("patch", "new") + if release_type == "new": + # check commit_ref is right and on a right branch + ShellRunner.run( + f"git merge-base 
--is-ancestor origin/{commit_ref} origin/master" + ) + with checkout(commit_ref): + commit_sha = Runner().run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager + git = Git() + version = get_version_from_repo(git=git) + release_branch = "master" + expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" + version.bump().with_description(VersionType.NEW) + assert ( + git.latest_tag == expected_prev_tag + ), f"BUG: latest tag [{git.latest_tag}], expected [{expected_prev_tag}]" + release_tag = version.describe + codename = ( + VersionType.STABLE + ) # dummy value (artifactory won't be updated for new release) if release_type == "patch": with checkout(commit_ref): - # Git() must be inside "with checkout" contextmanager commit_sha = Runner().run(f"git rev-parse {commit_ref}") + # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) codename = version.get_stable_release_type() @@ -97,11 +113,16 @@ class ReleaseInfo: release_branch = f"{version.major}.{version.minor}" release_tag = version.describe runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") - assert runner.check_command( + # check commit is right and on a right branch + ShellRunner.run( f"git merge-base --is-ancestor {commit_ref} origin/{release_branch}" ) if version.patch == 1: - expected_tag_prefix = f"v{version.major}.{version.minor+1}-" + expected_version = copy(version) + expected_version.bump() + expected_tag_prefix = ( + f"v{expected_version.major}.{expected_version.minor}-" + ) expected_tag_suffix = "-new" else: expected_tag_prefix = ( @@ -157,16 +178,71 @@ class ReleaseInfo: print("Dry run, would execute:") print(f"* {cmd_push_tag}") + @staticmethod + def _create_gh_label(label: str, color_hex: str, dry_run: bool): + cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" + ShellRunner.run(cmd, dry_run=dry_run) + + def push_new_release_branch(self, dry_run: bool) -> None: + assert ( + self.release_branch == "master" + ), "New release branch can be created only for release type [new]" + git = Git() + version = get_version_from_repo(git=git) + new_release_branch = f"{version.major}.{version.minor}" + stable_release_type = version.get_stable_release_type() + version_after_release = copy(version) + version_after_release.bump() + assert ( + version_after_release.string == self.version + ), f"Unexpected current version in git, must precede [{self.version}] by one step, actual [{version.string}]" + if dry_run: + # remove locally created branch from prev run + ShellRunner.run( + f"{GIT_PREFIX} branch -l | grep -q {new_release_branch} && git branch -d {new_release_branch} ||:" + ) + print( + f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" + ) + with checkout(self.release_branch): + with checkout_new(new_release_branch): + pr_labels = f"--label {Labels.RELEASE}" + if stable_release_type == VersionType.LTS: + pr_labels += f" --label {Labels.RELEASE_LTS}" + cmd_push_branch = ( + f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" + ) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + + print("Create and push backport tags for new release branch") + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-must-backport", "10dbed", dry_run=dry_run + ) + ReleaseInfo._create_gh_label( + f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run + ) + ShellRunner.run( + f"""gh pr create --repo {GITHUB_REPOSITORY} --title 'Release pull request for branch 
{new_release_branch}' + --head {new_release_branch} {pr_labels} + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' + """, + dry_run=dry_run, + ) + def update_version_and_contributors_list(self, dry_run: bool) -> None: # Bump version, update contributors list, create PR branch_upd_version_contributors = f"bump_version_{self.version}" with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) - version.with_description(version.get_stable_release_type()) + if self.release_branch == "master": + version.bump() + version.with_description(VersionType.TESTING) + else: + version.with_description(version.get_stable_release_type()) assert ( version.string == self.version - ), "BUG: version in release info does not match version in git commit" + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" with checkout(self.release_branch): with checkout_new(branch_upd_version_contributors): update_cmake_version(version) @@ -430,6 +506,11 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Creates and pushes git tag", ) + parser.add_argument( + "--push-new-release-branch", + action="store_true", + help="Creates and pushes new release branch and corresponding service gh tags for backports", + ) parser.add_argument( "--create-bump-version-pr", action="store_true", @@ -533,6 +614,10 @@ if __name__ == "__main__": assert args.infile, "--infile must be provided" release_info = ReleaseInfo.from_file(args.infile) release_info.push_release_tag(dry_run=args.dry_run) + if args.push_new_release_branch: + assert args.infile, "--infile must be provided" + release_info = ReleaseInfo.from_file(args.infile) + release_info.push_new_release_branch(dry_run=args.dry_run) if args.create_bump_version_pr: # TODO: store link to PR in release info assert args.infile, "--infile must be provided" @@ -563,7 +648,7 @@ if __name__ == "__main__": """ -Prepare release machine (for arm machine): +Prepare release machine: ### INSTALL PACKAGES sudo apt update diff --git a/tests/ci/ssh.py b/tests/ci/ssh.py index 321826fcf44..89d90d724d2 100644 --- a/tests/ci/ssh.py +++ b/tests/ci/ssh.py @@ -37,9 +37,9 @@ class SSHAgent: ssh_options = ( "," + os.environ["SSH_OPTIONS"] if os.environ.get("SSH_OPTIONS") else "" ) - os.environ[ - "SSH_OPTIONS" - ] = f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + os.environ["SSH_OPTIONS"] = ( + f"{ssh_options}UserKnownHostsFile=/dev/null,StrictHostKeyChecking=no" + ) def add(self, key): key_pub = self._key_pub(key) diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 3f158e79f30..f4d14a17512 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -172,14 +172,10 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = ( + jobs_configs["fuzzers"].run_by_label = ( "TEST_LABEL" # check "fuzzers" appears in the result due to the label ) - jobs_configs[ - "Integration tests (asan)" - ].release_only = ( + jobs_configs["Integration tests (asan)"].release_only = ( True # still must be included as it's set with include keywords ) filtered_jobs = list( @@ -311,9 +307,9 @@ class TestCIOptions(unittest.TestCase): job: CI.JobConfig(runner_type=CI.Runners.STYLE_CHECKER) for job in _TEST_JOB_LIST } - jobs_configs[ - "fuzzers" - ].run_by_label = 
"TEST_LABEL" # check "fuzzers" does not appears in the result + jobs_configs["fuzzers"].run_by_label = ( + "TEST_LABEL" # check "fuzzers" does not appears in the result + ) jobs_configs["Integration tests (asan)"].release_only = True filtered_jobs = list( ci_options.apply( diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index d48dd36b16a..07a7a9601c0 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -72,6 +72,19 @@ class ClickHouseVersion: return self.patch_update() raise KeyError(f"wrong part {part} is used") + def bump(self) -> "ClickHouseVersion": + if self.minor < 12: + self._minor += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + else: + self._major += 1 + self._revision += 1 + self._patch = 1 + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 34f5c28fef8..548b58a17e8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1454,9 +1454,9 @@ class ClickHouseCluster: def setup_azurite_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_azurite = True env_variables["AZURITE_PORT"] = str(self.azurite_port) - env_variables[ - "AZURITE_STORAGE_ACCOUNT_URL" - ] = f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + env_variables["AZURITE_STORAGE_ACCOUNT_URL"] = ( + f"http://azurite1:{env_variables['AZURITE_PORT']}/devstoreaccount1" + ) env_variables["AZURITE_CONNECTION_STRING"] = ( f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" @@ -1653,9 +1653,9 @@ class ClickHouseCluster: # Code coverage files will be placed in database directory # (affect only WITH_COVERAGE=1 build) - env_variables[ - "LLVM_PROFILE_FILE" - ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + env_variables["LLVM_PROFILE_FILE"] = ( + "/var/lib/clickhouse/server_%h_%p_%m.profraw" + ) clickhouse_start_command = CLICKHOUSE_START_COMMAND if clickhouse_log_file: @@ -1668,9 +1668,9 @@ class ClickHouseCluster: cluster=self, base_path=self.base_dir, name=name, - base_config_dir=base_config_dir - if base_config_dir - else self.base_config_dir, + base_config_dir=( + base_config_dir if base_config_dir else self.base_config_dir + ), custom_main_configs=main_configs or [], custom_user_configs=user_configs or [], custom_dictionaries=dictionaries or [], diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 1cc5048eb69..a5e2456ef4f 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -19,9 +19,9 @@ def cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/storage_arm.xml"] - if is_arm() - else ["configs/storage_amd.xml"], + main_configs=( + ["configs/storage_arm.xml"] if is_arm() else ["configs/storage_amd.xml"] + ), with_minio=True, with_hdfs=not is_arm(), ) diff --git a/tests/integration/test_distributed_default_database/test.py b/tests/integration/test_distributed_default_database/test.py index ef69533416b..7da9a368997 100644 --- a/tests/integration/test_distributed_default_database/test.py +++ b/tests/integration/test_distributed_default_database/test.py @@ -5,6 +5,7 @@ in this test we write into per-node tables and read from the distributed table. 
The default database in the distributed table definition is left empty on purpose to test default database deduction. """ + import pytest from helpers.client import QueryRuntimeException diff --git a/tests/integration/test_interserver_dns_retires/test.py b/tests/integration/test_interserver_dns_retires/test.py index f0c581e6450..7c81f278737 100644 --- a/tests/integration/test_interserver_dns_retires/test.py +++ b/tests/integration/test_interserver_dns_retires/test.py @@ -2,6 +2,7 @@ This test makes sure interserver cluster queries handle invalid DNS records for replicas. """ + from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 9a0cb352088..40cbf4b44a6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -197,7 +197,9 @@ def test_partition_by_string_column(started_cluster): started_cluster, bucket, "test_foo/bar.csv" ) assert '3,"йцук"\n' == get_s3_file_content(started_cluster, bucket, "test_йцук.csv") - assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv") + assert '78,"你好"\n' == get_s3_file_content( + started_cluster, bucket, "test_你好.csv" + ) def test_partition_by_const_column(started_cluster): diff --git a/tests/integration/test_tcp_handler_http_responses/test_case.py b/tests/integration/test_tcp_handler_http_responses/test_case.py index 2fc53674ca4..98f4b74223e 100644 --- a/tests/integration/test_tcp_handler_http_responses/test_case.py +++ b/tests/integration/test_tcp_handler_http_responses/test_case.py @@ -1,4 +1,5 @@ """Test HTTP responses given by the TCP Handler.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 62581996f3b..b20ab48795b 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -1,4 +1,5 @@ """Test Interserver responses on configured IP.""" + from pathlib import Path import pytest from helpers.cluster import ClickHouseCluster diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 183a2637d36..045de9ee7ee 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -50,7 +50,7 @@ TYPES = { "UInt32": {"bits": 32, "sign": False, "float": False}, "Int32": {"bits": 32, "sign": True, "float": False}, "UInt64": {"bits": 64, "sign": False, "float": False}, - "Int64": {"bits": 64, "sign": True, "float": False} + "Int64": {"bits": 64, "sign": True, "float": False}, # "Float32" : { "bits" : 32, "sign" : True, "float" : True }, # "Float64" : { "bits" : 64, "sign" : True, "float" : True } } From 2cd1a39ff4f283122208bc5b2be5244a60e0bfcf Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 21:01:49 +0200 Subject: [PATCH 286/299] style fixes Automatic style fix --- .github/workflows/create_release.yml | 2 +- tests/ci/artifactory.py | 5 +- tests/ci/create_release.py | 116 +++++++++------------------ 3 files changed, 42 insertions(+), 81 deletions(-) diff --git a/.github/workflows/create_release.yml 
b/.github/workflows/create_release.yml index 26adc3adff9..7879c28ab8d 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -49,7 +49,7 @@ jobs: run: | python3 ./tests/ci/create_release.py --prepare-release-info \ --ref ${{ inputs.ref }} --release-type ${{ inputs.type }} \ - --outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} + --outfile ${{ env.RELEASE_INFO_FILE }} ${{ inputs.dry-run && '--dry-run' || '' }} echo "::group::Release Info" python3 -m json.tool "$RELEASE_INFO_FILE" echo "::endgroup::" diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index 2e18316cb78..1a062d05a23 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -2,10 +2,9 @@ import argparse import time from pathlib import Path from typing import Optional - +from shutil import copy2 from create_release import PackageDownloader, ReleaseInfo, ShellRunner from ci_utils import WithIter -from shutil import copy2 class MountPointApp(metaclass=WithIter): @@ -201,7 +200,7 @@ class RpmArtifactory: print(f"Exporting RPM packages into [{codename}]") for command in commands: - print(f"Running command:") + print("Running command:") print(f" {command}") ShellRunner.run(command) diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 65ab48865ef..83a51c66bce 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -9,7 +9,7 @@ from copy import copy from pathlib import Path from typing import Iterator, List -from git_helper import Git, GIT_PREFIX, Runner +from git_helper import Git, GIT_PREFIX from ssh import SSHAgent from env_helper import GITHUB_REPOSITORY, S3_BUILDS_BUCKET from s3_helper import S3Helper @@ -23,7 +23,6 @@ from version_helper import ( update_contributors, VersionType, ) -from git_helper import git_runner as runner from ci_config import CI CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) @@ -49,6 +48,7 @@ class ShellRunner: stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, + check=True, ) if print_output: print(result.stdout) @@ -74,8 +74,7 @@ class ReleaseInfo: @staticmethod def prepare(commit_ref: str, release_type: str, outfile: str) -> None: - dir = Path(outfile).parent - dir.mkdir(parents=True, exist_ok=True) + Path(outfile).parent.mkdir(parents=True, exist_ok=True) Path(outfile).unlink(missing_ok=True) version = None release_branch = None @@ -88,7 +87,7 @@ class ReleaseInfo: f"git merge-base --is-ancestor origin/{commit_ref} origin/master" ) with checkout(commit_ref): - commit_sha = Runner().run(f"git rev-parse {commit_ref}") + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -104,7 +103,7 @@ class ReleaseInfo: ) # dummy value (artifactory won't be updated for new release) if release_type == "patch": with checkout(commit_ref): - commit_sha = Runner().run(f"git rev-parse {commit_ref}") + _, commit_sha = ShellRunner.run(f"git rev-parse {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() version = get_version_from_repo(git=git) @@ -112,7 +111,7 @@ class ReleaseInfo: version.with_description(codename) release_branch = f"{version.major}.{version.minor}" release_tag = version.describe - runner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") + ShellRunner.run(f"{GIT_PREFIX} fetch origin {release_branch} --tags") # check commit is right and on a right branch ShellRunner.run( f"git merge-base --is-ancestor 
{commit_ref} origin/{release_branch}" @@ -142,7 +141,7 @@ class ReleaseInfo: release_branch and commit_sha and release_tag - and version.string + and version and codename in ("lts", "stable") ) res = ReleaseInfo( @@ -166,20 +165,14 @@ class ReleaseInfo: f"Create and push release tag [{self.release_tag}], commit [{self.commit_sha}]" ) tag_message = f"Release {self.release_tag}" - runner.run( + ShellRunner.run( f"{GIT_PREFIX} tag -a -m '{tag_message}' {self.release_tag} {self.commit_sha}" ) cmd_push_tag = f"{GIT_PREFIX} push origin {self.release_tag}:{self.release_tag}" - if not dry_run: - # TODO: cannot push - workflow will start - # runner.run(cmd_commit_version_upd) - pass - else: - print("Dry run, would execute:") - print(f"* {cmd_push_tag}") + ShellRunner.run(cmd_push_tag, dry_run=dry_run) @staticmethod - def _create_gh_label(label: str, color_hex: str, dry_run: bool): + def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" ShellRunner.run(cmd, dry_run=dry_run) @@ -247,29 +240,19 @@ class ReleaseInfo: with checkout_new(branch_upd_version_contributors): update_cmake_version(version) update_contributors(raise_error=True) - cmd_commit_version_upd = ( - f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'", - ) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") actor = os.getenv("GITHUB_ACTOR", "") or "me" cmd_create_pr = f"gh pr create --repo {GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body-file '{body_file} --label 'do not test' --assignee @{actor}" - if not dry_run: - runner.run(cmd_commit_version_upd) - runner.run(cmd_push_branch) - runner.run(cmd_create_pr) - else: - print("Dry run, would execute:") - print(f"* {cmd_commit_version_upd}") - print(f"* {cmd_push_branch}") - print(f"* {cmd_create_pr}") - print("Dry run, diff:") - print( - runner.run( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" - ) + ShellRunner.run(cmd_commit_version_upd, dry_run=dry_run) + ShellRunner.run(cmd_push_branch, dry_run=dry_run) + ShellRunner.run(cmd_create_pr, dry_run=dry_run) + if dry_run: + ShellRunner.run( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) - runner.run( + ShellRunner.run( f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'" ) @@ -330,7 +313,7 @@ class PackageDownloader: else: assert False, "BUG" - def __init__(self, release, commit_sha, version: str): + def __init__(self, release, commit_sha, version): assert version.startswith(release), "Invalid release branch or version" major, minor = map(int, release.split(".")) self.package_names = self.PACKAGES @@ -351,41 +334,20 @@ class PackageDownloader: for package_type in self.PACKAGE_TYPES: for package in self.package_names: - package_file_name = ( - package - + "_" - + self.version - + "_" - + self._get_arch_suffix(package_type, RepoTypes.DEBIAN) - + ".deb" - ) - self.deb_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + deb_package_file_name = f"{package}_{self.version}_{self._get_arch_suffix(package_type, RepoTypes.DEBIAN)}.deb" + 
self.deb_package_files.append(deb_package_file_name) + self.file_to_type[deb_package_file_name] = package_type - package_file_name = ( - package - + "-" - + self.version - + "." - + self._get_arch_suffix(package_type, RepoTypes.RPM) - + ".rpm" - ) - self.rpm_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + rpm_package_file_name = f"{package}-{self.version}.{self._get_arch_suffix(package_type, RepoTypes.RPM)}.rpm" + self.rpm_package_files.append(rpm_package_file_name) + self.file_to_type[rpm_package_file_name] = package_type - package_file_name = ( - package - + "-" - + self.version - + "-" - + self._get_arch_suffix(package_type, RepoTypes.TGZ) - + ".tgz" - ) - self.tgz_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type - package_file_name += ".sha512" - self.tgz_package_files.append(package_file_name) - self.file_to_type[package_file_name] = package_type + tgz_package_file_name = f"{package}-{self.version}-{self._get_arch_suffix(package_type, RepoTypes.TGZ)}.tgz" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type + tgz_package_file_name += ".sha512" + self.tgz_package_files.append(tgz_package_file_name) + self.file_to_type[tgz_package_file_name] = package_type def get_deb_packages_files(self): return self.deb_package_files @@ -561,33 +523,33 @@ def parse_args() -> argparse.Namespace: @contextmanager def checkout(ref: str) -> Iterator[None]: - orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref if ref not in (orig_ref,): - runner.run(f"{GIT_PREFIX} checkout {ref}") + ShellRunner.run(f"{GIT_PREFIX} checkout {ref}") try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) raise - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) @contextmanager def checkout_new(ref: str) -> Iterator[None]: - orig_ref = runner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") + _, orig_ref = ShellRunner.run(f"{GIT_PREFIX} symbolic-ref --short HEAD") rollback_cmd = f"{GIT_PREFIX} checkout {orig_ref}" assert orig_ref - runner.run(f"{GIT_PREFIX} checkout -b {ref}") + ShellRunner.run(f"{GIT_PREFIX} checkout -b {ref}") try: yield except (Exception, KeyboardInterrupt) as e: print(f"ERROR: Exception [{e}]") - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) raise - runner.run(rollback_cmd) + ShellRunner.run(rollback_cmd) if __name__ == "__main__": From 9f1520ae219e39f445b0948f48a2b34a99abf568 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 10 Jul 2024 21:28:08 +0200 Subject: [PATCH 287/299] do not autofix if not only black failed --- tests/ci/style_check.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9deae06d9f4..6b58ecece8d 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -192,15 +192,6 @@ def main(): future = executor.submit(subprocess.run, cmd_shell, shell=True) _ = future.result() - autofix_description = "" - if args.push: - try: - commit_push_staged(pr_info) - except subprocess.SubprocessError: - # do not fail the whole script if the autofix didn't work out - logging.error("Unable to push the autofix. Continue.") - autofix_description = "Failed to push autofix to the PR. 
" - subprocess.check_call( f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || " @@ -210,6 +201,21 @@ def main(): state, description, test_results, additional_files = process_result(temp_path) + autofix_description = "" + fail_cnt = 0 + for result in test_results: + if result.status == FAILURE: + # do not autofix if not only back failed + fail_cnt += 1 + + if args.push and fail_cnt == 1: + try: + commit_push_staged(pr_info) + except subprocess.SubprocessError: + # do not fail the whole script if the autofix didn't work out + logging.error("Unable to push the autofix. Continue.") + autofix_description = "Failed to push autofix to the PR. " + JobReport( description=f"{autofix_description}{description}", test_results=test_results, From 578d22ae94ea1704819e41ca1f0f7c8e58d92e32 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 11 Jul 2024 09:24:25 +0200 Subject: [PATCH 288/299] style Automatic style fix Automatic style fix --- .github/actionlint.yml | 1 + .github/workflows/create_release.yml | 4 ++-- pyproject.toml | 2 ++ tests/ci/create_release.py | 6 +++--- tests/ci/docker_server.py | 4 ++-- tests/ci/style_check.py | 14 +++++++++++--- tests/clickhouse-test | 6 +++--- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 0f88f30d42c..4357bd3eb6b 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -7,3 +7,4 @@ self-hosted-runner: - stress-tester - style-checker - style-checker-aarch64 + - release-maker diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 7879c28ab8d..972aff90195 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -89,8 +89,8 @@ jobs: --volume=".:/ClickHouse" clickhouse/style-test \ /ClickHouse/tests/ci/changelog.py -v --debug-helpers \ --gh-user-or-token="$GH_TOKEN" --jobs=5 \ - --output="/ClickHouse/docs/changelogs/${RELEASE_TAG}.md" "${RELEASE_TAG}" - git add ./docs/changelogs/${RELEASE_TAG}.md + --output="/ClickHouse/docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }} + git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md echo "Generate Security" python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD diff --git a/pyproject.toml b/pyproject.toml index 39511e1a0d3..c89d46c0929 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ src_paths = ["src", "tests/ci", "tests/sqllogic"] [tool.pylint.'MESSAGES CONTROL'] # pytest.mark.parametrize is not callable (not-callable) disable = ''' + pointless-string-statement, + line-too-long, missing-docstring, too-few-public-methods, invalid-name, diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 83a51c66bce..7f4cf8c787a 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -40,7 +40,7 @@ class ShellRunner: return 0, "" print(f"Running shell command: [{command}]") if async_: - subprocess.Popen(command.split(" ")) + subprocess.Popen(command.split(" ")) # pylint:disable=consider-using-with return 0, "" result = subprocess.run( command + " 2>&1", @@ -316,9 +316,9 @@ class PackageDownloader: def __init__(self, release, commit_sha, version): assert version.startswith(release), "Invalid release branch or version" major, minor = map(int, release.split(".")) - self.package_names = self.PACKAGES + self.package_names = list(self.PACKAGES) if major > 
24 or (major == 24 and minor > 3): - self.package_names += self.EXTRA_PACKAGES + self.package_names += list(self.EXTRA_PACKAGES) self.release = release self.commit_sha = commit_sha self.version = version diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 2f556e3ed57..21fc02ce02a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -376,10 +376,10 @@ def main(): repo_urls = {} direct_urls: Dict[str, List[str]] = {} if pr_info.event_type == EventType.PULL_REQUEST: - release_or_pr = pr_info.number + release_or_pr = str(pr_info.number) sha = pr_info.sha elif pr_info.event_type == EventType.PUSH and pr_info.is_master: - release_or_pr = 0 + release_or_pr = str(0) sha = pr_info.sha else: release_or_pr = f"{args.version.major}.{args.version.minor}" diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 6b58ecece8d..36620d44a2d 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -16,7 +16,15 @@ from docker_images_helper import get_docker_image, pull_image from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo -from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results +from report import ( + ERROR, + FAILURE, + SUCCESS, + JobReport, + TestResults, + read_test_results, + FAIL, +) from ssh import SSHKey from stopwatch import Stopwatch @@ -204,8 +212,8 @@ def main(): autofix_description = "" fail_cnt = 0 for result in test_results: - if result.status == FAILURE: - # do not autofix if not only back failed + if result.status in (FAILURE, FAIL): + # do not autofix if not only black failed fail_cnt += 1 if args.push and fail_cnt == 1: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 0cf46732354..90fb9611151 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -711,9 +711,9 @@ def get_localzone(): class SettingsRandomizer: settings = { - "max_insert_threads": lambda: 12 - if random.random() < 0.03 - else random.randint(1, 3), + "max_insert_threads": lambda: ( + 12 if random.random() < 0.03 else random.randint(1, 3) + ), "group_by_two_level_threshold": threshold_generator(0.2, 0.2, 1, 1000000), "group_by_two_level_threshold_bytes": threshold_generator( 0.2, 0.2, 1, 50000000 From d99c56ae8cf2ddcc8e404b20eee4885c71a43fb1 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 16:13:56 +0200 Subject: [PATCH 289/299] CI: Autoskip all non-affected jobs in PRs --- .github/workflows/pull_request.yml | 2 +- tests/ci/ci.py | 19 ++++- tests/ci/ci_cache.py | 107 +++++++++++++------------- tests/ci/ci_config.py | 11 ++- tests/ci/ci_definitions.py | 6 +- tests/ci/report.py | 1 + tests/ci/test_ci_config.py | 119 +++++++++++++++-------------- 7 files changed, 146 insertions(+), 119 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 259e6d41110..c9f4f858825 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,7 +172,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() }} + if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 53bbf588e1b..9065e2798c8 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -996,7 +996,7 @@ def main() -> 
int: args.skip_jobs, ) - if IS_CI and pr_info.is_pr: + if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache: ci_cache.filter_out_not_affected_jobs() ci_cache.print_status() @@ -1086,6 +1086,16 @@ def main() -> int: print(status) print("::endgroup::") previous_status = status.state + print("Create dummy job report with job_skipped flag") + JobReport( + status=status.state, + description="", + test_results=[], + start_time="", + duration=0.0, + additional_files=[], + job_skipped=True, + ).dump() # ci cache check if not previous_status and not ci_settings.no_ci_cache: @@ -1136,7 +1146,7 @@ def main() -> int: has_oom_error = True job_report = JobReport.load() if JobReport.exist() else None - if job_report: + if job_report and not job_report.job_skipped: ch_helper = ClickHouseHelper() check_url = "" @@ -1242,6 +1252,9 @@ def main() -> int: description = "ERROR: Out Of Memory" else: description = "ERROR: Unknown job status" + print( + f"No job report for {[args.job_name]} - post status [{description}]" + ) gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) post_commit_status( @@ -1249,7 +1262,7 @@ def main() -> int: ERROR, "", description, - job_report.check_name or _get_ext_check_name(args.job_name), + _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index bc6761959b4..ae3b8f9e9a4 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -520,6 +520,35 @@ class CiCache: self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch ) + def has_evidence(self, job: str, job_config: CI.JobConfig) -> bool: + """ + checks if the job has been seen in master/release CI + function is to be used to check if change did not affect the job + :param job_config: + :param job: + :return: + """ + return ( + self.is_successful( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + or self.is_pending( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + or self.is_failed( + job=job, + batch=0, + num_batches=job_config.num_batches, + release_branch=not job_config.pr_only, + ) + ) + def is_failed( self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: @@ -677,74 +706,46 @@ class CiCache: def filter_out_not_affected_jobs(self): """ Filter is to be applied in PRs to remove jobs that are not affected by the change - It removes jobs from @jobs_to_do if it is a: - 1. test job and it is in @jobs_to_wait (no need to wait not affected jobs in PRs) - 2. test job and it has finished on release branch (even if failed) - 3. build job which is not required by any test job that is left in @jobs_to_do - :return: """ - # 1. - remove_from_await_list = [] - for job_name, job_config in self.jobs_to_wait.items(): - if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: - remove_from_await_list.append(job_name) - for job in remove_from_await_list: - print(f"Filter job [{job}] - test job and not affected by the change") - del self.jobs_to_wait[job] - del self.jobs_to_do[job] - - # 2. 
remove_from_to_do = [] + required_builds = [] for job_name, job_config in self.jobs_to_do.items(): if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: - batches_to_remove = [] - assert job_config.batches is not None - for batch in job_config.batches: - if self.is_failed( - job_name, batch, job_config.num_batches, release_branch=True - ): - print( - f"Filter [{job_name}/{batch}] - not affected by the change (failed on release branch)" - ) - batches_to_remove.append(batch) - for batch in batches_to_remove: - job_config.batches.remove(batch) - if not job_config.batches: - print( - f"Filter [{job_name}] - not affected by the change (failed on release branch)" - ) + if job_config.reference_job_name: + reference_name = job_config.reference_job_name + reference_config = self.jobs_to_do[reference_name] + else: + reference_name = job_name + reference_config = job_config + if self.has_evidence( + job=reference_name, + job_config=reference_config, + ): remove_from_to_do.append(job_name) - for job in remove_from_to_do: - del self.jobs_to_do[job] + else: + required_builds += ( + job_config.required_builds if job_config.required_builds else [] + ) - # 3. - required_builds = [] # type: List[str] - for job_name, job_config in self.jobs_to_do.items(): - if CI.is_test_job(job_name) and job_config.required_builds: - required_builds += job_config.required_builds - required_builds = list(set(required_builds)) - - remove_builds = [] # type: List[str] has_builds_to_do = False for job_name, job_config in self.jobs_to_do.items(): if CI.is_build_job(job_name): if job_name not in required_builds: - remove_builds.append(job_name) + remove_from_to_do.append(job_name) else: has_builds_to_do = True - for build_job in remove_builds: - print( - f"Filter build job [{build_job}] - not affected and not required by test jobs" - ) - del self.jobs_to_do[build_job] - if build_job in self.jobs_to_wait: - del self.jobs_to_wait[build_job] + if not has_builds_to_do: + remove_from_to_do.append(CI.JobNames.BUILD_CHECK) - if not has_builds_to_do and CI.JobNames.BUILD_CHECK in self.jobs_to_do: - print(f"Filter job [{CI.JobNames.BUILD_CHECK}] - no builds to do") - del self.jobs_to_do[CI.JobNames.BUILD_CHECK] + for job in remove_from_to_do: + print(f"Filter job [{job}] - not affected by the change") + if job in self.jobs_to_do: + del self.jobs_to_do[job] + if job in self.jobs_to_wait: + del self.jobs_to_wait[job] + self.jobs_to_skip.append(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d9f8e7d3afd..9b9ddee5326 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -413,7 +413,9 @@ class CI: release_only=True, ), JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + reference_job_name=JobNames.INTEGRATION_TEST_TSAN, ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], @@ -455,7 +457,10 @@ class CI: required_builds=[BuildNames.PACKAGE_UBSAN], ), JobNames.STATELESS_TEST_FLAKY_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600 + required_builds=[BuildNames.PACKAGE_ASAN], + pr_only=True, + timeout=3600, + reference_job_name=JobNames.STATELESS_TEST_RELEASE, ), JobNames.JEPSEN_KEEPER: JobConfig( 
required_builds=[BuildNames.BINARY_RELEASE], @@ -640,7 +645,7 @@ class CI: @classmethod def is_test_job(cls, job: str) -> bool: - return not cls.is_build_job(job) and job != cls.JobNames.STYLE_CHECK + return not cls.is_build_job(job) @classmethod def is_docs_job(cls, job: str) -> bool: diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index d2da73f4e46..acd9b7fa904 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -284,8 +284,12 @@ class JobConfig: # GH Runner type (tag from @Runners) runner_type: str - # used for config validation in ci unittests + # used in ci unittests for config validation job_name_keyword: str = "" + # name of another job that (if provided) should be used to check if job was affected by the change or not (in CiCache.has_evidence(job=@reference_job_name) call) + # for example: "Stateless flaky check" can use reference_job_name="Stateless tests (release)". "Stateless flaky check" does not run on master + # and there cannot be an evidence for it, so instead "Stateless tests (release)" job name can be used to check the evidence + reference_job_name: str = "" # builds required for the job (applicable for test jobs) required_builds: Optional[List[str]] = None # build config for the build job (applicable for builds) diff --git a/tests/ci/report.py b/tests/ci/report.py index bdaa2e15130..039c21e9c9b 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -296,6 +296,7 @@ class JobReport: build_dir_for_upload: Union[Path, str] = "" # if False no GH commit status will be created by CI need_commit_status: bool = True + job_skipped: bool = False def __post_init__(self): assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 558faca915e..e12a67bfc92 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 import unittest +import random + from ci_config import CI import ci as CIPY from ci_settings import CiSettings @@ -57,6 +59,18 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", ) + def test_job_config_has_proper_values(self): + for job in CI.JobNames: + if CI.JOB_CONFIGS[job].reference_job_name: + reference_job_config = CI.JOB_CONFIGS[ + CI.JOB_CONFIGS[job].reference_job_name + ] + # reference job must run in all workflows and has digest + self.assertTrue(reference_job_config.pr_only == False) + self.assertTrue(reference_job_config.release_only == False) + self.assertTrue(reference_job_config.run_always == False) + self.assertTrue(reference_job_config.digest != CI.DigestConfig()) + def test_required_checks(self): for job in CI.REQUIRED_CHECKS: if job in (CI.StatusNames.PR_CHECK, CI.StatusNames.SYNC): @@ -497,79 +511,68 @@ class TestCIConfig(unittest.TestCase): settings = CiSettings() settings.no_ci_cache = True pr_info = PRInfo(github_event=_TEST_EVENT_JSON) - pr_info.event_type = EventType.PUSH - pr_info.number = 0 - assert pr_info.is_release and not pr_info.is_merge_queue + pr_info.event_type = EventType.PULL_REQUEST + pr_info.number = 123 + assert pr_info.is_pr ci_cache = CIPY._configure_jobs( S3Helper(), pr_info, settings, skip_jobs=False, dry_run=True ) self.assertTrue(not ci_cache.jobs_to_skip, "Must be no jobs in skip list") - all_jobs_in_wf = list(ci_cache.jobs_to_do) assert not ci_cache.jobs_to_wait assert not ci_cache.jobs_to_skip + MOCK_AFFECTED_JOBS = [ + 
CI.JobNames.STATELESS_TEST_S3_DEBUG, + CI.JobNames.STRESS_TEST_TSAN, + ] + MOCK_REQUIRED_BUILDS = [] + # pretend there are pending jobs that we need to wait for job, job_config in ci_cache.jobs_to_do.items(): - ci_cache.jobs_to_wait[job] = job_config + if job in MOCK_AFFECTED_JOBS: + MOCK_REQUIRED_BUILDS += job_config.required_builds + elif job not in MOCK_AFFECTED_JOBS: + ci_cache.jobs_to_wait[job] = job_config - # remove couple tests from to_wait and - # expect they are preserved in @jobs_to_to along with required package_asan - del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_ASAN] - del ci_cache.jobs_to_wait[CI.JobNames.INTEGRATION_TEST_TSAN] - del ci_cache.jobs_to_wait[CI.JobNames.STATELESS_TEST_MSAN] - - # pretend we have some batches failed for one of the job from the to_do list - failed_job = CI.JobNames.INTEGRATION_TEST_TSAN - failed_job_config = ci_cache.jobs_to_do[failed_job] - FAILED_BATCHES = [0, 3] - for batch in FAILED_BATCHES: - assert batch < failed_job_config.num_batches - record = CiCache.Record( - record_type=CiCache.RecordType.FAILED, - job_name=failed_job, - job_digest=ci_cache.job_digests[failed_job], - batch=batch, - num_batches=failed_job_config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.FAILED.value: - records_[record.to_str_key()] = record - - # pretend we have all batches failed for one of the job from the to_do list - failed_job = CI.JobNames.STATELESS_TEST_MSAN - failed_job_config = ci_cache.jobs_to_do[failed_job] - assert failed_job_config.num_batches > 1 - for batch in range(failed_job_config.num_batches): - record = CiCache.Record( - record_type=CiCache.RecordType.FAILED, - job_name=failed_job, - job_digest=ci_cache.job_digests[failed_job], - batch=batch, - num_batches=failed_job_config.num_batches, - release_branch=True, - ) - for record_t_, records_ in ci_cache.records.items(): - if record_t_.value == CiCache.RecordType.FAILED.value: - records_[record.to_str_key()] = record + for job, job_config in ci_cache.jobs_to_do.items(): + if job_config.reference_job_name: + # jobs with reference_job_name in config are not supposed to have records in the cache - continue + continue + if job in MOCK_AFFECTED_JOBS: + continue + for batch in range(job_config.num_batches): + # add any record into cache + record = CiCache.Record( + record_type=random.choice( + [ + CiCache.RecordType.FAILED, + CiCache.RecordType.PENDING, + CiCache.RecordType.SUCCESSFUL, + ] + ), + job_name=job, + job_digest=ci_cache.job_digests[job], + batch=batch, + num_batches=job_config.num_batches, + release_branch=True, + ) + for record_t_, records_ in ci_cache.records.items(): + if record_t_.value == CiCache.RecordType.FAILED.value: + records_[record.to_str_key()] = record ci_cache.filter_out_not_affected_jobs() - expected_to_do = [ - CI.JobNames.STATELESS_TEST_ASAN, - CI.BuildNames.PACKAGE_ASAN, - CI.JobNames.INTEGRATION_TEST_TSAN, - CI.BuildNames.PACKAGE_TSAN, - CI.JobNames.BUILD_CHECK, - ] + expected_to_do = ( + [ + CI.JobNames.BUILD_CHECK, + ] + + MOCK_AFFECTED_JOBS + + MOCK_REQUIRED_BUILDS + ) self.assertCountEqual( list(ci_cache.jobs_to_wait), [ - CI.BuildNames.PACKAGE_ASAN, - CI.BuildNames.PACKAGE_TSAN, CI.JobNames.BUILD_CHECK, - ], + ] + + MOCK_REQUIRED_BUILDS, ) self.assertCountEqual(list(ci_cache.jobs_to_do), expected_to_do) - self.assertTrue(ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches) - for batch in ci_cache.jobs_to_do[CI.JobNames.INTEGRATION_TEST_TSAN].batches: - 
self.assertTrue(batch not in FAILED_BATCHES) From 162d875aee7ee8f9b2c055b8506c1e6467ad2d77 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 18:28:13 +0200 Subject: [PATCH 290/299] persistent job report path --- tests/ci/report.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 039c21e9c9b..5ca911f638f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -23,7 +23,7 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI from ci_utils import normalize_string -from env_helper import REPORT_PATH, TEMP_PATH +from env_helper import REPORT_PATH, GITHUB_WORKSPACE logger = logging.getLogger(__name__) @@ -244,7 +244,8 @@ HTML_TEST_PART = """ """ BASE_HEADERS = ["Test name", "Test status"] -JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json" +# should not be in TEMP directory or any directory that may be cleaned during the job execution +JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" @dataclass From f9b1f2498a2c5044f6b8cc0859abaf66a87034a0 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 18:49:23 +0200 Subject: [PATCH 291/299] fix --- tests/ci/ci.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 9065e2798c8..5e13def95f1 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1246,6 +1246,8 @@ def main() -> int: indata["build"], ch_helper, ) + elif job_report.job_skipped: + print(f"Skipped after rerun check {[args.job_name]} - do nothing") else: if CI.is_test_job(args.job_name): if has_oom_error: From 96d3c311900fcb089cfccef965847634bd23de4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jul 2024 18:51:00 +0200 Subject: [PATCH 292/299] Update play.html --- programs/server/play.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/play.html b/programs/server/play.html index b5bcc687c27..9590a65524c 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -639,7 +639,7 @@ renderChart(json); } else { renderUnparsedResult(response); - stats.innerText = `Elapsed: ${(elapsed_msec/1000).toFixed(3)} sec.`; + stats.innerText = `Elapsed (client-side): ${(elapsed_msec / 1000).toFixed(3)} sec.`; } document.getElementById('check-mark').style.display = 'inline'; } else { From 09b753ecc59e4e8bec78f63faac97e89df4c6c80 Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 13 Jul 2024 20:15:47 +0200 Subject: [PATCH 293/299] CI: Check job's exit status and report if killed --- tests/ci/ci.py | 52 +++++++++++++++++++++++--------------------- tests/ci/ci_utils.py | 12 ++++++++++ tests/ci/report.py | 26 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 25 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 5e13def95f1..f99a5dad92f 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -15,7 +15,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GHActions, normalize_string, Shell +from ci_utils import GHActions, normalize_string, Utils from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -264,7 +264,7 @@ def check_missing_images_on_dockerhub( def _pre_action(s3, indata, pr_info): print("Clear dmesg") - Shell.run("sudo dmesg --clear ||:") + Utils.clear_dmesg() CommitStatusData.cleanup() JobReport.cleanup() BuildResult.cleanup() @@ -1035,6 +1035,7 @@ def main() -> int: elif args.pre: assert indata, "Run config must be provided via 
--infile" _pre_action(s3, indata, pr_info) + JobReport.create_pre_report().dump() ### RUN action: start elif args.run: @@ -1131,22 +1132,22 @@ def main() -> int: exit_code = 1 else: exit_code = _run_test(check_name, args.run_command) + job_report = JobReport.load() if JobReport.exist() else None + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + job_report.exit_code = exit_code + job_report.dump() ### RUN action: end ### POST action: start elif args.post: - has_oom_error = False - if Shell.check( - "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" - ): - print("WARNING: OOM while job execution") - CIBuddy(dry_run=not pr_info.is_release).post_error( - "Out Of Memory", job_name=_get_ext_check_name(args.job_name) - ) - has_oom_error = True - job_report = JobReport.load() if JobReport.exist() else None - if job_report and not job_report.job_skipped: + assert ( + job_report + ), "BUG. There must be job report either real report, or pre-report if job was killed" + if not job_report.job_skipped and not job_report.pre_report: + # it's a real job report ch_helper = ClickHouseHelper() check_url = "" @@ -1248,29 +1249,30 @@ def main() -> int: ) elif job_report.job_skipped: print(f"Skipped after rerun check {[args.job_name]} - do nothing") - else: + elif job_report.job_skipped: + print(f"Job was skipped {[args.job_name]} - do nothing") + elif job_report.pre_report: + print(f"ERROR: Job was killed - generate evidence") + job_report.update_duration() + # Job was killed! + if Utils.is_killed_with_oom(): + print("WARNING: OOM while job execution") + error = f"Out Of Memory, exit_code {job_report.exit_code}, after {job_report.duration}s" + else: + error = f"Unknown, exit_code {job_report.exit_code}, after {job_report.duration}s" + CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name)) if CI.is_test_job(args.job_name): - if has_oom_error: - description = "ERROR: Out Of Memory" - else: - description = "ERROR: Unknown job status" - print( - f"No job report for {[args.job_name]} - post status [{description}]" - ) gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) post_commit_status( commit, ERROR, "", - description, + "Error: " + error, _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) - else: - # no job report - print(f"No job report for {[args.job_name]} - do nothing") ### POST action: end ### MARK SUCCESS action: start diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index abc4a88989d..44bd37fe260 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -96,3 +96,15 @@ class Utils: if match: return int(match.group(1)) return None + + @staticmethod + def is_killed_with_oom(): + if Shell.check( + "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'" + ): + return True + return False + + @staticmethod + def clear_dmesg(): + Shell.run("sudo dmesg --clear ||:") diff --git a/tests/ci/report.py b/tests/ci/report.py index 5ca911f638f..4be7b438f4f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -297,7 +297,33 @@ class JobReport: build_dir_for_upload: Union[Path, str] = "" # if False no GH commit status will be created by CI need_commit_status: bool = True + # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False + # indicates 
that report generated by CI script in order to check later if job was killed before real report is generated + pre_report: bool = False + exit_code: int = -1 + + @staticmethod + def create_pre_report() -> "JobReport": + return JobReport( + status=ERROR, + description="", + test_results=[], + start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"), + duration=0.0, + additional_files=[], + pre_report=True, + ) + + def update_duration(self): + if not self.start_time: + self.duration = 0.0 + else: + start_time = datetime.datetime.strptime( + self.start_time, "%Y-%m-%d %H:%M:%S" + ) + current_time = datetime.datetime.utcnow() + self.duration = (current_time - start_time).total_seconds() def __post_init__(self): assert self.status in (SUCCESS, ERROR, FAILURE, PENDING) From 707994b876145b93bcd57e204f1b2d449496e998 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jul 2024 21:48:53 +0200 Subject: [PATCH 294/299] Add a test for #37557 --- .../03204_distributed_with_scalar_subquery.reference | 0 .../03204_distributed_with_scalar_subquery.sql | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference create mode 100644 tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql new file mode 100644 index 00000000000..0a07ce48268 --- /dev/null +++ b/tests/queries/0_stateless/03204_distributed_with_scalar_subquery.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_c3oollc8r; +CREATE TABLE t_c3oollc8r (c_k37 Int32, c_y String, c_bou Int32, c_g1 Int32, c_lfntfzg Int32, c_kntw50q Int32) ENGINE = MergeTree ORDER BY (); + +SELECT ( + SELECT c_k37 + FROM t_c3oollc8r + ) > c_lfntfzg +FROM remote('127.0.0.{1,2}', currentDatabase(), t_c3oollc8r); + +DROP TABLE t_c3oollc8r; From dfb831f86119588f63a48f12807c44d568995ab4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 13 Jul 2024 22:07:46 +0000 Subject: [PATCH 295/299] Try to fix an msan issue --- src/Functions/changeDate.cpp | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/Functions/changeDate.cpp b/src/Functions/changeDate.cpp index 5965f3d1d00..19e4c165ee3 100644 --- a/src/Functions/changeDate.cpp +++ b/src/Functions/changeDate.cpp @@ -1,6 +1,5 @@ #include "Common/DateLUTImpl.h" #include "Common/Exception.h" -#include #include #include #include @@ -101,19 +100,16 @@ public: template ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & input_type, const DataTypePtr & result_type, size_t input_rows_count) const { - bool is_const = (isColumnConst(*arguments[0].column) && isColumnConst(*arguments[1].column)); - size_t result_rows_count = (is_const ? 
1 : input_rows_count); - typename ResultDataType::ColumnType::MutablePtr result_col; if constexpr (std::is_same_v) { auto scale = DataTypeDateTime64::default_scale; if constexpr (std::is_same_v) scale = typeid_cast(*result_type).getScale(); - result_col = ResultDataType::ColumnType::create(result_rows_count, scale); + result_col = ResultDataType::ColumnType::create(input_rows_count, scale); } else - result_col = ResultDataType::ColumnType::create(result_rows_count); + result_col = ResultDataType::ColumnType::create(input_rows_count); auto date_time_col = arguments[0].column->convertToFullIfNeeded(); const auto & date_time_col_data = typeid_cast(*date_time_col).getData(); @@ -133,7 +129,7 @@ public: for (size_t j = 0; j < scale; ++j) deg *= 10; - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i] / deg); Int64 fraction = date_time_col_data[i] % deg; @@ -144,7 +140,7 @@ public: else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; result_col_data[i] = getChangedDate(time, value_col_data[i], result_type, date_lut, 3, 0); @@ -153,7 +149,7 @@ public: else if constexpr (std::is_same_v && std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = static_cast(date_lut.toNumYYYYMMDD(ExtendedDayNum(date_time_col_data[i]))) * 1'000'000; result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); @@ -162,7 +158,7 @@ public: else if constexpr (std::is_same_v) { const auto & date_lut = typeid_cast(*result_type).getTimeZone(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time = date_lut.toNumYYYYMMDDhhmmss(date_time_col_data[i]); result_col_data[i] = static_cast(getChangedDate(time, value_col_data[i], result_type, date_lut)); @@ -171,7 +167,7 @@ public: else { const auto & date_lut = DateLUT::instance(); - for (size_t i = 0; i < result_rows_count; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { Int64 time; if (isDate(input_type)) @@ -186,9 +182,6 @@ public: } } - if (is_const) - return ColumnConst::create(std::move(result_col), input_rows_count); - return result_col; } From 80c8511004008e5caaec08ad7f869baa578cad6c Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 11:39:31 +0200 Subject: [PATCH 296/299] CI: Add retry for GH set_comment_status call --- tests/ci/ci.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f99a5dad92f..979d108378d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -6,6 +6,7 @@ import os import re import subprocess import sys +import time from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional @@ -550,7 +551,17 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: except Exception as e: raise e print("Going to update overall CI report") - set_status_comment(commit, pr_info) + for retry in range(2): + try: + set_status_comment(commit, pr_info) + break + except Exception as e: + print( + f"WARNING: Failed to update CI Running status, 
attempt [{retry + 1}], exception [{e}]" + ) + time.sleep(1) + else: + print("ERROR: All retry attempts failed.") print("... CI report update - done") From 379706d2d5118dcd53c87f661b7a06add00bee18 Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 11:47:18 +0200 Subject: [PATCH 297/299] fix for job filtering --- tests/ci/ci.py | 4 ++-- tests/ci/ci_cache.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 979d108378d..8dcf3fc4c69 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1007,10 +1007,10 @@ def main() -> int: args.skip_jobs, ) + ci_cache.print_status() if IS_CI and pr_info.is_pr and not ci_settings.no_ci_cache: ci_cache.filter_out_not_affected_jobs() - - ci_cache.print_status() + ci_cache.print_status() if IS_CI and not pr_info.is_merge_queue: # wait for pending jobs to be finished, await_jobs is a long blocking call diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index ae3b8f9e9a4..7552b10b873 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -714,7 +714,7 @@ class CiCache: if CI.is_test_job(job_name) and job_name != CI.JobNames.BUILD_CHECK: if job_config.reference_job_name: reference_name = job_config.reference_job_name - reference_config = self.jobs_to_do[reference_name] + reference_config = CI.JOB_CONFIGS[reference_name] else: reference_name = job_name reference_config = job_config @@ -745,7 +745,8 @@ class CiCache: del self.jobs_to_do[job] if job in self.jobs_to_wait: del self.jobs_to_wait[job] - self.jobs_to_skip.append(job) + if job in self.jobs_to_skip: + self.jobs_to_skip.remove(job) def await_pending_jobs(self, is_release: bool, dry_run: bool = False) -> None: """ From f374cdfe69a8d2bc1f7360ead578a5323331650d Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 13:53:20 +0200 Subject: [PATCH 298/299] CI: Fix for job filtering in PRs --- tests/ci/ci_cache.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 7552b10b873..5c6d3b05021 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -533,19 +533,19 @@ class CiCache: job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) or self.is_pending( job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) or self.is_failed( job=job, batch=0, num_batches=job_config.num_batches, - release_branch=not job_config.pr_only, + release_branch=True, ) ) From 9dabf205e623680807bffe022d8b8b6b856779d8 Mon Sep 17 00:00:00 2001 From: Max K Date: Sun, 14 Jul 2024 14:01:10 +0200 Subject: [PATCH 299/299] CI: Fix for await to always await for builds --- tests/ci/ci_cache.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 5c6d3b05021..5229c754d70 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -765,14 +765,19 @@ class CiCache: MAX_JOB_NUM_TO_WAIT = 3 round_cnt = 0 - # FIXME: temporary experiment: lets enable await for PR' workflows but for a shorter time + def _has_build_job(): + for job in self.jobs_to_wait: + if CI.is_build_job(job): + return True + return False + if not is_release: - MAX_ROUNDS_TO_WAIT = 3 + # in PRs we can wait only for builds, TIMEOUT*MAX_ROUNDS_TO_WAIT=100min is enough + MAX_ROUNDS_TO_WAIT = 2 while ( - len(self.jobs_to_wait) > MAX_JOB_NUM_TO_WAIT - and round_cnt < MAX_ROUNDS_TO_WAIT - ): + len(self.jobs_to_wait) > 
MAX_JOB_NUM_TO_WAIT or _has_build_job() + ) and round_cnt < MAX_ROUNDS_TO_WAIT: round_cnt += 1 GHActions.print_in_group( f"Wait pending jobs, round [{round_cnt}/{MAX_ROUNDS_TO_WAIT}]:",