From 3cfb921befa895e445e8d7b98e639015e1e41aa0 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 18:41:53 +0000 Subject: [PATCH 001/114] Fix using schema_inference_make_columns_nullable=0 --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 4 +++ src/Processors/Formats/ISchemaReader.cpp | 2 +- .../Formats/Impl/ArrowBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 9 ++++--- .../Formats/Impl/ArrowColumnToCHColumn.h | 3 ++- .../Impl/NativeORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ParquetBlockInputFormat.cpp | 7 +++-- .../03036_parquet_arrow_nullable.reference | 26 +++++++++++++++++++ .../03036_parquet_arrow_nullable.sh | 7 +++++ 13 files changed, 65 insertions(+), 15 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0808e8eb49f..bc9c6daab1b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1116,7 +1116,7 @@ class IColumn; M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ - M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ + M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. 
When set to 'auto', ClickHosue will use information about nullability from the data..", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a78836ff63c..8d8257b9abc 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -255,7 +255,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.schema_inference_hints = settings.schema_inference_hints; - format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; + format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2); format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f0359218775..479b1a89adf 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -75,7 +75,7 @@ struct FormatSettings Raw }; - bool schema_inference_make_columns_nullable = true; + UInt64 schema_inference_make_columns_nullable = true; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 3c374ada9e6..c04682e8765 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1262,7 +1262,11 @@ namespace if (checkCharCaseInsensitive('n', buf)) { if (checkStringCaseInsensitive("ull", buf)) + { + if (settings.schema_inference_make_columns_nullable == 0) + return std::make_shared(); return makeNullable(std::make_shared()); + } else if (checkStringCaseInsensitive("an", buf)) return std::make_shared(); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 45523700a5d..569d4bb39e7 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -54,7 +54,7 @@ void checkFinalInferredType( type = default_type; } - if (settings.schema_inference_make_columns_nullable) + if (settings.schema_inference_make_columns_nullable == 1) type = makeNullableRecursively(type); /// In case when data for some column could contain nulls and regular values, /// resulting inferred type is Nullable. diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 72a93002669..cf079e52db0 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema() schema = file_reader->schema(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, stream ? 
"ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + stream ? "ArrowStream" : "Arrow", + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ed91913de4d..bcc8bfecdc6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -712,6 +712,7 @@ struct ReadColumnFromArrowColumnSettings FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; bool allow_arrow_null_type; bool skip_columns_with_unsupported_types; + bool allow_inferring_nullable_columns; }; static ColumnWithTypeAndName readColumnFromArrowColumn( @@ -1085,7 +1086,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( bool is_map_nested_column, const ReadColumnFromArrowColumnSettings & settings) { - bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); + bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns; if (read_as_nullable_column && arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::LARGE_LIST && @@ -1149,14 +1150,16 @@ static std::shared_ptr createArrowColumn(const std::shared_ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types) + bool skip_columns_with_unsupported_types, + bool allow_inferring_nullable_columns) { ReadColumnFromArrowColumnSettings settings { .format_name = format_name, .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, .allow_arrow_null_type = false, - .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types, + .allow_inferring_nullable_columns = allow_inferring_nullable_columns, }; ColumnsWithTypeAndName sample_columns; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 27e9afdf763..8521cd2f410 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -34,7 +34,8 @@ public: static Block arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types = false); + bool skip_columns_with_unsupported_types = false, + bool allow_inferring_nullable_columns = true); struct DictionaryInfo { diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 58bec8120f1..b0fd6789d1a 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema() header.insert(ColumnWithTypeAndName{type, name}); } - if (format_settings.schema_inference_make_columns_nullable) + if 
(format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a3c218fa26e..2266c0b488c 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema() { initializeIfNeeded(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "ORC", + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index bc5e8292192..b116070b8df 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -810,8 +810,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema() THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "Parquet", + format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference index 985f8192f26..d15f0d8365d 100644 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -1,40 +1,66 @@ Parquet a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Arrow a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Parquet b Array(UInt64) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Arrow b Array(Nullable(UInt64)) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Parquet c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Arrow c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Parquet d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Arrow d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) 
+d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Parquet e Map(UInt64, String) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Arrow e Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Parquet f Map(UInt64, Map(UInt64, String)) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Arrow f Map(UInt64, Map(UInt64, Nullable(String))) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Parquet g String g_nullable Nullable(String) +g String +g_nullable String Arrow g LowCardinality(String) g_nullable LowCardinality(String) +g LowCardinality(String) +g_nullable LowCardinality(String) diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh index bdd641e2b94..379756f78f3 100755 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh @@ -14,6 +14,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -21,6 +22,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -28,6 +30,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -35,6 +38,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -42,6 +46,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -49,6 +54,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, 
Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -56,6 +62,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done From 70708fd5dcf633d4d3147240195554587f4fb14f Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 19:19:02 +0000 Subject: [PATCH 002/114] Update docs, make better --- docs/en/interfaces/schema-inference.md | 7 ++----- docs/en/operations/settings/settings-formats.md | 4 ++-- src/Processors/Formats/ISchemaReader.cpp | 5 ----- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..5b3cd179e21 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1385,7 +1385,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul #### schema_inference_make_columns_nullable Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Enabled by default. 
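[Review note] The `valueOr(2)` mapping in patch 001's `FormatFactory.cpp` hunk collapses the new `UInt64Auto` setting into three plain integers. Below is a minimal sketch of how the rest of the series interprets them; the enum and helper names are hypothetical stand-ins, and only the 0/1/2 values and the `== 1` / `!= 0` comparisons are taken from the hunks themselves:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical condensation of the tri-state setting: FormatFactory.cpp maps
// the UInt64Auto value with valueOr(2), so 0 = never Nullable,
// 1 = always Nullable, 2 = 'auto' (trust nullability info in data/metadata).
enum class MakeNullable : uint64_t { Never = 0, Always = 1, Auto = 2 };

// What the Arrow/ORC/Parquet schema readers pass down as
// allow_inferring_nullable_columns: the `!= 0` checks in the reader hunks.
bool allowInferringNullableColumns(MakeNullable mode)
{
    return mode != MakeNullable::Never;
}

// Only the explicit value 1 recursively wraps every inferred type in
// Nullable: the `== 1` checks in ISchemaReader.cpp and the schema readers.
bool forceNullableRecursively(MakeNullable mode)
{
    return mode == MakeNullable::Always;
}

int main()
{
    assert(allowInferringNullableColumns(MakeNullable::Auto));
    assert(!forceNullableRecursively(MakeNullable::Auto)); // 'auto' trusts metadata
    assert(!allowInferringNullableColumns(MakeNullable::Never));
}
```

The asymmetry between `== 1` and `!= 0` is the heart of the fix: under `auto` (2), a column becomes `Nullable` only when the file's data or metadata says it can hold NULLs, which is exactly the behavior the documentation paragraph above describes.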
@@ -1408,15 +1408,13 @@ DESC format(JSONEachRow, $$ └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql -SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 0; +SET schema_inference_make_columns_nullable = 'auto'; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} $$) ``` ```response - ┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ id │ Int64 │ │ │ │ │ │ │ age │ Int64 │ │ │ │ │ │ @@ -1428,7 +1426,6 @@ DESC format(JSONEachRow, $$ ```sql SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 1; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8b40cd81ac..57812ef0e03 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} -Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. +Controls making inferred types `Nullable` in schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Default value: `true`. diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 569d4bb39e7..e002e64b7e5 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -56,11 +56,6 @@ void checkFinalInferredType( if (settings.schema_inference_make_columns_nullable == 1) type = makeNullableRecursively(type); - /// In case when data for some column could contain nulls and regular values, - /// resulting inferred type is Nullable. - /// If input_format_null_as_default is enabled, we should remove Nullable type. 
- else if (settings.null_as_default) - type = removeNullable(type); } void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type) From e2feaefcaf0e88f86f303c068edcbdacaeb67252 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:13:49 +0200 Subject: [PATCH 003/114] Update src/Core/Settings.h Co-authored-by: Alexey Katsman --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bc9c6daab1b..2417ddd39e8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1116,7 +1116,7 @@ class IColumn; M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ - M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHosue will use information about nullability from the data..", 0) \ + M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. 
When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ From ccb7ecb9a22ddeabe93a5b907e3ad688b04966b4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:13:57 +0200 Subject: [PATCH 004/114] Update src/Formats/FormatSettings.h Co-authored-by: Alexey Katsman --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 479b1a89adf..81b34ff0c55 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -75,7 +75,7 @@ struct FormatSettings Raw }; - UInt64 schema_inference_make_columns_nullable = true; + UInt64 schema_inference_make_columns_nullable = 1; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; From 92a9b29b45c254e670fe9f67114b5af890bfb5cb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 15 Aug 2024 22:25:21 +0800 Subject: [PATCH 005/114] devirtualize format reader --- .../Formats/Impl/BinaryRowInputFormat.cpp | 4 +- .../Formats/Impl/BinaryRowInputFormat.h | 7 ++- .../Formats/Impl/CSVRowInputFormat.cpp | 2 +- .../Formats/Impl/CSVRowInputFormat.h | 7 ++- .../Impl/CustomSeparatedRowInputFormat.h | 3 +- .../Impl/JSONCompactEachRowRowInputFormat.h | 4 +- .../Impl/JSONCompactRowInputFormat.cpp | 2 +- .../Formats/Impl/JSONCompactRowInputFormat.h | 4 +- .../Formats/Impl/TabSeparatedRowInputFormat.h | 4 +- .../RowInputFormatWithNamesAndTypes.cpp | 61 +++++++++++++------ .../Formats/RowInputFormatWithNamesAndTypes.h | 5 +- 11 files changed, 67 insertions(+), 36 deletions(-) diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index c5336f3bcc7..b549f2de975 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -15,8 +15,8 @@ namespace ErrorCodes } template -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) - : RowInputFormatWithNamesAndTypes( +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes>( header, in_, params_, diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 6f2042d1315..6a4ca8f6418 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -10,13 +10,16 @@ namespace DB class ReadBuffer; +template +class BinaryFormatReader; + /** A stream for inputting data in a binary line-by-line format. 
*/ template -class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes +class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes> { public: - BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); + BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowInputFormat"; } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index b7f84748f61..cf58a4057c8 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -61,7 +61,7 @@ CSVRowInputFormat::CSVRowInputFormat( bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_) + std::unique_ptr format_reader_) : RowInputFormatWithNamesAndTypes( header_, *in_, diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index fe4d4e3be08..86af5028438 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -13,10 +12,12 @@ namespace DB { +class CSVFormatReader; + /** A stream for inputting data in csv format. * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values. */ -class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes +class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes { public: /** with_names - in the first line the header with column names @@ -32,7 +33,7 @@ public: protected: CSVRowInputFormat(const Block & header_, std::shared_ptr in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); CSVRowInputFormat(const Block & header_, std::shared_ptr in_buf_, const Params & params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 58f78e5af42..b1d35947ba8 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -9,7 +9,8 @@ namespace DB { -class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes +class CustomSeparatedFormatReader; +class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: CustomSeparatedRowInputFormat( diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index ebeb939e7fa..50589329073 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -11,7 +11,7 @@ namespace DB { class ReadBuffer; - +class JSONCompactEachRowFormatReader; /** A stream for reading data in a bunch of formats: * - JSONCompactEachRow @@ -20,7 +20,7 @@ class ReadBuffer; * - JSONCompactStringsEachRowWithNamesAndTypes * */ -class JSONCompactEachRowRowInputFormat final : 
public RowInputFormatWithNamesAndTypes +class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: JSONCompactEachRowRowInputFormat( diff --git a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp index 63066fc8220..63ced05dd3a 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes JSONCompactRowInputFormat::JSONCompactRowInputFormat( const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_) - : RowInputFormatWithNamesAndTypes( + : RowInputFormatWithNamesAndTypes( header_, in_, params_, false, false, false, format_settings_, std::make_unique(in_, format_settings_)) { } diff --git a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h index 3a93e7149b0..eb70f6ec2a3 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowInputFormat.h @@ -5,8 +5,8 @@ namespace DB { - -class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes +class JSONCompactFormatReader; +class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: JSONCompactRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 32abd532a52..3c6efe9ac4c 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -10,9 +10,11 @@ namespace DB { +class TabSeparatedFormatReader; + /** A stream to input data in tsv format. 
*/ -class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes +class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: /** with_names - the first line is the header with the names of the columns diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index ae30d741c2f..5701b80ecc2 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -1,14 +1,20 @@ -#include -#include -#include -#include #include #include -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -44,7 +50,8 @@ namespace } } -RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( +template +RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( const Block & header_, ReadBuffer & in_, const Params & params_, @@ -52,7 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_, + std::unique_ptr format_reader_, bool try_detect_header_) : RowInputFormatWithDiagnosticInfo(header_, in_, params_) , format_settings(format_settings_) @@ -66,7 +73,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } -void RowInputFormatWithNamesAndTypes::readPrefix() +template +void RowInputFormatWithNamesAndTypes::readPrefix() { /// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*). 
/// Also, we assume that column name or type cannot contain BOM, so, if format has header, @@ -138,7 +146,8 @@ void RowInputFormatWithNamesAndTypes::readPrefix() } } -void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) +template +void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) { auto & read_buf = getReadBuffer(); PeekableReadBuffer * peekable_buf = dynamic_cast(&read_buf); @@ -201,7 +210,8 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & colu peekable_buf->dropCheckpoint(); } -bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) +template +bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) { if (unlikely(end_of_stream)) return false; @@ -280,7 +290,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE return true; } -size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) +template +size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) { if (unlikely(end_of_stream)) return 0; @@ -304,7 +315,8 @@ size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size) return num_rows; } -void RowInputFormatWithNamesAndTypes::resetParser() +template +void RowInputFormatWithNamesAndTypes::resetParser() { RowInputFormatWithDiagnosticInfo::resetParser(); column_mapping->column_indexes_for_input_fields.clear(); @@ -313,7 +325,8 @@ void RowInputFormatWithNamesAndTypes::resetParser() end_of_stream = false; } -void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) +template +void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) { const auto & index = column_mapping->column_indexes_for_input_fields[file_column]; if (index) @@ -328,7 +341,8 @@ void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & ty } } -bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) +template +bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) { if (in->eof()) { @@ -374,12 +388,14 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu return format_reader->parseRowEndWithDiagnosticInfo(out); } -bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos) +template +bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos) { return format_reader->isGarbageAfterField(index, pos); } -void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) +template +void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) { format_reader->setReadBuffer(in_); IInputFormat::setReadBuffer(in_); @@ -582,5 +598,12 @@ void FormatWithNamesAndTypesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr transformInferredTypesIfNeeded(type, new_type, format_settings); } +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes; +template class RowInputFormatWithNamesAndTypes>; +template class RowInputFormatWithNamesAndTypes>; } diff --git 
a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b7d9507151e..cd836cb00dc 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -26,6 +26,7 @@ class FormatWithNamesAndTypesReader; /// will be compared types from header. /// It's important that firstly this class reads/skips names and only /// then reads/skips types. So you can this invariant. +template class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo { protected: @@ -41,7 +42,7 @@ protected: bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_, + std::unique_ptr format_reader_, bool try_detect_header_ = false); void resetParser() override; @@ -70,7 +71,7 @@ private: bool is_header_detected = false; protected: - std::unique_ptr format_reader; + std::unique_ptr format_reader; Block::NameMap column_indexes_by_names; }; From 370b6bdc7b6d97f0e697e99ccd06a25e97651406 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 13:38:30 +0000 Subject: [PATCH 006/114] Update tests --- tests/queries/0_stateless/02497_schema_inference_nulls.sql | 4 ++-- .../0_stateless/02784_schema_inference_null_as_default.sql | 4 ++-- ..._max_bytes_to_read_for_schema_inference_in_cache.reference | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index b78b5709dbb..5670b031e8b 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -40,7 +40,7 @@ desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]'); desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONCompactEachRow, '[[1, 2]]'); desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); @@ -59,7 +59,7 @@ desc format(CSV, '"[[], [null], [1, 2, 3]]"'); desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(CSV, '"[1,2]"'); desc format(CSV, '"[NULL, 1]"'); desc format(CSV, '"[1, 2]"\n"[3]"'); diff --git a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql index 9c9f99d8283..571e3ab4f25 100644 --- a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql +++ b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql @@ -1,7 +1,7 @@ desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; -desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; -select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; +desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings 
schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; +select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference index cd109daac52..3b9d88edc19 100644 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -1,2 +1,2 @@ x Nullable(Int64) -schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=1, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false From 5fe46af4221a36ecb4566ca7bfad314d732f1de2 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:12:51 +0200 Subject: [PATCH 007/114] Update 02497_schema_inference_nulls.sql --- tests/queries/0_stateless/02497_schema_inference_nulls.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index 5670b031e8b..d62fc76d9b9 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -18,7 +18,7 @@ desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}'); desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONEachRow, '{"x" : [1, 2]}'); desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); From 00a27669df3f2153401d62b4d1681914bd0f440a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 20:22:14 +0200 Subject: [PATCH 008/114] Fix builds --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 1 + 
1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 69b0c095ad1..6263351897e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1258,6 +1258,7 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam .date_time_overflow_behavior = date_time_overflow_behavior, .allow_arrow_null_type = true, .skip_columns_with_unsupported_types = false + .allow_inferring_nullable_columns = true; }; Columns columns; From 0ccbb554b9d0b7055415569559e029060261243e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:58:14 +0200 Subject: [PATCH 009/114] Update 02995_index_7.sh --- tests/queries/0_stateless/02995_index_7.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02995_index_7.sh b/tests/queries/0_stateless/02995_index_7.sh index a5fdd98b2f8..7a03b0d4c1a 100755 --- a/tests/queries/0_stateless/02995_index_7.sh +++ b/tests/queries/0_stateless/02995_index_7.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage +# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 24eeaffa7a3ddbfa0fb7bc4546942bc18cab06af Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:02:09 +0200 Subject: [PATCH 010/114] init --- src/Storages/VirtualColumnUtils.cpp | 11 +++++----- .../03203_hive_style_partitioning.reference | 1 + .../03203_hive_style_partitioning.sh | 19 +++++++++--------- .../partitioning/a=b/a=b/sample.parquet | Bin 0 -> 1308 bytes 4 files changed, 17 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index d932f5cc469..edf50907752 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -136,14 +136,15 @@ std::unordered_map parseHivePartitioningKeysAndValues( std::unordered_map key_values; std::string key, value; - std::unordered_set used_keys; + std::unordered_map used_keys; while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) { - if (used_keys.contains(key)) - throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key); - used_keys.insert(key); + auto it = used_keys.find(key); + if (it != used_keys.end() && it->second != value) + throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key); + used_keys.insert({key, value}); - auto col_name = "_" + key; + auto col_name = key; while (storage_columns.has(col_name)) col_name = "_" + col_name; key_values[col_name] = value; diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index a4a2e48e046..12ffd17c102 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ 
b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -39,6 +39,7 @@ Array(Int64) LowCardinality(Float64) 2070 1 1 +b TESTING THE URL PARTITIONING first last Elizabeth Jorge Frank Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index db1f073d736..5a0bd482985 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -27,26 +27,27 @@ SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/c SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; -SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; -SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; -SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; -SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; +SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; +SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT toTypeName(array), toTypeName(float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE number = 42; """ $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; -SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "INCORRECT_DATA" $CLICKHOUSE_LOCAL -n -q """ @@ -78,7 +79,7 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/colum SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND 
column1 = _column1; -SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" +SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; @@ -109,7 +110,7 @@ SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; """ diff --git a/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/a=b/a=b/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 From e416a2b3d2f9ff2395a218e79f9417cb96dafbda Mon Sep 17 00:00:00 2001 From: leonkozlowski Date: Tue, 20 Aug 2024 09:42:19 -0400 Subject: [PATCH 011/114] patch: fix reference to sorting key in primary key docs --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 183b94f4641..0b693775dde 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key. 
-It is usually not necessary to specify the primary key in addition to the primary key. +It is usually not necessary to specify the primary key in addition to the sorting key. #### SAMPLE BY From fe637452ec730db224e40a5c4a399d9ff7ac4ca0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Aug 2024 19:54:12 +0200 Subject: [PATCH 012/114] Revert "Fix test `01079_bad_alters_zookeeper_long`" --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 80a7e862f72..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6340,7 +6340,7 @@ void StorageReplicatedMergeTree::alter( "Metadata on replica is not up to date with common metadata in Zookeeper. " "It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). " - "You can retry the query"); + "You can retry this error"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5fb892597f7..01c2937352f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -59,7 +59,6 @@ MESSAGES_TO_RETRY = [ "is already started to be removing by another replica right now", # This is from LSan, and it indicates its own internal problem: "Unable to get registers from thread", - "You can retry", ] MAX_RETRIES = 3 From 2ad50a5f3c6a1da2e33aee32051d654e789b8ca3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Aug 2024 19:56:22 +0200 Subject: [PATCH 013/114] Update 01079_bad_alters_zookeeper_long.sh --- tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh index 82b8be65af5..39e65af039b 100755 --- a/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_bad_alters_zookeeper_long.sh @@ -26,6 +26,10 @@ while [[ $($CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='00000000 sleep 1 done +while [[ $($CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE type='ALTER_METADATA' AND database = '$CLICKHOUSE_DATABASE'" 2>&1) ]]; do + sleep 1 +done + $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE table_for_bad_alters;" # Type changed, but we can revert back $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_bad_alters VALUES(2, 2, 7)" From 434458cc830d2ced68f1f96dcfa13f967c9bc74e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 16 Aug 2024 11:22:22 +0000 Subject: [PATCH 014/114] Remove -n / --multiquery --- .../00115_shard_in_incomplete_result.sh | 2 +- .../0_stateless/00550_join_insert_select.sh | 2 +- .../0_stateless/01086_window_view_cleanup.sh | 2 +- ...396_inactive_replica_cleanup_nodes_zookeeper.sh | 4 ++-- .../0_stateless/01455_opentelemetry_distributed.sh | 2 +- .../0_stateless/01509_format_raw_blob.reference | 4 ++-- tests/queries/0_stateless/01509_format_raw_blob.sh | 4 ++-- .../01565_query_loop_after_client_error.expect | 2 +- .../01811_storage_buffer_flush_parameters.sh | 6 +++--- ...3_correct_block_size_prediction_with_default.sh | 4 ++-- 
.../02020_alter_table_modify_comment.sh | 2 +- ...e_sorting_by_input_stream_properties_explain.sh | 4 ++-- .../0_stateless/02383_join_and_filtering_set.sh | 2 +- .../0_stateless/02423_ddl_for_opentelemetry.sh | 2 +- tests/queries/0_stateless/02539_settings_alias.sh | 4 ++-- .../02697_stop_reading_on_first_cancel.sh | 2 +- .../0_stateless/02703_row_policies_for_asterisk.sh | 2 +- .../02703_row_policies_for_database_combination.sh | 2 +- .../0_stateless/02703_row_policy_for_database.sh | 4 ++-- tests/queries/0_stateless/02724_delay_mutations.sh | 6 +++--- ...02765_queries_with_subqueries_profile_events.sh | 14 +++++++------- .../queries/0_stateless/02841_not_ready_set_bug.sh | 4 ++-- .../0_stateless/02871_peak_threads_usage.sh | 14 +++++++------- .../0_stateless/02911_backup_restore_keeper_map.sh | 6 +++--- .../0_stateless/02968_file_log_multiple_read.sh | 4 ++-- .../03002_part_log_rmt_fetch_merge_error.sh | 8 ++++---- .../03002_part_log_rmt_fetch_mutate_error.sh | 10 +++++----- .../03164_selects_with_pk_usage_profile_event.sh | 8 ++++---- .../0_stateless/03172_system_detached_tables.sh | 4 ++-- .../03173_parallel_replicas_join_bug.sh | 2 +- 30 files changed, 68 insertions(+), 68 deletions(-) diff --git a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh index 5c3918dea9f..4916721764c 100755 --- a/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh +++ b/tests/queries/0_stateless/00115_shard_in_incomplete_result.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -o errexit set -o pipefail -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS users; CREATE TABLE users (UserID UInt64) ENGINE = Log; INSERT INTO users VALUES (1468013291393583084); diff --git a/tests/queries/0_stateless/00550_join_insert_select.sh b/tests/queries/0_stateless/00550_join_insert_select.sh index bfaccb613ca..ee2f3ab286b 100755 --- a/tests/queries/0_stateless/00550_join_insert_select.sh +++ b/tests/queries/0_stateless/00550_join_insert_select.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --ignore-error --query=" +$CLICKHOUSE_CLIENT --ignore-error --query=" DROP TABLE IF EXISTS test1_00550; DROP TABLE IF EXISTS test2_00550; DROP TABLE IF EXISTS test3_00550; diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index 8b8e794c8ff..1bfa3c50869 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -13,7 +13,7 @@ opts=( DATABASE_ORDINARY="${CLICKHOUSE_DATABASE}_ordinary" -$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 --multiquery " +$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " SET allow_experimental_window_view = 1; SET window_view_clean_interval = 1; diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..4a0b6a8c93c 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -13,7 +13,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") SCALE=1000 -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{shard}', '1{replica}') ORDER BY x @@ -46,7 +46,7 @@ $CLICKHOUSE_CLIENT --receive_timeout 600 --query "SYSTEM SYNC REPLICA r2" # Need $CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/$SHARD/replicas/2$REPLICA' AND name = 'is_lost'"; -$CLICKHOUSE_CLIENT -n --query " +$CLICKHOUSE_CLIENT --query " DROP TABLE IF EXISTS r1; DROP TABLE IF EXISTS r2; " diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 2b6da6132ed..30940f93a56 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function check_log { -${CLICKHOUSE_CLIENT} --format=JSONEachRow -nq " +${CLICKHOUSE_CLIENT} --format=JSONEachRow -q " set enable_analyzer = 1; system flush logs; diff --git a/tests/queries/0_stateless/01509_format_raw_blob.reference b/tests/queries/0_stateless/01509_format_raw_blob.reference index 05014001bd9..eb074457e07 100644 --- a/tests/queries/0_stateless/01509_format_raw_blob.reference +++ b/tests/queries/0_stateless/01509_format_raw_blob.reference @@ -1,2 +1,2 @@ -9fd46251e5574c633cbfbb9293671888 - -9fd46251e5574c633cbfbb9293671888 - +48fad37bc89fc3bcc29c4750897b6709 - +48fad37bc89fc3bcc29c4750897b6709 - diff --git a/tests/queries/0_stateless/01509_format_raw_blob.sh b/tests/queries/0_stateless/01509_format_raw_blob.sh index 3d1d3fbb17b..355928014e8 100755 --- a/tests/queries/0_stateless/01509_format_raw_blob.sh +++ b/tests/queries/0_stateless/01509_format_raw_blob.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t; CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; " @@ -12,7 +12,7 @@ CREATE TABLE t (a LowCardinality(Nullable(String))) ENGINE = Memory; ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT RawBLOB" < ${BASH_SOURCE[0]} cat ${BASH_SOURCE[0]} | md5sum -${CLICKHOUSE_CLIENT} -n --query "SELECT * FROM t FORMAT RawBLOB" | md5sum +${CLICKHOUSE_CLIENT} --query "SELECT * FROM t FORMAT RawBLOB" | md5sum ${CLICKHOUSE_CLIENT} --query " DROP TABLE t; diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index 6253840c63c..f08ef911da4 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,7 +24,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -m --history_file=$history_file --highlight 0" expect "\n:) " send -- "DROP TABLE IF EXISTS t01565;\r" diff --git a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh index 6a5949741ab..7878867e159 100755 --- a/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh +++ b/tests/queries/0_stateless/01811_storage_buffer_flush_parameters.sh @@ -17,7 +17,7 @@ function wait_with_limit() done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_01811; drop table if exists buffer_01811; @@ -39,9 +39,9 @@ $CLICKHOUSE_CLIENT -nm -q " # wait for background buffer flush wait_with_limit 30 '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01811") -gt 0 ]]' -$CLICKHOUSE_CLIENT -nm -q "select count() from data_01811" +$CLICKHOUSE_CLIENT -m -q "select count() from data_01811" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table buffer_01811; drop table data_01811; " diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh index 1482730af2c..57f9b5595de 100755 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) sql="toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]))" # Create the table and fill it -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=(\"[^\"]*?\"|[^\",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" @@ -24,7 +24,7 @@ function test() $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM 
test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 uuid_2=$(cat /proc/sys/kernel/random/uuid) $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 - $CLICKHOUSE_CLIENT -n --query=" + $CLICKHOUSE_CLIENT --query=" SYSTEM FLUSH LOGS; WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh index 3448f052f51..fa2d84e131a 100755 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh @@ -16,7 +16,7 @@ function test_table_comments() local ENGINE_NAME="$1" echo "engine : ${ENGINE_NAME}" - $CLICKHOUSE_CLIENT -nm <&1 | grep -q "SYNTAX_ERROR" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " CREATE TABLE 02703_rqtable_default (x UInt8) ENGINE = MergeTree ORDER BY x; CREATE ROW POLICY ${CLICKHOUSE_DATABASE}_filter_11_db_policy ON * USING x=1 AS permissive TO ALL; diff --git a/tests/queries/0_stateless/02724_delay_mutations.sh b/tests/queries/0_stateless/02724_delay_mutations.sh index f349e29253a..7843e692822 100755 --- a/tests/queries/0_stateless/02724_delay_mutations.sh +++ b/tests/queries/0_stateless/02724_delay_mutations.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " DROP TABLE IF EXISTS t_delay_mutations SYNC; CREATE TABLE t_delay_mutations (id UInt64, v UInt64) @@ -36,14 +36,14 @@ SELECT count() FROM system.mutations WHERE database = currentDatabase() AND tabl ${CLICKHOUSE_CLIENT} --query "SYSTEM START MERGES t_delay_mutations" wait_for_mutation "t_delay_mutations" "mutation_5.txt" -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SELECT * FROM t_delay_mutations ORDER BY id; SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_delay_mutations' AND NOT is_done; DROP TABLE IF EXISTS t_delay_mutations SYNC; " -${CLICKHOUSE_CLIENT} -n --query " +${CLICKHOUSE_CLIENT} --query " SYSTEM FLUSH LOGS; SELECT diff --git a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh index b7d93b5396c..fd64e8d8cb8 100755 --- a/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh +++ b/tests/queries/0_stateless/02765_queries_with_subqueries_profile_events.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " DROP TABLE IF EXISTS mv; DROP TABLE IF EXISTS output; DROP TABLE IF EXISTS input; @@ -17,7 +17,7 @@ $CLICKHOUSE_CLIENT -n -q " for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "INSERT INTO input SELECT * FROM numbers(1)" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 view, @@ -35,7 +35,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "SELECT * FROM system.one WHERE dummy IN (SELECT * FROM system.one) FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 subquery, @@ -52,7 +52,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE, @@ -69,7 +69,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH (SELECT * FROM system.one) AS x SELECT x, x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CSE_Multi, @@ -86,7 +86,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE, @@ -103,7 +103,7 @@ for enable_analyzer in 0 1; do query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --enable_analyzer "$enable_analyzer" --query_id "$query_id" -q "WITH x AS (SELECT * FROM system.one) SELECT * FROM x UNION ALL SELECT * FROM x FORMAT Null" - $CLICKHOUSE_CLIENT -mn -q " + $CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT 1 CTE_Multi, diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index 556e2f52de2..d5a2d034014 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE 1 in (SELECT number fro $CLICKHOUSE_CLIENT -q "SELECT xor(1, 0) FROM system.parts WHERE 1 IN (SELECT 1) FORMAT Null" # (Not all of these tests are effective because some of these tables are empty.) 
-$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " select * from system.columns where table in (select '123'); select * from system.replicas where database in (select '123'); select * from system.data_skipping_indices where database in (select '123'); @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -nq " select * from system.replication_queue where database in (select '123'); select * from system.distribution_queue where database in (select '123'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " create table a (x Int8) engine MergeTree order by x; insert into a values (1); select * from mergeTreeIndex(currentDatabase(), 'a') where part_name in (select '123'); diff --git a/tests/queries/0_stateless/02871_peak_threads_usage.sh b/tests/queries/0_stateless/02871_peak_threads_usage.sh index dfb3e665020..0f0473bbb47 100755 --- a/tests/queries/0_stateless/02871_peak_threads_usage.sh +++ b/tests/queries/0_stateless/02871_peak_threads_usage.sh @@ -26,7 +26,7 @@ ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_6" --query='SELECT * FROM nu ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_7" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 1, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_8" --query='SELECT * FROM numbers_mt(5000), numbers(5000) SETTINGS max_threads = 4, joined_subquery_requires_alias=0' "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_9" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 200000) @@ -38,7 +38,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 1""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_10" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(10000, 2000) @@ -50,7 +50,7 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(300000, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_11" -m --query=""" SELECT count() FROM (SELECT number FROM numbers_mt(1,100000) UNION ALL SELECT number FROM numbers_mt(1, 1) @@ -62,20 +62,20 @@ SELECT count() FROM UNION ALL SELECT number FROM numbers_mt(1, 4000000) ) SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -mn --query=""" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_12" -m --query=""" SELECT sum(number) FROM numbers_mt(100000) GROUP BY number % 2 WITH TOTALS ORDER BY number % 2 SETTINGS max_threads = 4""" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_13" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 1" "${QUERY_OPTIONS[@]}" -${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -mn --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" +${CLICKHOUSE_CLIENT} --query_id="${UNIQUE_QUERY_ID}_14" -m --query="SELECT * FROM numbers(100000) SETTINGS max_threads = 4" "${QUERY_OPTIONS[@]}" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH 
LOGS" for i in {1..14} do - ${CLICKHOUSE_CLIENT} -mn --query=""" + ${CLICKHOUSE_CLIENT} -m --query=""" SELECT '${i}', peak_threads_usage, (select count() from system.query_thread_log WHERE system.query_thread_log.query_id = '${UNIQUE_QUERY_ID}_${i}' AND current_database = currentDatabase()) = length(thread_ids), diff --git a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh index c04667505c3..01aba244a02 100755 --- a/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh +++ b/tests/queries/0_stateless/02911_backup_restore_keeper_map.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh database_name="$CLICKHOUSE_DATABASE"_02911_keeper_map -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP DATABASE IF EXISTS $database_name; CREATE DATABASE $database_name; CREATE TABLE $database_name.02911_backup_restore_keeper_map1 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911') PRIMARY KEY key; @@ -13,9 +13,9 @@ $CLICKHOUSE_CLIENT -nm -q " CREATE TABLE $database_name.02911_backup_restore_keeper_map3 (key UInt64, value String) Engine=KeeperMap('/' || currentDatabase() || '/test02911_different') PRIMARY KEY key; " -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map2 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 5000;" -$CLICKHOUSE_CLIENT -nm -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" +$CLICKHOUSE_CLIENT -m -q "INSERT INTO $database_name.02911_backup_restore_keeper_map3 SELECT number, 'test' || toString(number) FROM system.numbers LIMIT 3000;" backup_path="$database_name" for i in $(seq 1 3); do diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh index d9bae05270a..0879bf02d60 100755 --- a/tests/queries/0_stateless/02968_file_log_multiple_read.sh +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -15,7 +15,7 @@ do echo $i >> ${logs_dir}/a.txt done -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS file_log; DROP TABLE IF EXISTS table_to_store_data; DROP TABLE IF EXISTS file_log_mv; @@ -69,7 +69,7 @@ done ${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE file_log; DROP TABLE table_to_store_data; DROP TABLE file_log_mv; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh index e58c542b8ac..185e46a2eac 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.sh @@ -11,13 +11,13 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do # too frequent FLUSH LOGS is too costly sleep 2 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -nm -q " optimize table rmt_master final settings 
alter_sync=1, optimize_throw_if_noop=1; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; @@ -42,7 +42,7 @@ $CLICKHOUSE_CLIENT -nm -q " " # wait until rmt_slave will fetch the part and reflect this error in system.part_log wait_until "system flush logs; select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh index cc8f53aafb9..e731d51e7e3 100755 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.sh @@ -14,13 +14,13 @@ set -e function wait_until() { local q=$1 && shift - while [ "$($CLICKHOUSE_CLIENT -nm -q "$q")" != "1" ]; do + while [ "$($CLICKHOUSE_CLIENT -m -q "$q")" != "1" ]; do # too frequent FLUSH LOGS is too costly sleep 2 done } -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists rmt_master; drop table if exists rmt_slave; @@ -41,10 +41,10 @@ $CLICKHOUSE_CLIENT -nm -q " # the part, and rmt_slave will consider it instead of performing mutation on # it's own, otherwise prefer_fetch_merged_part_*_threshold will be simply ignored wait_for_mutation rmt_master 0000000000 -$CLICKHOUSE_CLIENT -nm -q "system start pulling replication log rmt_slave" +$CLICKHOUSE_CLIENT -m -q "system start pulling replication log rmt_slave" # and wait until rmt_slave to fetch the part and reflect this error in system.part_log wait_until "system flush logs; select count()>0 from system.part_log where table = 'rmt_slave' and database = '$CLICKHOUSE_DATABASE' and error > 0" -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system flush logs; select 'before'; select table, event_type, error>0, countIf(error=0) from system.part_log where database = currentDatabase() group by 1, 2, 3 order by 1, 2, 3; @@ -52,7 +52,7 @@ $CLICKHOUSE_CLIENT -nm -q " system start replicated sends rmt_master; " wait_for_mutation rmt_slave 0000000000 -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " system sync replica rmt_slave; system flush logs; diff --git a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh index 29d4c877909..75efc3f057a 100755 --- a/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh +++ b/tests/queries/0_stateless/03164_selects_with_pk_usage_profile_event.sh @@ -33,7 +33,7 @@ $CLICKHOUSE_CLIENT -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -50,7 +50,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col2 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -67,7 +67,7 @@ $CLICKHOUSE_CLIENT -mn -q " 
query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE pk >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage @@ -84,7 +84,7 @@ $CLICKHOUSE_CLIENT -mn -q " query_id="$(random_str 10)" $CLICKHOUSE_CLIENT --query_id "$query_id" -q " SELECT count(*) FROM table_$table_id WHERE col1 >= 50000 FORMAT Null;" -$CLICKHOUSE_CLIENT -mn -q " +$CLICKHOUSE_CLIENT -m -q " SYSTEM FLUSH LOGS; SELECT ProfileEvents['SelectQueriesWithPrimaryKeyUsage'] AS selects_with_pk_usage diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sh b/tests/queries/0_stateless/03172_system_detached_tables.sh index 47775abcc45..60e913b62a8 100755 --- a/tests/queries/0_stateless/03172_system_detached_tables.sh +++ b/tests/queries/0_stateless/03172_system_detached_tables.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DATABASE_ATOMIC="${CLICKHOUSE_DATABASE}_atomic" DATABASE_LAZY="${CLICKHOUSE_DATABASE}_lazy" -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT 'database atomic tests'; DROP DATABASE IF EXISTS ${DATABASE_ATOMIC}; @@ -36,7 +36,7 @@ DROP DATABASE ${DATABASE_ATOMIC} SYNC; " -$CLICKHOUSE_CLIENT --multiquery " +$CLICKHOUSE_CLIENT " SELECT '-----------------------'; SELECT 'database lazy tests'; diff --git a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh index af702569794..d2be9899f86 100755 --- a/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh +++ b/tests/queries/0_stateless/03173_parallel_replicas_join_bug.sh @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT -q " INSERT INTO data2 VALUES ('a1451105-722e-4fe7-bfaa-65ad2ae249c2', '2000-01-02', 'CREATED'); " -$CLICKHOUSE_CLIENT -nq " +$CLICKHOUSE_CLIENT -q " SET enable_analyzer = 1, cluster_for_parallel_replicas = 'parallel_replicas', max_parallel_replicas = 10, allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_threads = 1; SELECT From 539d04c90f30efa0ef6435373ec8ffc4777aee78 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 20 Aug 2024 20:00:23 +0000 Subject: [PATCH 015/114] Disable min_bytes_to_use_direct_io in some tests with Dynamic/JSON subcolumns because it's broken --- .../03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql | 2 ++ .../03036_dynamic_read_subcolumns_wide_merge_tree.sql | 2 ++ .../03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 | 2 ++ ...03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 | 2 ++ 4 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql index 61dc8fca01a..9e6e0652127 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql @@ -4,6 +4,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git 
a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..44ceac1e5ad 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -4,6 +4,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 index ab4e0437c15..b31e57753c0 100644 --- a/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 +++ b/tests/queries/0_stateless/03207_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -5,6 +5,8 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set session_timezone = 'UTC'; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + drop table if exists test; create table test (id UInt64, json JSON(max_dynamic_paths=2, a.b.c UInt32)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; diff --git a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 index 3010fa0e2de..c0f34a8ea61 100644 --- a/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 +++ b/tests/queries/0_stateless/03208_array_of_json_read_subcolumns_2_wide_merge_tree.sql.j2 @@ -4,6 +4,8 @@ set allow_experimental_json_type = 1; set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; +set min_bytes_to_use_direct_io = 0; -- min_bytes_to_use_direct_io > 0 is broken + create table test (id UInt64, json JSON(max_dynamic_paths=8, a.b Array(JSON))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; insert into test select number, '{}' from numbers(10000); From 1626589bb35eaca62804eabc5f64d5d55c998edf Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 00:48:23 +0300 Subject: [PATCH 016/114] feat: add ripeMD160 support --- src/Common/RipeMD160Hash.h | 186 +++++++++++++++++++++++++ src/Functions/FunctionsHashing.h | 16 +++ src/Functions/FunctionsHashingRipe.cpp | 21 +++ 3 files changed, 223 insertions(+) create mode 100644 src/Common/RipeMD160Hash.h create mode 100644 src/Functions/FunctionsHashingRipe.cpp diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h new file mode 100644 index 00000000000..0ff97dc13b2 --- /dev/null +++ b/src/Common/RipeMD160Hash.h @@ -0,0 +1,186 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +class RIPEMD160 +{ +private: + UInt8 digest_bytes[20]; + + static constexpr UInt32 initial_digest[5] = {0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL}; + + static constexpr UInt8 rho[16] = {0x7, 0x4, 0xd, 0x1, 0xa, 0x6, 0xf, 0x3, 0xc, 0x0, 0x9, 0x5, 0x2, 0xe, 0xb, 0x8}; + + static constexpr UInt8 shifts[80] + = {11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 
8, 12, 13, 11, 15, 6, 9, 9, 7, 12, 15, 11, + 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, + 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; + + static constexpr UInt32 constants_left[5] = {0x00000000UL, 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xa953fd4eUL}; + + + static ALWAYS_INLINE UInt32 make_le32(UInt32 x) noexcept + { + if constexpr (std::endian::native == std::endian::little) + { + return x; + } + else + { + return __builtin_bswap32(x); + } + } + + static constexpr UInt32 constants_right[5] = {0x50a28be6UL, 0x5c4dd124UL, 0x6d703ef3UL, 0x7a6d76e9UL, 0x00000000UL}; + + static constexpr UInt8 fns_left[5] = {1, 2, 3, 4, 5}; + static constexpr UInt8 fns_right[5] = {5, 4, 3, 2, 1}; + + static ALWAYS_INLINE UInt32 rol(UInt32 x, UInt32 n) noexcept { return (x << n) | (x >> (32 - n)); } + + static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } + + static ALWAYS_INLINE UInt32 F_2(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & b) | (~a & c)); } + + static ALWAYS_INLINE UInt32 F_3(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a | ~b) ^ c); } + + static ALWAYS_INLINE UInt32 F_4(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & c) | (b & ~c)); } + + static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } + + using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); + + static constexpr FuncPtr funcs[5] = {F_1, F_2, F_3, F_4, F_5}; + + static ALWAYS_INLINE FuncPtr getFunction(UInt8 func) noexcept { return funcs[func - 1]; } + + void compute_line( + UInt32 * digest, + UInt32 * words, + const UInt32 * chunk, + UInt8 * index, + const UInt8 * sh, + const UInt32 * ks, + const UInt8 * fns) noexcept + { + std::memcpy(words, digest, 5 * sizeof(UInt32)); + for (UInt8 round = 0; round < 5; ++round) + { + UInt32 k = ks[round]; + UInt8 fn = fns[round]; + for (UInt8 j = 0; j < 16; ++j) + { + UInt32 tmp = getFunction(fn)(words[1], words[2], words[3]); + tmp += words[0] + le32toh(chunk[index[j]]) + k; + tmp = rol(tmp, sh[index[j]]) + words[4]; + words[0] = words[4]; + words[4] = words[3]; + words[3] = rol(words[2], 10); + words[2] = words[1]; + words[1] = tmp; + } + sh += 16; + UInt8 index_tmp[16]; + for (size_t i = 0; i < 16; ++i) + index_tmp[i] = rho[index[i]]; + std::memcpy(index, index_tmp, 16); + } + } + + /// Update the digest with the given chunk of data + void update(UInt32 * digest, const UInt32 * chunk) noexcept + { + UInt8 index[16]; + for (UInt8 i = 0; i < 16; ++i) + index[i] = i; + + UInt32 words_left[5]; + compute_line(digest, words_left, chunk, index, shifts, constants_left, fns_left); + + static constexpr UInt8 rho_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; + std::memcpy(index, rho_index, 16); + + UInt32 words_right[5]; + compute_line(digest, words_right, chunk, index, shifts, constants_right, fns_right); + + digest[0] += words_left[1] + words_right[2]; + digest[1] += words_left[2] + words_right[3]; + digest[2] += words_left[3] + words_right[4]; + digest[3] += words_left[4] + words_right[0]; + digest[4] += words_left[0] + words_right[1]; + + std::rotate(digest, digest + 1, digest + 5); + } + +public: + void hash(const UInt8 * data, size_t data_len) noexcept + { + UInt32 digest[5]; + for (size_t i = 0; i < 5; ++i) + digest[i] = make_le32(initial_digest[i]); + + const UInt8 * last_chunk_start = data + (data_len & (~0x3f)); + while (data < last_chunk_start) + { + update(digest, 
reinterpret_cast<const UInt32 *>(data));
+            data += 0x40;
+        }
+
+        UInt8 last_chunk[0x40] = {};
+        UInt8 leftover_size = data_len & 0x3f;
+        std::memcpy(last_chunk, data, leftover_size);
+
+        last_chunk[leftover_size] = 0x80;
+
+        if (leftover_size >= 0x38)
+        {
+            update(digest, reinterpret_cast<const UInt32 *>(last_chunk));
+            std::memset(last_chunk, 0, 0x38);
+        }
+
+        UInt32 data_len_bits = static_cast<UInt32>(data_len << 3);
+        std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits));
+        data_len_bits = static_cast<UInt32>(data_len >> 29);
+        std::memcpy(&last_chunk[0x3c], &data_len_bits, sizeof(data_len_bits));
+
+        update(digest, reinterpret_cast<const UInt32 *>(last_chunk));
+
+        for (size_t i = 0; i < 5; ++i)
+        {
+            UInt32 digest_part = make_le32(digest[i]);
+            std::memcpy(digest_bytes + i * 4, &digest_part, 4);
+        }
+    }
+
+    const UInt8 * get_digest_bytes() const noexcept { return digest_bytes; }
+};
+
+
+inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept
+{
+    RIPEMD160 ripe;
+    ripe.hash(reinterpret_cast<const UInt8 *>(data), size);
+
+    UInt8 digest[20];
+    std::memcpy(digest, ripe.get_digest_bytes(), sizeof(digest));
+
+    std::reverse(digest, digest + sizeof(digest));
+
+    UInt256 res = 0;
+    std::memcpy(&res, digest, sizeof(digest));
+
+    return res;
+}
diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 95c54ac9528..8829e7c0479 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -14,6 +14,7 @@
 #include
 #include
+#include <Common/RipeMD160Hash.h>
 #include
 #include
 #include
@@ -190,6 +191,19 @@ T combineHashesFunc(T t1, T t2)
     return HashFunction::apply(reinterpret_cast<const char *>(hashes), sizeof(hashes));
 }
 
+struct RipeMD160Impl
+{
+    static constexpr auto name = "ripeMD160";
+
+    using ReturnType = UInt256;
+
+    static UInt256 apply(const char * begin, size_t size) { return ripeMD160Hash(begin, size); }
+
+    static UInt256 combineHashes(UInt256 h1, UInt256 h2) { return combineHashesFunc<UInt256, RipeMD160Impl>(h1, h2); }
+
+    static constexpr bool use_int_hash_for_pods = false;
+};
+
 
 struct SipHash64Impl
 {
@@ -1646,6 +1660,8 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
 using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
 using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
+
+using FunctionRipeMD160Hash = FunctionAnyHash<RipeMD160Impl>;
 }
 
 #pragma clang diagnostic pop
diff --git a/src/Functions/FunctionsHashingRipe.cpp b/src/Functions/FunctionsHashingRipe.cpp
new file mode 100644
index 00000000000..5b06b8ab924
--- /dev/null
+++ b/src/Functions/FunctionsHashingRipe.cpp
@@ -0,0 +1,21 @@
+#include "FunctionsHashing.h"
+
+#include <Functions/FunctionFactory.h>
+
+/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
+/// to better parallelize the build procedure and avoid MSan build failure
+/// due to excessive resource consumption.
+namespace DB
+{
+REGISTER_FUNCTION(HashingRipe)
+{
+    factory.registerFunction<FunctionRipeMD160Hash>(FunctionDocumentation{
+        .description = "RIPEMD-160 hash function, primarily used in Bitcoin address generation.",
+        .examples{{"", "SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));", R"(
+            ┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+            │ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B                       │
+            └───────────────────────────────────────────────────────────────┘
+            )"}},
+        .categories{"Hash"}});
+}
+}

From 5740df58b90f4b2f5532bd6b5ab5dc77a838a0a7 Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 01:17:40 +0300
Subject: [PATCH 017/114] feat: add test

---
 tests/queries/0_stateless/03222_ripeMD160.reference |  5 +++++
 tests/queries/0_stateless/03222_ripeMD160.sql       | 11 +++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 tests/queries/0_stateless/03222_ripeMD160.reference
 create mode 100644 tests/queries/0_stateless/03222_ripeMD160.sql

diff --git a/tests/queries/0_stateless/03222_ripeMD160.reference b/tests/queries/0_stateless/03222_ripeMD160.reference
new file mode 100644
index 00000000000..f0db55dc8e1
--- /dev/null
+++ b/tests/queries/0_stateless/03222_ripeMD160.reference
@@ -0,0 +1,5 @@
+37F332F68DB77BD9D7EDD4969571AD671CF9DD3B
+132072DF690933835EB8B6AD0B77E7B6F14ACAD7
+9C1185A5C5E9FC54612808977EE8F548B2258D31
+13920F39C93D503A0AC02EAB9AA8F672BC523ADA
+3FEDF0C212CCFA54C0EBA676C8A8A2A10BC218BE
diff --git a/tests/queries/0_stateless/03222_ripeMD160.sql b/tests/queries/0_stateless/03222_ripeMD160.sql
new file mode 100644
index 00000000000..592f9f830dd
--- /dev/null
+++ b/tests/queries/0_stateless/03222_ripeMD160.sql
@@ -0,0 +1,11 @@
+-- Output can be verified using: https://emn178.github.io/online-tools/ripemd-160/
+
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy cog'));
+
+SELECT hex(ripeMD160(''));
+
+SELECT hex(ripeMD160('CheREpaha1512'));
+
+SELECT hex(ripeMD160('A-very-long-string-that-should-be-hashed-using-ripeMD160'));

From bb2b6600961e432d115c08964d65ade23740861a Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 02:11:08 +0300
Subject: [PATCH 018/114] fix: cosmetic

---
 src/Common/RipeMD160Hash.h | 151 +++++++++++++++++++------------------
 1 file changed, 78 insertions(+), 73 deletions(-)

diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h
index 0ff97dc13b2..a759553278c 100644
--- a/src/Common/RipeMD160Hash.h
+++ b/src/Common/RipeMD160Hash.h
@@ -14,42 +14,33 @@
 #include
 
+/// https://homes.esat.kuleuven.be/~bosselae/ripemd160/pdf/AB-9601/AB-9601.pdf
+/// https://en.wikipedia.org/wiki/RIPEMD
 
-class RIPEMD160
+class RipeMD160
 {
 private:
+    using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32);
+
+    /// Stores the final 20-byte (160-bit) hash result
     UInt8 digest_bytes[20];
 
-    static constexpr UInt32 initial_digest[5] = {0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL};
+    static constexpr UInt32 initial_hash_values[5] = {0x67452301UL, 0xEFCDAB89UL, 0x98BADCFEUL, 0x10325476UL, 0xC3D2E1F0UL};
 
-    static constexpr UInt8 rho[16] = {0x7, 0x4, 0xd, 0x1, 0xa, 0x6, 0xf, 0x3, 0xc, 0x0, 0x9, 0x5, 0x2, 0xe, 0xb, 0x8};
+    static constexpr UInt8 rho_order[16] = {0x7, 0x4, 0xD, 0x1, 0xA, 0x6, 0xF, 0x3, 0xC, 0x0, 0x9, 0x5, 0x2, 0xE, 0xB, 0x8};
 
-    static constexpr UInt8 shifts[80]
+    static constexpr UInt8 shift_amounts[80]
        = {11, 14, 15, 12, 5,  8,  7,  9,  11, 13, 14, 15, 6,  7,  9,  8,  12, 13, 11, 15, 6,  9,  9,  7,  12, 15, 11, 13, 7,  8,  7,  7,  13, 15, 14, 11,
7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - static constexpr UInt32 constants_left[5] = {0x00000000UL, 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xa953fd4eUL}; + static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; - - static ALWAYS_INLINE UInt32 make_le32(UInt32 x) noexcept - { - if constexpr (std::endian::native == std::endian::little) - { - return x; - } - else - { - return __builtin_bswap32(x); - } - } + static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; - static constexpr UInt32 constants_right[5] = {0x50a28be6UL, 0x5c4dd124UL, 0x6d703ef3UL, 0x7a6d76e9UL, 0x00000000UL}; + static constexpr UInt8 left_function_order[5] = {1, 2, 3, 4, 5}; - static constexpr UInt8 fns_left[5] = {1, 2, 3, 4, 5}; - static constexpr UInt8 fns_right[5] = {5, 4, 3, 2, 1}; - - static ALWAYS_INLINE UInt32 rol(UInt32 x, UInt32 n) noexcept { return (x << n) | (x >> (32 - n)); } + static constexpr UInt8 right_function_order[5] = {5, 4, 3, 2, 1}; static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } @@ -61,68 +52,82 @@ private: static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } - using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); + static constexpr FuncPtr hash_functions[5] = {F_1, F_2, F_3, F_4, F_5}; - static constexpr FuncPtr funcs[5] = {F_1, F_2, F_3, F_4, F_5}; + static ALWAYS_INLINE FuncPtr get_function(UInt8 function_id) noexcept { return hash_functions[function_id - 1]; } - static ALWAYS_INLINE FuncPtr getFunction(UInt8 func) noexcept { return funcs[func - 1]; } - - void compute_line( - UInt32 * digest, - UInt32 * words, - const UInt32 * chunk, - UInt8 * index, - const UInt8 * sh, - const UInt32 * ks, - const UInt8 * fns) noexcept + static ALWAYS_INLINE UInt32 convert_to_little_endian(UInt32 x) noexcept { - std::memcpy(words, digest, 5 * sizeof(UInt32)); + if constexpr (std::endian::native == std::endian::little) + { + return x; + } + else + { + return __builtin_bswap32(x); + } + } + + static ALWAYS_INLINE UInt32 rotate_left(UInt32 value, UInt32 shift) noexcept { return (value << shift) | (value >> (32 - shift)); } + + /// Performs one full pass (5 rounds) of RIPEMD-160 algorithm for one path (left or right) + void process_rounds( + UInt32 * current_digest, + UInt32 * temp_words, + const UInt32 * data_chunk, + UInt8 * index_order, + const UInt8 * shift_values, + const UInt32 * round_constants, + const UInt8 * function_order) noexcept + { + std::memcpy(temp_words, current_digest, 5 * sizeof(UInt32)); for (UInt8 round = 0; round < 5; ++round) { - UInt32 k = ks[round]; - UInt8 fn = fns[round]; + UInt32 k = round_constants[round]; + UInt8 fn = function_order[round]; for (UInt8 j = 0; j < 16; ++j) { - UInt32 tmp = getFunction(fn)(words[1], words[2], words[3]); - tmp += words[0] + le32toh(chunk[index[j]]) + k; - tmp = rol(tmp, sh[index[j]]) + words[4]; - words[0] = words[4]; - words[4] = words[3]; - words[3] = rol(words[2], 10); - words[2] = words[1]; - words[1] = tmp; + UInt32 temp_result = get_function(fn)(temp_words[1], temp_words[2], temp_words[3]); + temp_result += temp_words[0] + convert_to_little_endian(data_chunk[index_order[j]]) + k; + temp_result = rotate_left(temp_result, shift_values[index_order[j]]) + temp_words[4]; + temp_words[0] = temp_words[4]; + temp_words[4] = 
temp_words[3]; + temp_words[3] = rotate_left(temp_words[2], 10); + temp_words[2] = temp_words[1]; + temp_words[1] = temp_result; } - sh += 16; - UInt8 index_tmp[16]; + shift_values += 16; + UInt8 reordered_index[16]; for (size_t i = 0; i < 16; ++i) - index_tmp[i] = rho[index[i]]; - std::memcpy(index, index_tmp, 16); + reordered_index[i] = rho_order[index_order[i]]; + std::memcpy(index_order, reordered_index, 16); } } /// Update the digest with the given chunk of data - void update(UInt32 * digest, const UInt32 * chunk) noexcept + void update_digest(UInt32 * current_digest, const UInt32 * data_chunk) noexcept { - UInt8 index[16]; + UInt8 index_order[16]; for (UInt8 i = 0; i < 16; ++i) - index[i] = i; + index_order[i] = i; - UInt32 words_left[5]; - compute_line(digest, words_left, chunk, index, shifts, constants_left, fns_left); + UInt32 left_path_words[5]; + process_rounds(current_digest, left_path_words, data_chunk, index_order, shift_amounts, left_round_constants, left_function_order); - static constexpr UInt8 rho_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; - std::memcpy(index, rho_index, 16); + static constexpr UInt8 rho_reordered_index[16] = {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; + std::memcpy(index_order, rho_reordered_index, 16); - UInt32 words_right[5]; - compute_line(digest, words_right, chunk, index, shifts, constants_right, fns_right); + UInt32 right_path_words[5]; + process_rounds( + current_digest, right_path_words, data_chunk, index_order, shift_amounts, right_round_constants, right_function_order); - digest[0] += words_left[1] + words_right[2]; - digest[1] += words_left[2] + words_right[3]; - digest[2] += words_left[3] + words_right[4]; - digest[3] += words_left[4] + words_right[0]; - digest[4] += words_left[0] + words_right[1]; + current_digest[0] += left_path_words[1] + right_path_words[2]; + current_digest[1] += left_path_words[2] + right_path_words[3]; + current_digest[2] += left_path_words[3] + right_path_words[4]; + current_digest[3] += left_path_words[4] + right_path_words[0]; + current_digest[4] += left_path_words[0] + right_path_words[1]; - std::rotate(digest, digest + 1, digest + 5); + std::rotate(current_digest, current_digest + 1, current_digest + 5); } public: @@ -130,37 +135,37 @@ public: { UInt32 digest[5]; for (size_t i = 0; i < 5; ++i) - digest[i] = make_le32(initial_digest[i]); + digest[i] = convert_to_little_endian(initial_hash_values[i]); - const UInt8 * last_chunk_start = data + (data_len & (~0x3f)); + const UInt8 * last_chunk_start = data + (data_len & (~0x3F)); while (data < last_chunk_start) { - update(digest, reinterpret_cast(data)); + update_digest(digest, reinterpret_cast(data)); data += 0x40; } UInt8 last_chunk[0x40] = {}; - UInt8 leftover_size = data_len & 0x3f; + UInt8 leftover_size = data_len & 0x3F; std::memcpy(last_chunk, data, leftover_size); last_chunk[leftover_size] = 0x80; if (leftover_size >= 0x38) { - update(digest, reinterpret_cast(last_chunk)); + update_digest(digest, reinterpret_cast(last_chunk)); std::memset(last_chunk, 0, 0x38); } UInt32 data_len_bits = static_cast(data_len << 3); std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits)); data_len_bits = static_cast(data_len >> 29); - std::memcpy(&last_chunk[0x3c], &data_len_bits, sizeof(data_len_bits)); + std::memcpy(&last_chunk[0x3C], &data_len_bits, sizeof(data_len_bits)); - update(digest, reinterpret_cast(last_chunk)); + update_digest(digest, reinterpret_cast(last_chunk)); for (size_t i = 0; i < 5; ++i) { - UInt32 digest_part = 
make_le32(digest[i]); + UInt32 digest_part = convert_to_little_endian(digest[i]); std::memcpy(digest_bytes + i * 4, &digest_part, 4); } } @@ -171,7 +176,7 @@ public: inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept { - RIPEMD160 ripe; + RipeMD160 ripe; ripe.hash(reinterpret_cast(data), size); UInt8 digest[20]; From a0d29c812c4da7c7e8d15e798e141e7e6be910b9 Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 02:47:23 +0300 Subject: [PATCH 019/114] fix: cosmetic --- src/Common/RipeMD160Hash.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h index a759553278c..b6488225974 100644 --- a/src/Common/RipeMD160Hash.h +++ b/src/Common/RipeMD160Hash.h @@ -34,7 +34,7 @@ private: 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; + static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; From e01a448bcc62a7e292766cddc0c817b9e44558d4 Mon Sep 17 00:00:00 2001 From: Zhigao Hong Date: Wed, 21 Aug 2024 15:35:33 +0800 Subject: [PATCH 020/114] Fix invalid characters in replica_name --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 44548e33d46..9a65d590453 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -538,6 +538,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replica_name.empty()) throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); + // '\t' and '\n' will interrupt parsing 'source replica' in ReplicatedMergeTreeLogEntryData::readText + if (replica_name.find('\t') != String::npos || replica_name.find('\n') != String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must not contain '\\t' or '\\n'"); arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. 
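A minimal SQL sketch of what the new check in PATCH 020 rejects; the table
name and ZooKeeper path below are invented for illustration, and the '\t'
escape in the string literal produces a literal tab character:

-- Hypothetical example: with this patch the CREATE fails up front with
-- BAD_ARGUMENTS ("Replica name must not contain '\t' or '\n'") instead of
-- storing a replica name that would later break parsing of 'source replica'
-- in replication log entries.
CREATE TABLE t_bad_replica (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/t_bad_replica', 'r1\tr2')
ORDER BY x;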
From 915daafd3a0c9f1539dad75dc3805e740f0bc75a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Aug 2024 10:45:48 +0000 Subject: [PATCH 021/114] Fix 01086_window_view_cleanup.sh --- tests/queries/0_stateless/01086_window_view_cleanup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index 1bfa3c50869..2e6cc7e2520 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -13,7 +13,8 @@ opts=( DATABASE_ORDINARY="${CLICKHOUSE_DATABASE}_ordinary" -$CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " +$CLICKHOUSE_CLIENT "${opts[@]}" --query " + SET allow_deprecated_database_ordinary = 1; SET allow_experimental_window_view = 1; SET window_view_clean_interval = 1; @@ -28,8 +29,7 @@ $CLICKHOUSE_CLIENT "${opts[@]}" --allow_deprecated_database_ordinary=1 " INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 2, toDateTime('1990/01/01 12:00:01', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 3, toDateTime('1990/01/01 12:00:02', 'US/Samoa')); INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 4, toDateTime('1990/01/01 12:00:05', 'US/Samoa')); - INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa')); -" + INSERT INTO ${DATABASE_ORDINARY}.mt VALUES (1, 5, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));" while true; do $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT count(*) FROM ${DATABASE_ORDINARY}.\`.inner.wv\`" | grep -q "5" && break || sleep .5 ||: From a387807c8429af1c9ed471b80ad317c9171b40a4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:14:51 +0200 Subject: [PATCH 022/114] Fix build --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 6263351897e..c9ca9efabee 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1257,8 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam .format_name = format_name, .date_time_overflow_behavior = date_time_overflow_behavior, .allow_arrow_null_type = true, - .skip_columns_with_unsupported_types = false - .allow_inferring_nullable_columns = true; + .skip_columns_with_unsupported_types = false, + .allow_inferring_nullable_columns = true }; Columns columns; From 6db7b995439f873f9cd33d07019ea939ec51a3b7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:42:46 +0200 Subject: [PATCH 023/114] Increase connectTimeoutMs IMDS connection timeout to 50ms to avoid failures in CI --- src/IO/S3/Credentials.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index d6f7542da6b..fab3a0111eb 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -787,7 +787,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. /// But the connection timeout should be small because there is the case when there is no IMDS at all, /// like outside of the cloud, on your own machines. 
- aws_client_configuration.connectTimeoutMs = 10; + aws_client_configuration.connectTimeoutMs = 50; aws_client_configuration.requestTimeoutMs = 1000; aws_client_configuration.retryStrategy = std::make_shared(1, 1000); From 4a7a04b35b492aec779b42adc6f3f3eae354a947 Mon Sep 17 00:00:00 2001 From: leonkozlowski Date: Wed, 21 Aug 2024 10:13:02 -0400 Subject: [PATCH 024/114] patch: build From 3fd50ed856a9767094f19f08c93401fc4a5a80eb Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:23:37 +0200 Subject: [PATCH 025/114] Fix flaky test test_distributed_replica_max_ignored_errors --- tests/integration/test_distributed_load_balancing/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py index d61cca6ce12..a913c2ebb49 100644 --- a/tests/integration/test_distributed_load_balancing/test.py +++ b/tests/integration/test_distributed_load_balancing/test.py @@ -200,7 +200,6 @@ def test_distributed_replica_max_ignored_errors(): "connect_timeout": 2, "receive_timeout": 2, "send_timeout": 2, - "idle_connection_timeout": 2, "tcp_keep_alive_timeout": 2, "distributed_replica_max_ignored_errors": 0, "distributed_replica_error_half_life": 60, From 1afd3a7c3a7569b172ac3238f798c7850fd41bcf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:24:43 +0200 Subject: [PATCH 026/114] give priority to parsed columns over storage columns --- src/Storages/Hive/StorageHive.cpp | 2 +- .../ObjectStorage/StorageObjectStorage.cpp | 2 +- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageSource.cpp | 4 ++-- .../StorageObjectStorageSource.h | 4 ++-- .../ObjectStorageQueueSource.cpp | 2 +- .../ObjectStorageQueueSource.h | 2 +- .../StorageObjectStorageQueue.cpp | 2 +- src/Storages/StorageFile.cpp | 6 ++--- src/Storages/StorageFileCluster.cpp | 2 +- src/Storages/StorageURL.cpp | 6 ++--- src/Storages/StorageURLCluster.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 23 +++++++++++-------- src/Storages/VirtualColumnUtils.h | 4 ++-- 14 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index ae2e8cffe28..ea2e9e3eece 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -444,8 +444,8 @@ StorageHive::StorageHive( storage_metadata.setComment(comment_); storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext())); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index d9c82d68791..a0f189e92fc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage( if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings)); + 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index c214665f7e0..08a0739d929 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index d8e26977e75..04e319cd0b8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate() .filename = &filename, .last_modified = object_info->metadata->last_modified, .etag = &(object_info->metadata->etag) - }, getContext(), read_from_format_info.columns_description); + }, getContext()); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) @@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 6681dbf4578..7ae7a2358e9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -74,7 +74,7 @@ protected: const UInt64 max_block_size; const bool need_only_count; const size_t max_parsing_threads; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; std::shared_ptr file_iterator; @@ -122,7 +122,7 @@ protected: const std::shared_ptr & file_iterator, const ConfigurationPtr & configuration, const ObjectStoragePtr & object_storage, - const ReadFromFormatInfo & read_from_format_info, + ReadFromFormatInfo & read_from_format_info, const std::optional & format_settings, const std::shared_ptr & key_condition_, const ContextPtr & context_, diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 2634a7b2f1e..cde41b4afff 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl() { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes - }, getContext(), read_from_format_info.columns_description); + }, getContext()); return chunk; } diff --git 
a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h index 0f3d0ab2e92..c085287e4f3 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.h @@ -128,7 +128,7 @@ private: const std::shared_ptr file_iterator; const ConfigurationPtr configuration; const ObjectStoragePtr object_storage; - const ReadFromFormatInfo read_from_format_info; + ReadFromFormatInfo read_from_format_info; const std::optional format_settings; const ObjectStorageQueueSettings queue_settings; const std::shared_ptr files_metadata; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 5dc3e01962c..9452ce81e9e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_)); setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 766b7722cdf..50294df32a4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings)); + setInMemoryMetadata(storage_metadata); } @@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), .last_modified = current_file_last_modified - }, getContext(), columns_description); + }, getContext()); return chunk; } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index 82ae0b761ae..c01738067c4 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0])); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? 
"" : paths[0])); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6442891cf23..fc1354b780a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings)); + setInMemoryMetadata(storage_metadata); } @@ -435,7 +435,7 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - }, getContext(), columns_description); + }, getContext()); return chunk; } diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 7c7a299c64e..140413d78b0 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster( } storage_metadata.setConstraints(constraints_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context))); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context))); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index edf50907752..5b974cb8a22 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -129,7 +129,7 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time", "_etag"}; } -std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) +std::unordered_map parseHivePartitioningKeysAndValues(const String & path) { std::string pattern = "([^/]+)=([^/]+)/"; re2::StringPiece input_piece(path); @@ -145,34 +145,37 @@ std::unordered_map parseHivePartitioningKeysAndValues( used_keys.insert({key, value}); auto col_name = key; - while (storage_columns.has(col_name)) - col_name = "_" + col_name; key_values[col_name] = value; } return key_values; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) +VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) { VirtualColumnsDescription desc; auto add_virtual = [&](const auto & name, const auto & type) { - if (storage_columns.has(name)) + auto local_type = type; + if (storage_columns.has(name) && !context->getSettingsRef().use_hive_partitioning) return; + if (storage_columns.has(name)) + { + local_type = storage_columns.get(name).type; + storage_columns.remove(name); + } - desc.addEphemeral(name, type, ""); + desc.addEphemeral(name, local_type, ""); }; add_virtual("_path", std::make_shared(std::make_shared())); 
add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); - add_virtual("_etag", std::make_shared(std::make_shared())); if (context->getSettingsRef().use_hive_partitioning) { - auto map = parseHivePartitioningKeysAndValues(path, storage_columns); + auto map = parseHivePartitioningKeysAndValues(path); auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context); for (auto & item : map) { @@ -245,11 +248,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns) + VirtualsForFileLikeStorage virtual_values, ContextPtr context) { std::unordered_map hive_map; if (context->getSettingsRef().use_hive_partitioning) - hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns); + hive_map = parseHivePartitioningKeysAndValues(virtual_values.path); for (const auto & virtual_column : requested_virtual_columns) { diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 23e16871798..6aa08b2aef2 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage( - const ColumnsDescription & storage_columns, + ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & sample_path = "", std::optional format_settings_ = std::nullopt); @@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns); + VirtualsForFileLikeStorage virtual_values, ContextPtr context); } } From 5965297d8b2f26768fb0ee13a9aeec6d7cada0c9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:35:39 +0200 Subject: [PATCH 027/114] add accidentally removed virtual column --- src/Storages/VirtualColumnUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 5b974cb8a22..bbeb9ee6643 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -172,6 +172,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); + add_virtual("_etag", std::make_shared(std::make_shared())); if (context->getSettingsRef().use_hive_partitioning) { From e87de3cfcd22870bf7aea3dfaf1607b180b2b1d8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 21 Aug 2024 15:19:29 +0000 Subject: [PATCH 028/114] return back virtual columns to distributed tables --- src/Storages/StorageDistributed.cpp | 4 ++++ .../03228_virtual_column_merge_dist.reference | 8 +++++++ .../03228_virtual_column_merge_dist.sql | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 
tests/queries/0_stateless/03228_virtual_column_merge_dist.reference create mode 100644 tests/queries/0_stateless/03228_virtual_column_merge_dist.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index e146e95f89f..c4668159759 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,6 +290,10 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); + /// Add virtual columns from table of storage Merges. + desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); + return desc; } diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference new file mode 100644 index 00000000000..28f00bafdfe --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference @@ -0,0 +1,8 @@ +1 t_local_1 +2 t_local_2 +1 t_local_1 +2 t_local_2 +1 1 +2 1 +1 1 +2 1 diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql new file mode 100644 index 00000000000..caf00a2e407 --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; + +CREATE TABLE t_local_1 (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE TABLE t_local_2 (a UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_local_1 VALUES (1); +INSERT INTO t_local_2 VALUES (2); + +CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); +CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); + +SELECT a, _table FROM t_merge ORDER BY a; +SELECT a, _table FROM t_distr ORDER BY a; + +SELECT a, _database = currentDatabase() FROM t_merge ORDER BY a; +SELECT a, _database = currentDatabase() FROM t_distr ORDER BY a; + +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; From 2e58ac56111a075bdbaee566a4484a193a882792 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 21 Aug 2024 16:30:42 +0000 Subject: [PATCH 029/114] build fix --- src/Storages/VirtualColumnUtils.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index bbeb9ee6643..7e3e902f083 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -156,16 +156,20 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto auto add_virtual = [&](const auto & name, const auto & type) { - auto local_type = type; - if (storage_columns.has(name) && !context->getSettingsRef().use_hive_partitioning) - return; if (storage_columns.has(name)) { - local_type = storage_columns.get(name).type; + if (!context->getSettingsRef().use_hive_partitioning) + return; + + if (storage_columns.size() == 1) + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name); + auto local_type = storage_columns.get(name).type; 
storage_columns.remove(name); + desc.addEphemeral(name, local_type, ""); + return; } - desc.addEphemeral(name, local_type, ""); + desc.addEphemeral(name, type, ""); }; add_virtual("_path", std::make_shared(std::make_shared())); From a52eff299eb49291e2b57f68e5b2874c7704f9d2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 21 Aug 2024 19:43:45 +0200 Subject: [PATCH 030/114] fix tests --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 43 ++---- tests/integration/test_storage_hdfs/test.py | 24 ++- .../03203_hive_style_partitioning.reference | 145 +++++++++--------- .../03203_hive_style_partitioning.sh | 2 - 5 files changed, 100 insertions(+), 116 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 7e3e902f083..ca82a1ce67a 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use a file with one column {}, that is ised during hive partitioning", name); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index fbdc7f29f98..637dbd38262 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( bucket="cont", max_path=path ) ] @@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == ["Gordon"] -def test_hive_partitioning_with_two_parameters(cluster): +def test_hive_partitioning_with_all_parameters(cluster): # type: (ClickHouseCluster) -> None node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 String, column2 String" @@ -1556,35 +1556,14 @@ def test_hive_partitioning_with_two_parameters(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = 
'{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( - bucket="cont", max_path=path - ) - ] + pattern = r"DB::Exception: Cannot implement partition by all columns in a file" - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" - ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] - - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" - ) - assert azure_query( - node, query, settings={"use_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"use_hive_partitioning": 1}) def test_hive_partitioning_without_setting(cluster): @@ -1603,9 +1582,9 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + f"blob_path='{path}', format='CSV', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fdbf7c5bacb..ad2e7084791 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1263,13 +1263,19 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" r = node1.query( - "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" + r = node1.query( + "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + settings={"use_hive_partitioning": 1}, + ) + assert r == f"Gordon\n" -def test_hive_partitioning_with_two_parameters(started_cluster): + +def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" @@ -1279,11 +1285,13 @@ def test_hive_partitioning_with_two_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - r = node1.query( - "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" + pattern = r"DB::Exception: 
Cannot implement partition by all columns in a file" + + with pytest.raises(QueryRuntimeException, match=pattern): + node1.query( + f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"use_hive_partitioning": 1}, + ) def test_hive_partitioning_without_setting(started_cluster): @@ -1301,7 +1309,7 @@ def test_hive_partitioning_without_setting(started_cluster): with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 0}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index 12ffd17c102..b5eaef7f51e 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,4 +1,14 @@ TESTING THE FILE HIVE PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -10,25 +20,16 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth 42 2020-01-01 [1,2,3] 42.42 Array(Int64) LowCardinality(Float64) @@ -37,10 +38,20 @@ Array(Int64) LowCardinality(Float64) 4081 2070 2070 -1 -1 b +1 +1 TESTING THE URL PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -52,26 +63,18 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth 1 TESTING THE S3 PARTITIONING +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth first last Elizabeth Jorge Frank Elizabeth Hunter Moreno Elizabeth @@ -83,39 +86,35 @@ Eugenia Greer Elizabeth Jeffery 
Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -Elizabeth Gordon Elizabeth Gordon -Elizabeth Gordon Elizabeth -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -first last Elizabeth -Jorge Frank Elizabeth -Hunter Moreno Elizabeth -Esther Guzman Elizabeth -Dennis Stephens Elizabeth -Nettie Franklin Elizabeth -Stanley Gibson Elizabeth -Eugenia Greer Elizabeth -Jeffery Delgado Elizabeth -Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -Eva Schmidt Elizabeth Schmidt -Samuel Schmidt Elizabeth Schmidt -Eva Schmidt Elizabeth -Samuel Schmidt Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth +last Elizabeth +Frank Elizabeth +Moreno Elizabeth +Guzman Elizabeth +Stephens Elizabeth +Franklin Elizabeth +Gibson Elizabeth +Greer Elizabeth +Delgado Elizabeth +Cross Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 5a0bd482985..41b215578f0 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -13,8 +13,6 @@ set use_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; From 74d8971432158a312777dcfba229513bfd876acc Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 22:06:52 +0300 Subject: [PATCH 031/114] fix: use OpenSSL RIPEMD160 impl --- src/Common/RipeMD160Hash.h | 191 ------------------------------- src/Functions/FunctionsHashing.h | 23 +++- 2 files changed, 19 insertions(+), 195 deletions(-) delete mode 100644 src/Common/RipeMD160Hash.h diff --git a/src/Common/RipeMD160Hash.h b/src/Common/RipeMD160Hash.h deleted file mode 100644 index b6488225974..00000000000 --- a/src/Common/RipeMD160Hash.h +++ /dev/null @@ -1,191 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/// https://homes.esat.kuleuven.be/~bosselae/ripemd160/pdf/AB-9601/AB-9601.pdf -/// https://en.wikipedia.org/wiki/RIPEMD - -class RipeMD160 -{ -private: - using FuncPtr = UInt32 (*)(UInt32, UInt32, UInt32); - - /// Stores the final 20-byte (160-bit) hash result - UInt8 digest_bytes[20]; - - static constexpr UInt32 initial_hash_values[5] = {0x67452301UL, 0xEFCDAB89UL, 0x98BADCFEUL, 0x10325476UL, 
0xC3D2E1F0UL}; - - static constexpr UInt8 rho_order[16] = {0x7, 0x4, 0xD, 0x1, 0xA, 0x6, 0xF, 0x3, 0xC, 0x0, 0x9, 0x5, 0x2, 0xE, 0xB, 0x8}; - - static constexpr UInt8 shift_amounts[80] - = {11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, 12, 13, 11, 15, 6, 9, 9, 7, 12, 15, 11, - 13, 7, 8, 7, 7, 13, 15, 14, 11, 7, 7, 6, 8, 13, 14, 13, 12, 5, 5, 6, 9, 14, 11, 12, 14, 8, 6, - 5, 5, 15, 12, 15, 14, 9, 9, 8, 6, 15, 12, 13, 13, 9, 5, 8, 6, 14, 11, 12, 11, 8, 6, 5, 5}; - - static constexpr UInt32 left_round_constants[5] = {0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL}; - - static constexpr UInt32 right_round_constants[5] = {0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL}; - - static constexpr UInt8 left_function_order[5] = {1, 2, 3, 4, 5}; - - static constexpr UInt8 right_function_order[5] = {5, 4, 3, 2, 1}; - - static ALWAYS_INLINE UInt32 F_1(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ b ^ c); } - - static ALWAYS_INLINE UInt32 F_2(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & b) | (~a & c)); } - - static ALWAYS_INLINE UInt32 F_3(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a | ~b) ^ c); } - - static ALWAYS_INLINE UInt32 F_4(UInt32 a, UInt32 b, UInt32 c) noexcept { return ((a & c) | (b & ~c)); } - - static ALWAYS_INLINE UInt32 F_5(UInt32 a, UInt32 b, UInt32 c) noexcept { return (a ^ (b | ~c)); } - - static constexpr FuncPtr hash_functions[5] = {F_1, F_2, F_3, F_4, F_5}; - - static ALWAYS_INLINE FuncPtr get_function(UInt8 function_id) noexcept { return hash_functions[function_id - 1]; } - - static ALWAYS_INLINE UInt32 convert_to_little_endian(UInt32 x) noexcept - { - if constexpr (std::endian::native == std::endian::little) - { - return x; - } - else - { - return __builtin_bswap32(x); - } - } - - static ALWAYS_INLINE UInt32 rotate_left(UInt32 value, UInt32 shift) noexcept { return (value << shift) | (value >> (32 - shift)); } - - /// Performs one full pass (5 rounds) of RIPEMD-160 algorithm for one path (left or right) - void process_rounds( - UInt32 * current_digest, - UInt32 * temp_words, - const UInt32 * data_chunk, - UInt8 * index_order, - const UInt8 * shift_values, - const UInt32 * round_constants, - const UInt8 * function_order) noexcept - { - std::memcpy(temp_words, current_digest, 5 * sizeof(UInt32)); - for (UInt8 round = 0; round < 5; ++round) - { - UInt32 k = round_constants[round]; - UInt8 fn = function_order[round]; - for (UInt8 j = 0; j < 16; ++j) - { - UInt32 temp_result = get_function(fn)(temp_words[1], temp_words[2], temp_words[3]); - temp_result += temp_words[0] + convert_to_little_endian(data_chunk[index_order[j]]) + k; - temp_result = rotate_left(temp_result, shift_values[index_order[j]]) + temp_words[4]; - temp_words[0] = temp_words[4]; - temp_words[4] = temp_words[3]; - temp_words[3] = rotate_left(temp_words[2], 10); - temp_words[2] = temp_words[1]; - temp_words[1] = temp_result; - } - shift_values += 16; - UInt8 reordered_index[16]; - for (size_t i = 0; i < 16; ++i) - reordered_index[i] = rho_order[index_order[i]]; - std::memcpy(index_order, reordered_index, 16); - } - } - - /// Update the digest with the given chunk of data - void update_digest(UInt32 * current_digest, const UInt32 * data_chunk) noexcept - { - UInt8 index_order[16]; - for (UInt8 i = 0; i < 16; ++i) - index_order[i] = i; - - UInt32 left_path_words[5]; - process_rounds(current_digest, left_path_words, data_chunk, index_order, shift_amounts, left_round_constants, left_function_order); - - static constexpr UInt8 rho_reordered_index[16] 
= {5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12}; - std::memcpy(index_order, rho_reordered_index, 16); - - UInt32 right_path_words[5]; - process_rounds( - current_digest, right_path_words, data_chunk, index_order, shift_amounts, right_round_constants, right_function_order); - - current_digest[0] += left_path_words[1] + right_path_words[2]; - current_digest[1] += left_path_words[2] + right_path_words[3]; - current_digest[2] += left_path_words[3] + right_path_words[4]; - current_digest[3] += left_path_words[4] + right_path_words[0]; - current_digest[4] += left_path_words[0] + right_path_words[1]; - - std::rotate(current_digest, current_digest + 1, current_digest + 5); - } - -public: - void hash(const UInt8 * data, size_t data_len) noexcept - { - UInt32 digest[5]; - for (size_t i = 0; i < 5; ++i) - digest[i] = convert_to_little_endian(initial_hash_values[i]); - - const UInt8 * last_chunk_start = data + (data_len & (~0x3F)); - while (data < last_chunk_start) - { - update_digest(digest, reinterpret_cast(data)); - data += 0x40; - } - - UInt8 last_chunk[0x40] = {}; - UInt8 leftover_size = data_len & 0x3F; - std::memcpy(last_chunk, data, leftover_size); - - last_chunk[leftover_size] = 0x80; - - if (leftover_size >= 0x38) - { - update_digest(digest, reinterpret_cast(last_chunk)); - std::memset(last_chunk, 0, 0x38); - } - - UInt32 data_len_bits = static_cast(data_len << 3); - std::memcpy(&last_chunk[0x38], &data_len_bits, sizeof(data_len_bits)); - data_len_bits = static_cast(data_len >> 29); - std::memcpy(&last_chunk[0x3C], &data_len_bits, sizeof(data_len_bits)); - - update_digest(digest, reinterpret_cast(last_chunk)); - - for (size_t i = 0; i < 5; ++i) - { - UInt32 digest_part = convert_to_little_endian(digest[i]); - std::memcpy(digest_bytes + i * 4, &digest_part, 4); - } - } - - const UInt8 * get_digest_bytes() const noexcept { return digest_bytes; } -}; - - -inline UInt256 ripeMD160Hash(const char * data, const size_t size) noexcept -{ - RipeMD160 ripe; - ripe.hash(reinterpret_cast(data), size); - - UInt8 digest[20]; - std::memcpy(digest, ripe.get_digest_bytes(), sizeof(digest)); - - std::reverse(digest, digest + sizeof(digest)); - - UInt256 res = 0; - std::memcpy(&res, digest, sizeof(digest)); - - return res; -} diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 8829e7c0479..5111ee2bd90 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -14,13 +14,14 @@ #include #include -#include #include #include #include #if USE_SSL +# include # include +# include #endif #include @@ -194,12 +195,26 @@ T combineHashesFunc(T t1, T t2) struct RipeMD160Impl { static constexpr auto name = "ripeMD160"; - using ReturnType = UInt256; - static UInt256 apply(const char * begin, size_t size) { return ripeMD160Hash(begin, size); } + static UInt256 apply(const char * begin, size_t size) + { + UInt8 digest[RIPEMD160_DIGEST_LENGTH]; - static UInt256 combineHashes(UInt256 h1, UInt256 h2) { return combineHashesFunc(h1, h2); } + RIPEMD160(reinterpret_cast(begin), size, reinterpret_cast(digest)); + + std::reverse(digest, digest + RIPEMD160_DIGEST_LENGTH); + + UInt256 res = 0; + std::memcpy(&res, digest, RIPEMD160_DIGEST_LENGTH); + + return res; + } + + static UInt256 combineHashes(UInt256 h1, UInt256 h2) + { + return combineHashesFunc(h1, h2); + } static constexpr bool use_int_hash_for_pods = false; }; From 38f9ef6bc95550d727bc56627fd029741e99177c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 19:08:07 +0000 Subject: [PATCH 032/114] 
Fix ColumnVariant permutation --- src/Columns/ColumnVariant.cpp | 12 ++++++++++-- .../03228_variant_permutation_issue.reference | 4 ++++ .../03228_variant_permutation_issue.sql | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.reference create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.sql diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 28a4860b546..2fea3eca123 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1009,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray & indexes, size_t new_variants.reserve(num_variants); for (size_t i = 0; i != num_variants; ++i) { - size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); - new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + /// Check if no values from this variant were selected. + if (nested_perms[i].empty()) + { + new_variants.emplace_back(variants[i]->cloneEmpty()); + } + else + { + size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); + new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + } } /// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation. diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference new file mode 100644 index 00000000000..7b18a0c59fb --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -0,0 +1,4 @@ +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql new file mode 100644 index 00000000000..3f60d42ffbd --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -0,0 +1,18 @@ +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS test_new_json_type; +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; + +DROP TABLE test_new_json_type; + From dfe0beb53b4f0d1da50bf04e9c9e3e06f8b29ad2 Mon Sep 17 00:00:00 2001 From: Dergousov Date: Wed, 21 Aug 2024 22:46:29 +0300 Subject: [PATCH 033/114] feat: add docs --- .../sql-reference/functions/hash-functions.md | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 7c977e7d6dc..d610e23fdda 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -688,6 +688,36 @@ SELECT kostikConsistentHash(16045690984833335023, 2); └───────────────────────────────────────────────┘ ``` +## ripeMD160 + +**Syntax** + +```sql +ripeMD160('input') +``` + +**Parameters** + +- 
`input`: Input string. [String](../data-types/string.md)
+
+**Returned value**
+
+- A [UInt256](../data-types/int-uint.md) hash value of type [FixedString(20)](../data-types/fixedstring.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+```
+
+```response
+┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
+└───────────────────────────────────────────────────────────────┘
+```
+
 ## murmurHash2_32, murmurHash2_64
 
 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
 
From 7f15f61426d07561ad0e24d946ac126961038a0c Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Wed, 21 Aug 2024 22:46:55 +0300
Subject: [PATCH 034/114] feat: add docs

---
 .../sql-reference/functions/hash-functions.md |  6 +++-
 .../sql-reference/functions/hash-functions.md | 32 +++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index d610e23fdda..9b7ac8af0e3 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -689,6 +689,9 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
 ```
 
 ## ripeMD160
+Produces the [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md).
+
+
 
 **Syntax**
 
@@ -702,9 +705,10 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256](../data-types/int-uint.md) hash value of type [FixedString(20)](../data-types/fixedstring.md).
+- A [UInt256](../data-types/int-uint.md) hash value.
 
 **Example**
+Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string.
 
 Query:
 
diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md
index 98b6d8d4b17..66d77e66972 100644
--- a/docs/ru/sql-reference/functions/hash-functions.md
+++ b/docs/ru/sql-reference/functions/hash-functions.md
@@ -124,6 +124,38 @@ SELECT hex(sipHash128('foo', '\x01', 3));
 └──────────────────────────────────┘
 ```
 
+## ripeMD160
+Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md).
+
+**Синтаксис**
+
+```sql
+ripeMD160('input')
+```
+
+**Аргументы**
+
+- `input`: Строка [String](../data-types/string.md)
+
+**Возвращаемое значение**
+
+- [UInt256](../data-types/int-uint.md) хеш-значение
+
+**Пример**
+Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой.
+
+Запрос:
+
+```sql
+SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
+```
+Результат:
+```response
+┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
+│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
+└───────────────────────────────────────────────────────────────┘
+```
+
 ## cityHash64 {#cityhash64}
 
 Генерирует 64-х битное значение [CityHash](https://github.com/google/cityhash).
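A quick way to sanity-check the OpenSSL-backed implementation from PATCH 031 is to compare it against the reference digest that both documentation pages above quote — a minimal sketch, assuming a build with SSL support, since the implementation relies on OpenSSL's `RIPEMD160`; everything in it (`ripeMD160`, `hex`, the expected constant) comes from the patches above:

```sql
-- Compare ripeMD160() against the reference vector quoted in the docs;
-- matches_reference = 1 means the OpenSSL-backed result agrees with it.
SELECT
    hex(ripeMD160('The quick brown fox jumps over the lazy dog')) AS digest,
    digest = '37F332F68DB77BD9D7EDD4969571AD671CF9DD3B' AS matches_reference;
```

ClickHouse allows a later expression in the same SELECT list to reuse the `digest` alias, so the comparison needs no subquery.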
From 6e5465ae5126f3281d81172e952b6811f8946f2d Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Wed, 21 Aug 2024 15:47:08 +0200
Subject: [PATCH 035/114] CI: SQLLogic job fix

---
 docker/test/sqllogic/Dockerfile       |  3 ---
 tests/ci/sqllogic_test.py             | 10 ++++------
 .../docker_scripts/sqllogic_runner.sh | 18 +++++++++---------
 3 files changed, 13 insertions(+), 18 deletions(-)
 rename docker/test/sqllogic/run.sh => tests/docker_scripts/sqllogic_runner.sh (87%)

diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile
index 6397526388e..0d21a2da44e 100644
--- a/docker/test/sqllogic/Dockerfile
+++ b/docker/test/sqllogic/Dockerfile
@@ -40,6 +40,3 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
 
 RUN git clone --recursive ${sqllogic_test_repo}
-
-COPY run.sh /
-CMD ["/bin/bash", "/run.sh"]
diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py
index 63880f07e92..7fe44c235c7 100755
--- a/tests/ci/sqllogic_test.py
+++ b/tests/ci/sqllogic_test.py
@@ -31,7 +31,7 @@ IMAGE_NAME = "clickhouse/sqllogic-test"
 
 def get_run_command(
     builds_path: Path,
-    repo_tests_path: Path,
+    repo_path: Path,
     result_path: Path,
     server_log_path: Path,
     image: DockerImage,
@@ -39,11 +39,11 @@ def get_run_command(
     return (
         f"docker run "
         f"--volume={builds_path}:/package_folder "
-        f"--volume={repo_tests_path}:/clickhouse-tests "
+        f"--volume={repo_path}:/repo "
        f"--volume={result_path}:/test_output "
        f"--volume={server_log_path}:/var/log/clickhouse-server "
        "--security-opt seccomp=unconfined "  # required to issue io_uring sys-calls
-        f"--cap-add=SYS_PTRACE {image}"
+        f"--cap-add=SYS_PTRACE {image} /repo/tests/docker_scripts/sqllogic_runner.sh"
    )

@@ -94,8 +94,6 @@ def main():
 
     docker_image = pull_image(get_docker_image(IMAGE_NAME))
 
-    repo_tests_path = repo_path / "tests"
-
     packages_path = temp_path / "packages"
     packages_path.mkdir(parents=True, exist_ok=True)
 
@@ -111,7 +109,7 @@ def main():
     run_command = get_run_command(  # run script inside docker
         packages_path,
-        repo_tests_path,
+        repo_path,
         result_path,
         server_log_path,
         docker_image,
diff --git a/docker/test/sqllogic/run.sh b/tests/docker_scripts/sqllogic_runner.sh
similarity index 87%
rename from docker/test/sqllogic/run.sh
rename to tests/docker_scripts/sqllogic_runner.sh
index 32368980f9b..8b8f1e7aec7 100755
--- a/docker/test/sqllogic/run.sh
+++ b/tests/docker_scripts/sqllogic_runner.sh
@@ -15,10 +15,10 @@ echo "Files in current directory"
 ls -la ./
 echo "Files in root directory"
 ls -la /
-echo "Files in /clickhouse-tests directory"
-ls -la /clickhouse-tests
-echo "Files in /clickhouse-tests/sqllogic directory"
-ls -la /clickhouse-tests/sqllogic
+echo "Files in /repo/tests directory"
+ls -la /repo/tests
+echo "Files in /repo/tests/sqllogic directory"
+ls -la /repo/tests/sqllogic
 echo "Files in /package_folder directory"
 ls -la /package_folder
 echo "Files in /test_output"
@@ -45,13 +45,13 @@
function run_tests() if [ -d /sqllogictest ] then mkdir -p /test_output/statements-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-statements-test.log \ --log-level info \ statements-test \ @@ -77,7 +77,7 @@ function run_tests() tar -zcvf statements-check.tar.gz statements-test 1>/dev/null mkdir -p /test_output/complete-test - /clickhouse-tests/sqllogic/runner.py \ + /repo/tests/sqllogic/runner.py \ --log-file /test_output/runner-complete-test.log \ --log-level info \ complete-test \ From ca880ccdee16a212cebccce7090eabf4f528aa68 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 20:47:48 +0000 Subject: [PATCH 036/114] Fix structure comparison between 2 JSON columns --- src/Columns/ColumnObject.cpp | 4 ++-- .../03229_json_structure_comparison.reference | 3 +++ .../03229_json_structure_comparison.sql | 22 +++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03229_json_structure_comparison.reference create mode 100644 tests/queries/0_stateless/03229_json_structure_comparison.sql diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index b7194ef50e7..999c0f6088e 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -127,7 +127,7 @@ std::string ColumnObject::getName() const { WriteBufferFromOwnString ss; ss << "Object("; - ss << "max_dynamic_paths=" << max_dynamic_paths; + ss << "max_dynamic_paths=" << global_max_dynamic_paths; ss << ", max_dynamic_types=" << max_dynamic_types; std::vector sorted_typed_paths; sorted_typed_paths.reserve(typed_paths.size()); @@ -1047,7 +1047,7 @@ bool ColumnObject::structureEquals(const IColumn & rhs) const { /// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types. 
const auto * rhs_object = typeid_cast(&rhs); - if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || max_dynamic_paths != rhs_object->max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) + if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) return false; for (const auto & [path, column] : typed_paths) diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.reference b/tests/queries/0_stateless/03229_json_structure_comparison.reference new file mode 100644 index 00000000000..c816df4f5c7 --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.reference @@ -0,0 +1,3 @@ +{"foo1":"bar"} {"foo1":"bar"} +{"foo2":"bar"} {"foo2":"bar"} +{"foo2":"bar"} {"foo2":"bar"} diff --git a/tests/queries/0_stateless/03229_json_structure_comparison.sql b/tests/queries/0_stateless/03229_json_structure_comparison.sql new file mode 100644 index 00000000000..16db469325d --- /dev/null +++ b/tests/queries/0_stateless/03229_json_structure_comparison.sql @@ -0,0 +1,22 @@ +SET allow_experimental_json_type=1; + +DROP TABLE IF EXISTS test_new_json_type; + +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; + +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT + a.data + , b.data +FROM + test_new_json_type a + JOIN test_new_json_type b + ON a.id = b.id; + +DROP TABLE test_new_json_type; + From bff252ea73f5141b1c85bccb69780f7e27c9a6f7 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Aug 2024 21:45:26 +0000 Subject: [PATCH 037/114] Fix test --- .../0_stateless/03228_variant_permutation_issue.reference | 2 +- tests/queries/0_stateless/03228_variant_permutation_issue.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference index 7b18a0c59fb..10688253e15 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -1,4 +1,4 @@ 2 {"foo2":"bar"} 1 3 {"foo2":"bar"} 1 -3 {"foo2":"bar"} 1 2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql index 3f60d42ffbd..088361d6430 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -8,11 +8,11 @@ INSERT INTO test_new_json_type format JSONEachRow {"id":3,"data":{"foo2":"bar"},"version":1} ; -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null; +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; DROP TABLE test_new_json_type; From 2f6ad1271cfbd9aa62ad2365e70314aba4da21b9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 
00:27:02 +0200 Subject: [PATCH 038/114] fix tests + exception --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 10 +-- tests/integration/test_storage_hdfs/test.py | 9 +-- .../03203_hive_style_partitioning.reference | 2 - .../03203_hive_style_partitioning.sh | 61 +++---------------- 5 files changed, 15 insertions(+), 69 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ca82a1ce67a..f0d276e4e56 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 637dbd38262..a3172329a99 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( + "Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path ) ] @@ -1560,7 +1560,7 @@ def test_hive_partitioning_with_all_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(Exception, match=pattern): azure_query(node, query, settings={"use_hive_partitioning": 1}) @@ -1572,7 +1572,7 @@ def test_hive_partitioning_without_setting(cluster): table_format = "column1 String, column2 String" values_1 = f"('Elizabeth', 'Gordon')" values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv" azure_query( node, @@ -1582,7 +1582,7 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ad2e7084791..ea8c4efa745 100644 --- 
a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1268,11 +1268,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): ) assert r == f"Elizabeth\n" - r = node1.query( - "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" def test_hive_partitioning_with_all_parameters(started_cluster): @@ -1285,11 +1280,11 @@ def test_hive_partitioning_with_all_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 1}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index b5eaef7f51e..af52dcd9b88 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -35,8 +35,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 41b215578f0..4e165446c34 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -11,22 +11,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM 
file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -37,7 +25,6 @@ SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01 $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ @@ -61,21 +48,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" @@ -92,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM 
s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; """ $CLICKHOUSE_CLIENT -n -q """ @@ -123,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; """ From 8a89cd31a1e7770479af6eaf1b4211ef4ece1795 Mon 
Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:29:32 +0200 Subject: [PATCH 039/114] Fix Upgrade Check: move some settings to 24.9 section --- src/Core/SettingsChangesHistory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index fb59577b0f0..5e831c6301c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -72,11 +72,13 @@ static std::initializer_list Date: Thu, 22 Aug 2024 00:48:29 +0200 Subject: [PATCH 040/114] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ea8c4efa745..a75c13b9ea6 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1269,7 +1269,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 0f3c7ae8c202f475fe55f33f45e9bca92155d52c Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 22 Aug 2024 01:15:16 +0200 Subject: [PATCH 041/114] feat: add docs --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ffd9fae7f45..308e285c4bd 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2942 +personal_ws-1.1 en 2983 AArch ACLs ALTERs @@ -957,6 +957,7 @@ ThreadPoolRemoteFSReaderThreads ThreadPoolRemoteFSReaderThreadsActive ThreadsActive ThreadsInOvercommitTracker +TimeSeries Timeunit TinyLog Tkachenko @@ -1098,12 +1099,12 @@ addressToLineWithInlines addressToSymbol adviced agg +aggThrow aggregatefunction aggregatingmergetree aggregatio aggretate aggthrow -aggThrow aiochclient allocator alphaTokens @@ -1875,8 +1876,8 @@ joinGet joinGetOrNull json jsonMergePatch -jsonasstring jsonasobject +jsonasstring jsoncolumns jsoncolumnsmonoblock jsoncompact @@ -1917,8 +1918,8 @@ kurtSamp kurtosis kurtpop kurtsamp -laion lagInFrame +laion lang laravel largestTriangleThreeBuckets @@ -2020,7 +2021,6 @@ maxMap maxintersections maxintersectionsposition maxmap -minMappedArrays maxmind mdadm meanZTest @@ -2213,8 +2213,8 @@ parseReadableSizeOrZero parseTimeDelta parseable parsers -partitionId partitionID +partitionId pathFull pclmulqdq pcre @@ -2443,6 +2443,7 @@ rewritable rightPad rightPadUTF rightUTF +ripeMD risc riscv ro @@ -2694,7 +2695,6 @@ themself threadpool throwIf timeDiff -TimeSeries timeSeriesData timeSeriesMetrics timeSeriesTags From 54caf1f84e3c3b5076adf29b49f4ee548f243091 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 22 Aug 2024 01:20:46 +0200 Subject: [PATCH 042/114] fix: wrap in conditional preprocessor directives --- src/Functions/FunctionsHashing.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 5111ee2bd90..ec39cf1e2cf 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -192,6 +192,7 @@ T combineHashesFunc(T t1, T t2) return 
HashFunction::apply(reinterpret_cast(hashes), sizeof(hashes)); } +#if USE_SSL struct RipeMD160Impl { static constexpr auto name = "ripeMD160"; @@ -218,7 +219,7 @@ struct RipeMD160Impl static constexpr bool use_int_hash_for_pods = false; }; - +#endif struct SipHash64Impl { @@ -1647,6 +1648,7 @@ using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL using FunctionHalfMD5 = FunctionAnyHash; +using FunctionRipeMD160Hash = FunctionAnyHash; #endif using FunctionSipHash128 = FunctionAnyHash; using FunctionSipHash128Keyed = FunctionAnyHash; @@ -1676,7 +1678,6 @@ using FunctionXXH3 = FunctionAnyHash; using FunctionWyHash64 = FunctionAnyHash; -using FunctionRipeMD160Hash = FunctionAnyHash; } #pragma clang diagnostic pop From be4439e3ec0a1491f4e333ac848844fd930a6e5b Mon Sep 17 00:00:00 2001 From: Alexey Date: Thu, 22 Aug 2024 10:30:48 +0300 Subject: [PATCH 043/114] Update install.md Added correct commands for the Russian version of the installation from deb packages --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index aee445da843..4a0ec258c64 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -25,10 +25,10 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update From 6466f374e0372b22a23d1193e534bd6c94f87b94 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:29:33 +0200 Subject: [PATCH 044/114] Update geohash.md --- .../en/sql-reference/functions/geo/geohash.md | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index b6ac7a74092..c4f41fc53da 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -6,7 +6,7 @@ title: "Functions for Working with Geohash" ## Geohash -[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location. +[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth’s surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be. 
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). @@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c Encodes latitude and longitude as a [geohash](#geohash)-string. +**Syntax** + ``` sql geohashEncode(longitude, latitude, [precision]) ``` **Input values** -- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` -- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` -- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. +- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data_types/float.md). +- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data_types/float.md). +- `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md). + +:::note +- All coordinate parameters must be of the same type: either `Float32` or `Float64`. +- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`. +::: **Returned values** -- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). +- Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md). **Example** +Query: + ``` sql SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; ``` +Result: + ``` text ┌─res──────────┐ │ ezs42d000000 │ @@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; Decodes any [geohash](#geohash)-encoded string into longitude and latitude. +**Syntax** + +```sql +geohashDecode(hash_str) +``` + **Input values** -- encoded string - geohash-encoded string. +- `hash_str` — Geohash-encoded string. **Returned values** -- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. +- Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. 
[Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md)) **Example** From 95f45d2eaf39a9e8a6373c75749ec57f727be700 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:20:04 +0200 Subject: [PATCH 045/114] try to fix tests --- .../test_storage_azure_blob_storage/test.py | 14 +++++------ tests/integration/test_storage_hdfs/test.py | 25 +++---------------- .../03203_hive_style_partitioning.reference | 20 +++++++-------- .../03203_hive_style_partitioning.sh | 14 +++-------- 4 files changed, 23 insertions(+), 50 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index a3172329a99..c1f518e45ce 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1513,14 +1513,14 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}')" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1533,7 +1533,7 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1551,14 +1551,14 @@ def test_hive_partitioning_with_all_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = r"DB::Exception: Cannot use hive partitioning for file" @@ -1577,14 +1577,14 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', 
blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index a75c13b9ea6..31cc8609eb4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,35 +1259,16 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" r = node1.query( - "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" -def test_hive_partitioning_with_all_parameters(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - - pattern = r"DB::Exception: Cannot use hive partitioning for file" - - with pytest.raises(QueryRuntimeException, match=pattern): - node1.query( - f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index af52dcd9b88..acdadc2510b 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,5 +1,5 @@ TESTING THE FILE HIVE PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -19,8 +19,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -35,12 +34,13 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 +4081 +2070 2070 b 1 -1 TESTING THE URL PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -60,10 +60,9 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross 
Elizabeth -Elizabeth Gordon Elizabeth 1 TESTING THE S3 PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -83,8 +82,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -96,7 +94,7 @@ Delgado Elizabeth Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -106,7 +104,7 @@ Gibson Elizabeth Greer Elizabeth Delgado Elizabeth Cross Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 4e165446c34..b3d196924af 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -14,7 +14,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -29,16 +29,10 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ -$CLICKHOUSE_LOCAL -n -q """ -set use_hive_partitioning = 1; - -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; -""" 2>&1 | grep -c "INCORRECT_DATA" - $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -68,7 +62,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ $CLICKHOUSE_CLIENT -n -q """ @@ -84,5 +78,5 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 
FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ From 62054cae666244fd072a56f70a6df73e68249cb0 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:49:16 +0200 Subject: [PATCH 046/114] Update geohash.md --- docs/en/sql-reference/functions/geo/geohash.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index c4f41fc53da..ce2e3c43b3e 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -22,8 +22,8 @@ geohashEncode(longitude, latitude, [precision]) **Input values** -- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data_types/float.md). -- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data_types/float.md). +- `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md). - `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md). :::note From 84467077b886cd48c9cd33c69c1935b3f7863dd7 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 22 Aug 2024 13:45:13 +0200 Subject: [PATCH 047/114] Fix test for role expiration in RoleCache. --- tests/integration/test_role/test.py | 81 +++++++++-------------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..9d15f0f81db 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -418,72 +418,43 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache 
can be caused by another role being used. + instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") - - -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") - - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) - - instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation - - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") - - assert instance.query("SELECT * from tre1", user="ure") == "0\n" - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") From 664e9b3db9d47e45c642ad21e3a5273ab423199a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:30:41 +0200 Subject: [PATCH 048/114] Add one more test. --- tests/integration/test_role/test.py | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9d15f0f81db..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -458,3 +459,175 @@ def test_role_expiration(with_extra_role): if with_extra_role: instance.query("DROP ROLE extra_role") instance.query("DROP USER extra_user") + + +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 + + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. 
+ num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) + instance.query( + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) + + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} + + # Checks that each of the users can access the expected set of columns and can't access other columns. + def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok + + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. + def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. 
+ def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. + def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") From 7ef5c366e873c4fd99f257eefbb3a350848e308c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:33:50 +0200 Subject: [PATCH 049/114] Fix expiration in RoleCache. 
--- src/Access/RoleCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 2d94df2eea5..cc1f1520b67 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO SubscriptionsOnRoles new_subscriptions_on_roles; new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); - auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; + auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); }; for (const auto & current_role : enabled_roles.params.current_roles) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); From 54dd3afd49df9c92cd3621a5cec4c7464c341a71 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 14:52:17 +0200 Subject: [PATCH 050/114] Turn off fault injection for insert in 01396_inactive_replica_cleanup_nodes_zookeeper --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..9ea15071856 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -23,11 +23,10 @@ $CLICKHOUSE_CLIENT -n --query " DETACH TABLE r2; " -$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# insert_keeper_fault_injection_probability=0 -- can slowdown insert a lot (produce a lot of parts) +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" -# Now wait for cleanup thread - for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From 7a740819b9551a291827b9d37b8b724612587a20 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 14:53:15 +0200 Subject: [PATCH 051/114] Accidentally deleted comment --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 9ea15071856..80e9253af2c 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -27,6 +27,7 @@ $CLICKHOUSE_CLIENT -n --query " $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query 
"INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# Now wait for cleanup thread for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From b3f084459f60b1e31c32736573af0810dee99230 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:53:53 +0200 Subject: [PATCH 052/114] fix black --- tests/integration/test_storage_hdfs/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 31cc8609eb4..b18940b7290 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,8 +1259,13 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" + hdfs_api.write_data( + f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/file_1") + == f"column0,column1\nElizabeth,Gordon\n" + ) r = node1.query( "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", @@ -1269,6 +1274,7 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" + def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 8d14d8523098a42cd778ef50a9b066508da8919c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:24:33 +0200 Subject: [PATCH 053/114] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index b18940b7290..7a92e8adb0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1274,7 +1274,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From add4718634317304f652579a9f201c3b81c96a7d Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 06:37:27 -0700 Subject: [PATCH 054/114] Update README.md - Meetups update Fixed one meetup location; Added more meetups --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e66b9da73e..c9474ef0fc0 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,17 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [Raleigh Meetup (Deutsche 
Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 -* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 +Other upcoming meetups +* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 +* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 +* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 +* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 +* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 +* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 + ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! From 91e65feaaedd4806875aed3d4be4f07edeefdb71 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 22 Aug 2024 13:42:30 +0000 Subject: [PATCH 055/114] fix virtual columns in Merge engine --- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageMerge.cpp | 14 +++++++------- .../02890_describe_table_options.reference | 8 ++++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c4668159759..0b80858800b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,7 +290,7 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); - /// Add virtual columns from table of storage Merges. + /// Add virtual columns from table with Merge engine. 
desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e88844e2d31..0827321e296 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. 
Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 From ce33943b430a9ad512f4942083889dea4decb778 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:50:59 +0200 Subject: [PATCH 056/114] Fix flaky check --- tests/docker_scripts/stateless_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker_scripts/stateless_runner.sh b/tests/docker_scripts/stateless_runner.sh index 40a63f74a6b..d8921a04458 100755 --- a/tests/docker_scripts/stateless_runner.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -339,7 +339,7 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
-    NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
+    NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \
         | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
 fi

From a9e793532ae308767da3bc4da74d9631cd85eb90 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 22 Aug 2024 16:34:14 +0200
Subject: [PATCH 057/114] fix shutdown for PeriodicLog

---
 src/Interpreters/PeriodicLog.cpp | 3 ++-
 src/Interpreters/PeriodicLog.h   | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp
index 9d2891e11eb..15970ca5b81 100644
--- a/src/Interpreters/PeriodicLog.cpp
+++ b/src/Interpreters/PeriodicLog.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include "Functions/DateTimeTransforms.h"

 namespace DB
 {
@@ -27,7 +28,7 @@ template <typename LogElement>
 void PeriodicLog<LogElement>::shutdown()
 {
     stopCollect();
-    this->stopFlushThread();
+    Base::shutdown();
 }

 template <typename LogElement>
diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h
index 08c3f7eb23f..ceac8088d40 100644
--- a/src/Interpreters/PeriodicLog.h
+++ b/src/Interpreters/PeriodicLog.h
@@ -17,6 +17,7 @@ template <typename LogElement>
 class PeriodicLog : public SystemLog<LogElement>
 {
     using SystemLog<LogElement>::SystemLog;
+    using Base = SystemLog<LogElement>;

 public:
     using TimePoint = std::chrono::system_clock::time_point;
@@ -24,12 +25,12 @@ public:
     /// Launches a background thread to collect metrics with interval
     void startCollect(size_t collect_interval_milliseconds_);

-    /// Stop background thread
-    void stopCollect();
-
     void shutdown() final;

 protected:
+    /// Stop background thread
+    void stopCollect();
+
     virtual void stepFunction(TimePoint current_time) = 0;

 private:

From 5340ac5fbc7fba75d6a743d345c0f79dc466df0b Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 14:39:19 +0000
Subject: [PATCH 058/114] Update version_date.tsv and changelogs after v24.5.5.41-stable

---
 docs/changelogs/v24.5.5.41-stable.md | 71 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +
 2 files changed, 73 insertions(+)
 create mode 100644 docs/changelogs/v24.5.5.41-stable.md

diff --git a/docs/changelogs/v24.5.5.41-stable.md b/docs/changelogs/v24.5.5.41-stable.md
new file mode 100644
index 00000000000..8ba160e31d7
--- /dev/null
+++ b/docs/changelogs/v24.5.5.41-stable.md
@@ -0,0 +1,71 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0)
+
+#### Improvement
+* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. 
Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
+* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
+* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents incorrect results of schema inference when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. 
[#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
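
A minimal sketch of the query shape targeted by the `(column IS NULL)` fix above; the table `t` and its Nullable column `a` are hypothetical, not from the patch:

```sql
-- Before the fix, combining an aggregate with the expression (a IS NULL)
-- could fail with "Unknown identifier" or "Column is not under aggregate
-- function" when the new analyzer was disabled.
SELECT count(), (a IS NULL) AS a_is_null
FROM t
GROUP BY a_is_null;
```
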
+* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
+* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. 
[#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). 
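
A hedged illustration of the `ALTER MODIFY SQL SECURITY` entry above; the table and view names are hypothetical:

```sql
-- SQL SECURITY is a property of (materialized) views; applying it to a plain
-- table is now rejected with an explicit error instead of being accepted.
ALTER TABLE plain_merge_tree_table MODIFY SQL SECURITY INVOKER;  -- now an explicit error
ALTER TABLE my_materialized_view MODIFY SQL SECURITY DEFINER DEFINER = CURRENT_USER;  -- still valid for views
```
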
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..9063d3ef971 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +15,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 1ea0163dfe6b3278d8a5e8d86c31b3d63d7a3780 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:42:14 +0200 Subject: [PATCH 059/114] Fix issue with maps with arrays as keys --- src/Functions/FunctionsHashing.h | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 7 ++++++- tests/queries/0_stateless/02534_keyed_siphash.sql | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0cf4246fd66..3da0b2cd9be 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -93,9 +93,9 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - if (offsets != nullptr) + if (offsets != nullptr && i > 0) { - const auto * const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 31c0cae8981..8b147025a05 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -244,5 +244,10 @@ Test emtpy arrays and maps 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +Test maps with arrays as keys 16734549324845627102 -D675BB3D687973A238AB891DD99C7047 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b499d8ef02b..ba3c4a9156d 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -351,5 +351,8 @@ SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); -SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); From a93d1919804d1c8dc7760f20084ade9a09710a47 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:43:38 +0200 Subject: [PATCH 060/114] Fix typo 
in test case

---
 tests/queries/0_stateless/02534_keyed_siphash.reference | 2 +-
 tests/queries/0_stateless/02534_keyed_siphash.sql       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference
index 8b147025a05..a05446a494e 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.reference
+++ b/tests/queries/0_stateless/02534_keyed_siphash.reference
@@ -239,7 +239,7 @@ Check bug found fuzzing
 Test arrays and maps
 608E1FF030C9E206185B112C2A25F1A7
 ABB65AE97711A2E053E324ED88B1D08B
-Test emtpy arrays and maps
+Test empty arrays and maps
 4761183170873013810
 0AD04BFD000000000000000000000000
 4761183170873013810
diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql
index ba3c4a9156d..7cfc82512bd 100644
--- a/tests/queries/0_stateless/02534_keyed_siphash.sql
+++ b/tests/queries/0_stateless/02534_keyed_siphash.sql
@@ -346,7 +346,7 @@ INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g'
 SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a;
 DROP TABLE sipHashKeyed_keys;

-SELECT 'Test emtpy arrays and maps';
+SELECT 'Test empty arrays and maps';
 SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []);
 SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []));
 SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []));

From 0dc18247df3a290b4fb312325ff3b2a44a3f8357 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 15:10:24 +0000
Subject: [PATCH 061/114] Update version_date.tsv and changelogs after v24.6.3.38-stable

---
 docs/changelogs/v24.6.3.38-stable.md | 83 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  3 +
 2 files changed, 86 insertions(+)
 create mode 100644 docs/changelogs/v24.6.3.38-stable.md

diff --git a/docs/changelogs/v24.6.3.38-stable.md b/docs/changelogs/v24.6.3.38-stable.md
new file mode 100644
index 00000000000..01d7e26e31f
--- /dev/null
+++ b/docs/changelogs/v24.6.3.38-stable.md
@@ -0,0 +1,83 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c)
+
+#### Improvement
+* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). 
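
A sketch of the null-safe join rewrite described in the entry above, assuming two hypothetical tables `t1` and `t2` with a `Nullable` column `x`:

```sql
-- The verbose null-safe pattern ...
SELECT * FROM t1 JOIN t2
    ON (t1.x = t2.x AND t1.x IS NOT NULL AND t2.x IS NOT NULL)
    OR (t1.x IS NULL AND t2.x IS NULL);

-- ... is equivalent to the null-safe comparison the optimizer rewrites it to:
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x;
```
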
+* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents incorrect results of schema inference when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
+* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
+* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. 
[#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
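
A minimal sketch of the local named-collection lifecycle fixed in the entry above; the collection name and keys are hypothetical:

```sql
-- With collections stored locally (not in ZooKeeper/Keeper), DROP could
-- previously fail to remove the collection; after the fix it is removed.
CREATE NAMED COLLECTION creds AS user = 'reader', password = 'secret';
DROP NAMED COLLECTION creds;
```
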
+* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
+* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
+* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
+* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
[#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
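
A hypothetical mutation of the shape covered by the untouched-parts entry above (the table `events` and column `d` are not from the patch):

```sql
-- Parts whose data cannot match the predicate should be skipped rather than
-- rewritten; previously, with the new analyzer enabled, they were rewritten anyway.
ALTER TABLE events UPDATE value = 0 WHERE d = '2024-08-01';
```
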
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. 
[#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..cc168f58862 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -3,9 +3,11 @@ v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +16,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 0b9c24f31d548c87deca3334282c14fc78a295ba Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 15 Aug 2024 12:09:50 +0000 Subject: [PATCH 062/114] write metadata to disk and keeper in the same format --- src/Storages/ColumnsDescription.cpp | 30 +++++--- src/Storages/ColumnsDescription.h | 6 +- .../__init__.py | 0 .../config/enable_keeper.xml | 26 +++++++ .../config/users.xml | 8 +++ .../test.py | 71 +++++++++++++++++++ 6 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 0d724245b49..0212bbd6fff 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && ast_to_str(ttl) == 
ast_to_str(other.ttl); } -void ColumnDescription::writeText(WriteBuffer & buf) const +String formatASTStateAware(IAST & ast, IAST::FormatState & state) +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true, false); + ast.formatImpl(settings, state, IAST::FormatStateStacked()); + return buf.str(); +} + +void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const { /// NOTE: Serialization format is insane. @@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeChar('\t', buf); DB::writeText(DB::toString(default_desc.kind), buf); writeChar('\t', buf); - writeEscapedString(queryToString(default_desc.expression), buf); + writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf); } - if (!comment.empty()) + if (!comment.empty() && include_comment) { writeChar('\t', buf); DB::writeText("COMMENT ", buf); - writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf); + auto ast = ASTLiteral(Field(comment)); + writeEscapedString(formatASTStateAware(ast, state), buf); } if (codec) { writeChar('\t', buf); - writeEscapedString(queryToString(codec), buf); + writeEscapedString(formatASTStateAware(*codec, state), buf); } if (!settings.empty()) @@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const ASTSetQuery ast; ast.is_standalone = false; ast.changes = settings; - writeEscapedString(queryToString(ast), buf); + writeEscapedString(formatASTStateAware(ast, state), buf); DB::writeText(")", buf); } if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(statistics.getAST()), buf); + writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf); } if (ttl) { writeChar('\t', buf); DB::writeText("TTL ", buf); - writeEscapedString(queryToString(ttl), buf); + writeEscapedString(formatASTStateAware(*ttl, state), buf); } writeChar('\n', buf); @@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs() } -String ColumnsDescription::toString() const +String ColumnsDescription::toString(bool include_comments) const { WriteBufferFromOwnString buf; + IAST::FormatState ast_format_state; writeCString("columns format version: 1\n", buf); DB::writeText(columns.size(), buf); writeCString(" columns:\n", buf); for (const ColumnDescription & column : columns) - column.writeText(buf); + column.writeText(buf, ast_format_state, include_comments); return buf.str(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f0760160f0a..c89c26501e8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,7 +104,7 @@ struct ColumnDescription bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } - void writeText(WriteBuffer & buf) const; + void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const; void readText(ReadBuffer & buf); }; @@ -137,7 +137,7 @@ public: /// NOTE Must correspond with Nested::flatten function. void flattenNested(); /// TODO: remove, insert already flattened Nested columns. 
-    bool operator==(const ColumnsDescription & other) const { return columns == other.columns; }
+    bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); }
     bool operator!=(const ColumnsDescription & other) const { return !(*this == other); }

     auto begin() const { return columns.begin(); }
@@ -221,7 +221,7 @@ public:
     /// Does column has non default specified compression codec
     bool hasCompressionCodec(const String & column_name) const;

-    String toString() const;
+    String toString(bool include_comments = true) const;
     static ColumnsDescription parse(const String & str);

     size_t size() const
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml
new file mode 100644
index 00000000000..4ca4f604ec3
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml
@@ -0,0 +1,26 @@
+<clickhouse>
+    <keeper_server>
+        <tcp_port>2181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <coordination_settings>
+            <session_timeout_ms>20000</session_timeout_ms>
+        </coordination_settings>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>localhost</hostname>
+                <port>9444</port>
+            </server>
+        </raft_configuration>
+    </keeper_server>
+
+    <zookeeper>
+        <node>
+            <host>localhost</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>20000</session_timeout_ms>
+    </zookeeper>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml
new file mode 100644
index 00000000000..c5de0b6819c
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <users>
+        <default>
+            <profile>default</profile>
+        </default>
+    </users>
+
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py
new file mode 100644
index 00000000000..e0c15e18c23
--- /dev/null
+++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py
@@ -0,0 +1,71 @@
+import pytest
+import random
+import string
+
+from helpers.cluster import ClickHouseCluster
+
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance(
+    "node",
+    main_configs=[
+        "config/enable_keeper.xml",
+        "config/users.xml",
+    ],
+    stay_alive=True,
+    with_minio=True,
+    macros={"shard": 1, "replica": 1},
+)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def randomize_table_name(table_name, random_suffix_length=10):
+    letters = string.ascii_letters + string.digits
+    return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}"
+
+
+@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"])
+def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine):
+    """
+    Making sure that using aliases in columns' default expressions does not lead to having different columns metadata in ZooKeeper and on disk. 
+ Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150 + """ + + data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}' + + table_name = randomize_table_name("t") + + node.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} + ( + `data` String, + `col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'), + `col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key') + ) + ENGINE = {engine}('/test/{table_name}', '{{replica}}') + ORDER BY col1 + """ + ) + + node.restart_clickhouse() + + node.query( + f""" + INSERT INTO {table_name} (data) VALUES ('{data}'); + """ + ) + assert node.query(f"SELECT data FROM {table_name}").strip() == data + assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val" + assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val" + + node.query(f"DROP TABLE {table_name}") From 859d2bfe273f571458be6f007761bc8c743d589a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 17:18:06 +0200 Subject: [PATCH 063/114] move stopFlushThread to SystemLogBase --- src/Common/SystemLogBase.cpp | 19 +++++++++++++++++++ src/Common/SystemLogBase.h | 2 ++ src/Interpreters/PeriodicLog.cpp | 6 +++--- src/Interpreters/PeriodicLog.h | 2 +- src/Interpreters/SystemLog.cpp | 21 +-------------------- src/Interpreters/SystemLog.h | 7 +------ 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 127c8862a35..45f4eb1c5a6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -273,6 +273,25 @@ void SystemLogBase::startup() saving_thread = std::make_unique([this] { savingThreadFunction(); }); } +template +void SystemLogBase::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + template void SystemLogBase::add(LogElement element) { diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 0d7b04d5c57..0942e920a42 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -216,6 +216,8 @@ public: static consteval bool shouldTurnOffLogger() { return false; } protected: + void stopFlushThread() final; + std::shared_ptr> queue; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 15970ca5b81..1b285aad3ff 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -11,7 +11,7 @@ void PeriodicLog::startCollect(size_t collect_interval_milliseconds_ { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - flush_thread = std::make_unique([this] { threadFunction(); }); + collecting_thread = std::make_unique([this] { threadFunction(); }); } template @@ -20,8 +20,8 @@ void PeriodicLog::stopCollect() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - if (flush_thread) - flush_thread->join(); + if (collecting_thread) + collecting_thread->join(); } template diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h index ceac8088d40..8254a02434a 100644 --- a/src/Interpreters/PeriodicLog.h +++ b/src/Interpreters/PeriodicLog.h @@ -36,7 +36,7 @@ protected: private: void threadFunction(); - std::unique_ptr flush_thread; + std::unique_ptr 
From 837f2bba8a136170b6aa8800b6b30849a9310e5f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 17:23:45 +0200
Subject: [PATCH 064/114] init

---
 .../0_stateless/00080_show_tables_and_system_tables.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
index a58f9ddb0ac..02e3645ece0 100644
--- a/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
+++ b/tests/queries/0_stateless/00080_show_tables_and_system_tables.sql
@@ -6,8 +6,8 @@ CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier};
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.A (A UInt8) ENGINE = TinyLog;
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.B (A UInt8) ENGINE = TinyLog;
 
-SHOW TABLES from {CLICKHOUSE_DATABASE:Identifier};
-SHOW TABLES in system where engine like '%System%' and name in ('numbers', 'one');
+SHOW TABLES FROM {CLICKHOUSE_DATABASE:Identifier};
+SHOW TABLES IN system WHERE engine LIKE '%System%' AND name IN ('numbers', 'one') AND database = 'system';
 
 SELECT name, toUInt32(metadata_modification_time) > 0, engine_full, create_table_query FROM system.tables WHERE database = currentDatabase() ORDER BY name FORMAT TSVRaw;
 
@@ -16,7 +16,7 @@ SELECT name FROM system.tables WHERE is_temporary = 1 AND name = 'test_temporary
 CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.test_log(id UInt64) ENGINE = Log;
 CREATE MATERIALIZED VIEW {CLICKHOUSE_DATABASE:Identifier}.test_materialized ENGINE = Log AS SELECT * FROM {CLICKHOUSE_DATABASE:Identifier}.test_log;
 
-SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' and database=currentDatabase();
+SELECT dependencies_database, dependencies_table FROM system.tables WHERE name = 'test_log' AND database=currentDatabase();
 
 DROP DATABASE
{CLICKHOUSE_DATABASE:Identifier}; From 51fbc629c6dff4653e687228b0507947516072bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:42:17 +0000 Subject: [PATCH 065/114] Update version_date.tsv and changelogs after v24.7.3.47-stable --- docs/changelogs/v24.7.3.47-stable.md | 55 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++ 2 files changed, 59 insertions(+) create mode 100644 docs/changelogs/v24.7.3.47-stable.md diff --git a/docs/changelogs/v24.7.3.47-stable.md b/docs/changelogs/v24.7.3.47-stable.md new file mode 100644 index 00000000000..e5f23a70fe1 --- /dev/null +++ b/docs/changelogs/v24.7.3.47-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). 
+* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. 
[#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..6ef5ace4ba6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,14 @@ v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +17,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:46:47 +0200 Subject: [PATCH 066/114] small fixes --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 4a0ec258c64..5bce41ec07a 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -22,7 +22,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su ### Из deb-пакетов {#install-from-deb-packages} -Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: +Рекомендуется использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash sudo apt-get install -y apt-transport-https ca-certificates curl gnupg @@ -55,7 +55,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ::: ### Из rpm-пакетов {#from-rpm-packages} -Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. +Команда ClickHouse рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. #### Установка официального репозитория @@ -102,7 +102,7 @@ sudo yum install clickhouse-server clickhouse-client ### Из tgz-архивов {#from-tgz-archives} -Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. +Команда ClickHouse рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. 
Пример установки самой свежей версии: From 980b02bfd67defbbdf78165e8225fb754d722d7a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:48:57 +0200 Subject: [PATCH 067/114] fix compatibility with en version --- docs/ru/getting-started/install.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 5bce41ec07a..f8a660fbec9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -31,9 +31,17 @@ curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | s echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update +``` +#### Установка ClickHouse server и client + +```bash sudo apt-get install -y clickhouse-server clickhouse-client +``` +#### Запуск ClickHouse server + +```bash sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` From 7c3a013d56c1dbd5b72f04f6be61f007004aaefa Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 22 Aug 2024 16:53:30 +0100 Subject: [PATCH 068/114] Update newjson.md --- docs/en/sql-reference/data-types/newjson.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 9e43216df6c..f7fc7e1498e 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -70,7 +70,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json └────────────────────────────────────────────────┘ ``` -CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. +CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. 
## Reading JSON paths as subcolumns From 28fbd8a4eff4eafa7db99eb37e38376ffda11763 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:56:16 +0200 Subject: [PATCH 069/114] fix stateless tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 2 -- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index acdadc2510b..a9d856babce 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -34,8 +34,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index b3d196924af..6734c5f14ad 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From 9c0e1df1663dd5c56066dd615fc3cafe6408d308 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:58:15 +0200 Subject: [PATCH 070/114] Fix flaky test 00989_parallel_parts_loading --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..dc074241ff6 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small insert block size can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From 0bd8ebf62616ce882b0ebc46945c837a5a91ba44 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 17:58:56 +0200 Subject: [PATCH 071/114] Update README.md adding community call. resolving recent recordings --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9474ef0fc0..9099fd48659 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. 
-* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 ## Upcoming Events @@ -58,7 +58,7 @@ Other upcoming meetups ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! +* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? From 52cdd88eb6d7bbb5d395dd80445655ad47c83c92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:59:10 +0200 Subject: [PATCH 072/114] Better comment --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index dc074241ff6..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,5 @@ -- Tags: no-random-settings, no-random-merge-tree-settings --- small insert block size can make insert terribly slow, especially with some build like msan +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From e7b89537bf1bb760c6082f04de4668bd1c00f33a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 18:02:42 +0200 Subject: [PATCH 073/114] fix style --- src/Interpreters/PeriodicLog.cpp | 1 - src/Interpreters/SystemLog.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 1b285aad3ff..22bc14856c4 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Functions/DateTimeTransforms.h" namespace DB { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 31652c1af67..c03f9370068 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -134,7 +134,7 @@ public: protected: LoggerPtr log; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 1692360233593e635c5a7797847bdfd8a0ffa33e Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 18:12:38 +0200 Subject: [PATCH 074/114] Update README.md 26 and 266 are different --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9099fd48659..83a5c05c667 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to 
discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26 ## Upcoming Events From 4264fbc037accedecebcd8122910e4406e92cd58 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 16:16:47 +0000 Subject: [PATCH 075/114] Update version_date.tsv and changelogs after v24.8.2.3-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.8.2.3-lts.md | 12 ++++++++++++ utils/list-versions/version_date.tsv | 5 +++++ 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.8.2.3-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index fc93cee5bbc..6ff7ea43374 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3ceaf2a08b4..c87885d3b49 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 76db997821c..6ccf74823e2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.8.2.3-lts.md b/docs/changelogs/v24.8.2.3-lts.md new file mode 100644 index 00000000000..69dfc9961a2 --- /dev/null +++ b/docs/changelogs/v24.8.2.3-lts.md @@ -0,0 +1,12 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). 
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 8556375d543..199c4f822f4 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,11 +1,15 @@
+v24.8.2.3-lts 2024-08-22
 v24.8.1.2684-lts 2024-08-21
+v24.7.3.47-stable 2024-08-22
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
 v24.6.3.95-stable 2024-08-06
+v24.6.3.38-stable 2024-08-22
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
 v24.5.5.78-stable 2024-08-05
+v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13
@@ -14,6 +18,7 @@ v24.4.4.113-stable 2024-08-02
 v24.4.3.25-stable 2024-06-14
 v24.4.2.141-stable 2024-06-07
 v24.4.1.2088-stable 2024-05-01
+v24.3.9.5-lts 2024-08-22
 v24.3.8.13-lts 2024-08-20
 v24.3.7.30-lts 2024-08-14
 v24.3.6.48-lts 2024-08-02
From fa453c3664b18da7a6945e662b881f80fedadc5b Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Thu, 22 Aug 2024 18:13:45 +0200
Subject: [PATCH 076/114] Disable SqlLogic job

---
 tests/ci/ci_config.py      | 7 ++++---
 tests/ci/ci_definitions.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 58de25f039f..0885f1d9ec2 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -498,9 +498,10 @@ class CI:
         JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties(
             required_builds=[BuildNames.PACKAGE_DEBUG],
         ),
-        JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
-            required_builds=[BuildNames.PACKAGE_RELEASE],
-        ),
+        # TODO: job does not work at all, uncomment and fix
+        # JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
+        #     required_builds=[BuildNames.PACKAGE_RELEASE],
+        # ),
         JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties(
             required_builds=[BuildNames.PACKAGE_RELEASE],
         ),
diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py
index 1cdb3f1487e..9d95a19790f 100644
--- a/tests/ci/ci_definitions.py
+++ b/tests/ci/ci_definitions.py
@@ -204,7 +204,7 @@ class JobNames(metaclass=WithIter):
     PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)"
     PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)"
 
-    SQL_LOGIC_TEST = "Sqllogic test (release)"
+    # SQL_LOGIC_TEST = "Sqllogic test (release)"
 
     SQLANCER = "SQLancer (release)"
    SQLANCER_DEBUG = "SQLancer (debug)"
From 06c46ee75bcb94fe02ac68df6a4a044145804d76 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 18:56:50 +0200
Subject: [PATCH 077/114] add one more test

---
 .../0_stateless/03203_hive_style_partitioning.reference     | 1 +
 tests/queries/0_stateless/03203_hive_style_partitioning.sh | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference
index a9d856babce..0fbc1fb556e 100644
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference
@@ -37,6 +37,7 @@ Array(Int64) LowCardinality(Float64)
 2070
 b
 1
+1
 TESTING THE URL PARTITIONING
 last Elizabeth
 Frank Elizabeth
diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
index 6734c5f14ad..8ab18f5edfe 100755
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c
 SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
 """
 
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "INCORRECT_DATA"
+
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 0;
 
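Before the next patch, a sketch of the behavior the test added above pins down. With `use_hive_partitioning = 1`, a `key=value` component in the path is exposed as a column, and the same key appearing twice with conflicting values must be rejected. The paths below mirror the test fixture and are illustrative only:

```sql
SET use_hive_partitioning = 1;

-- A consistent partition value: `column0` is readable as a column.
SELECT column0
FROM file('data_hive/partitioning/column0=Elizabeth/sample.parquet')
LIMIT 1;

-- Conflicting values for the same key along one path are an error:
SELECT column0
FROM file('data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet')
LIMIT 1; -- expected to fail with INCORRECT_DATA
```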
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c
 SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
 """
 
+$CLICKHOUSE_LOCAL -n -q """
+set use_hive_partitioning = 1;
+
+SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+""" 2>&1 | grep -c "INCORRECT_DATA"
+
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 0;
 
From 2a32207e9ee44d52d6fbca7313d847b4eef1c4fb Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 01:20:46 +0200
Subject: [PATCH 078/114] fix: wrap in conditional preprocessor directives

---
 src/Functions/FunctionsHashingRipe.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Functions/FunctionsHashingRipe.cpp b/src/Functions/FunctionsHashingRipe.cpp
index 5b06b8ab924..315296b7690 100644
--- a/src/Functions/FunctionsHashingRipe.cpp
+++ b/src/Functions/FunctionsHashingRipe.cpp
@@ -7,6 +7,7 @@
 /// due to excessive resource consumption.
 namespace DB
 {
+#if USE_SSL
 REGISTER_FUNCTION(HashingRipe)
 {
     factory.registerFunction(FunctionDocumentation{
@@ -18,4 +19,5 @@ REGISTER_FUNCTION(HashingRipe)
     )"}}, .categories{"Hash"}});
 }
+#endif
 }
From ef9fbe3006b3023bf47e3a0109490d166071c2aa Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 01:20:46 +0200
Subject: [PATCH 079/114] fix: disable running test in fasttest due to missing OpenSSL

---
 tests/queries/0_stateless/03222_ripeMD160.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/03222_ripeMD160.sql b/tests/queries/0_stateless/03222_ripeMD160.sql
index 592f9f830dd..9d418376a20 100644
--- a/tests/queries/0_stateless/03222_ripeMD160.sql
+++ b/tests/queries/0_stateless/03222_ripeMD160.sql
@@ -1,3 +1,4 @@
+-- Tags: no-fasttest
 -- Ouput can be verified using: https://emn178.github.io/online-tools/ripemd-160/
 
 SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
From a2ff8e4384f04b2e57d3de93a1ba63971f94794e Mon Sep 17 00:00:00 2001
From: Dergousov
Date: Thu, 22 Aug 2024 20:44:52 +0300
Subject: [PATCH 080/114] fix: correct return type inconsistencies in docs

---
 docs/en/sql-reference/functions/hash-functions.md | 5 ++---
 docs/ru/sql-reference/functions/hash-functions.md | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 9b7ac8af0e3..cd1c85b5f4c 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -689,9 +689,8 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
 ```
 
 ## ripeMD160
-Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md).
-
+Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.
 
 **Syntax**
 
@@ -705,7 +704,7 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256](../data-types/int-uint.md) hash value
+- A [UInt256]((../data-types/int-uint.md)) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
 
 **Example**
 Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
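The English documentation hunk above can be sanity-checked with the canonical RIPEMD-160 test vector. Assuming a server built with OpenSSL (registration of `ripeMD160` sits under `#if USE_SSL` two patches back), the documented `hex` wrapping yields:

```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog')) AS digest;

-- ┌─digest───────────────────────────────────┐
-- │ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
-- └──────────────────────────────────────────┘
```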
diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 66d77e66972..b7adcfc1829 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -125,7 +125,8 @@ SELECT hex(sipHash128('foo', '\x01', 3)); ``` ## ripeMD160 -Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки и возвращает полученный набор байт в виде [FixedString](../data-types/fixedstring.md). + +Генерирует [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) хеш строки. **Синтаксис** @@ -139,7 +140,7 @@ ripeMD160('input') **Возвращаемое значение** -- [UInt256](../data-types/int-uint.md) хеш-значение +- [UInt256](../data-types/int-uint.md), где 160-битный хеш RIPEMD-160 хранится в первых 20 байтах. Оставшиеся 12 байт заполняются нулями. **Пример** Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой From f89193fa416cc333f549d72bb8ba453907edc951 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 19:12:19 +0000 Subject: [PATCH 081/114] Update version_date.tsv and changelogs after v24.5.5.41-stable --- utils/list-versions/version_date.tsv | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 199c4f822f4..0e25f8d3b62 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,9 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 -v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 -v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 From 4200b3d5cbbfe065073c40f1e122c44189f3554f Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 14:02:25 +0200 Subject: [PATCH 082/114] CI: Stress test fix --- tests/clickhouse-test | 2 +- tests/docker_scripts/stress_runner.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4f9380d6f20..ad6173065fe 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3567,7 +3567,7 @@ if __name__ == "__main__": f"Cannot access the specified directory with queries ({args.queries})", file=sys.stderr, ) - sys.exit(1) + assert False, "No --queries provided" CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace diff --git a/tests/docker_scripts/stress_runner.sh b/tests/docker_scripts/stress_runner.sh index 7666398e10b..039c60c8e4e 100755 --- a/tests/docker_scripts/stress_runner.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -10,8 +10,7 @@ dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -ln -s /repo/tests/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /repo/tests/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib
@@ -266,6 +265,7 @@ fi
 
 start_server
 
+cd /repo/tests/ || exit 1 # clickhouse-test can find queries dir from there
 python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
     && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
     || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
From 69f6ea5083f1686becce4ca9fcf47d1404f2d3ed Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov
Date: Thu, 22 Aug 2024 22:07:02 +0200
Subject: [PATCH 083/114] Update docs/en/sql-reference/functions/hash-functions.md

---
 docs/en/sql-reference/functions/hash-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index cd1c85b5f4c..55126640e34 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -704,7 +704,7 @@ ripeMD160('input')
 
 **Returned value**
 
-- A [UInt256]((../data-types/int-uint.md)) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
+- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
 
 **Example**
 Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
From dc862b1411884a462bba8dcf86a474ccbe57e380 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 22 Aug 2024 23:40:18 +0200
Subject: [PATCH 084/114] fix test

---
 tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
index 8ab18f5edfe..60e8a6e9faa 100755
--- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh
+++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh
@@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI
 $CLICKHOUSE_LOCAL -n -q """
 set use_hive_partitioning = 1;
 
-SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
+SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
 """ 2>&1 | grep -c "INCORRECT_DATA"
From 4c790999eb6ad74e3a8f99c072dcc12c956a63d8 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 23 Aug 2024 02:18:26 +0200
Subject: [PATCH 085/114] CI: Force package_debug build on release branches

---
 .github/workflows/release_branches.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 82826794ea3..ec119b6ff95 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -130,6 +130,7 @@ jobs:
     with:
       build_name: package_debug
       data: ${{ needs.RunConfig.outputs.data }}
+      force: true
   BuilderBinDarwin:
     needs: [RunConfig, BuildDockers]
     if: ${{ !failure() && !cancelled() }}
From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Fri, 23 Aug 2024 02:51:27 +0200
Subject: [PATCH 086/114] CI: Make job rerun possible if triggered manually

---
 tests/ci/ci.py       | 7 +++++--
 tests/ci/ci_utils.py | 5 +++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index a9ae078b449..d201b6602f5 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -333,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
         CI.JobNames.BUILD_CHECK,
     ):  # we might want to rerun build report job
         rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name))
-        if rerun_helper.is_already_finished_by_status():
+        if (
+            rerun_helper.is_already_finished_by_status()
+            and not Utils.is_job_triggered_manually()
+        ):
             print("WARNING: Rerunning job with GH status ")
             status = rerun_helper.get_finished_status()
             assert status
@@ -344,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
             skip_status = status.state
 
     # ci cache check
-    if not to_be_skipped and not no_cache:
+    if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually():
         ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update()
         job_config = CI.get_job_config(job_name)
         if ci_cache.is_successful(
diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py
index a4c0977f47c..e8d9e7dc254 100644
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@@ -18,6 +18,7 @@ class Envs:
     )
     S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
     GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
+    GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "")
 
 
 class WithIter(type):
@@ -282,3 +283,7 @@ class Utils:
         ):
             res = res.replace(*r)
         return res
+
+    @staticmethod
+    def is_job_triggered_manually():
+        return "robot" not in Envs.GITHUB_ACTOR
From 60e4bcbbf0b1991b42bcab4b83e55be344e8a659 Mon Sep 17 00:00:00 2001
From: Tanya Bragin
Date: Thu, 22 Aug 2024 20:45:28 -0700
Subject: [PATCH 087/114] Update README.md

Update Raleigh meetup link

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 83a5c05c667..546f08afd3d 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey
 
 * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
-* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9
+* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
 * [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
 
From e5380806653f8d391c6e88664b0096c3c51240f5 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 07:09:03 +0000
Subject: [PATCH 088/114] Update version_date.tsv and changelogs after v24.5.6.45-stable

---
 docs/changelogs/v24.5.6.45-stable.md | 33 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelogs/v24.5.6.45-stable.md

diff --git a/docs/changelogs/v24.5.6.45-stable.md b/docs/changelogs/v24.5.6.45-stable.md
new file mode 100644
index 00000000000..b329ebab27b
--- /dev/null
+++ b/docs/changelogs/v24.5.6.45-stable.md
@@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME
as compared to v24.5.5.78-stable (0138248cb62) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0e25f8d3b62..57a59d7ac49 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,8 +6,8 @@ v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 +v24.5.6.45-stable 2024-08-23 v24.5.5.78-stable 2024-08-05 -v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:37:32 +0000 Subject: [PATCH 089/114] Update version_date.tsv and changelogs after v24.6.4.42-stable --- docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.6.4.42-stable.md diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md new file mode 100644 index 00000000000..29b6ba095af --- /dev/null +++ b/docs/changelogs/v24.6.4.42-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. 
[#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). 
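Of the entries above, the `OFFSET` one ([#68099](https://github.com/ClickHouse/ClickHouse/pull/68099)) is the least obvious: an `ORDER BY` inside a subquery normally looks redundant to the optimizer, but `OFFSET` makes the subquery order-sensitive, because the sort decides which rows get skipped. A hedged sketch of the failure mode (table, data, and the exact `OFFSET` syntax are illustrative assumptions, not taken from the linked issue):

```sql
CREATE TABLE t (n UInt64) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t SELECT number FROM numbers(10);

-- With the inner sort preserved, OFFSET skips n = 9, 8, 7, 6, 5 and the
-- outer filter matches nothing, so count() is 0. If the optimizer drops
-- the ORDER BY as "redundant", an arbitrary five rows are skipped and
-- the result becomes nondeterministic.
SELECT count()
FROM
(
    SELECT n
    FROM t
    ORDER BY n DESC
    OFFSET 5 ROWS
)
WHERE n >= 5;
```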
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 0e25f8d3b62..8ce510f110d 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,11 +3,12 @@ v24.8.1.2684-lts 2024-08-21
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
+v24.5.6.45-stable 2024-08-23
 v24.5.5.78-stable 2024-08-05
-v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13

From eec720dab60ea63b033919bbc4c1f6837920a42d Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 08:05:27 +0000
Subject: [PATCH 090/114] Update version_date.tsv and changelogs after v24.7.4.51-stable

---
 docs/changelogs/v24.7.4.51-stable.md | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv | 2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/changelogs/v24.7.4.51-stable.md

diff --git a/docs/changelogs/v24.7.4.51-stable.md b/docs/changelogs/v24.7.4.51-stable.md
new file mode 100644
index 00000000000..a7cf9790383
--- /dev/null
+++ b/docs/changelogs/v24.7.4.51-stable.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransform`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
+* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). 
+* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). +* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 57a59d7ac49..d9674ed2366 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,8 +1,10 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 +v24.7.4.51-stable 2024-08-23 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 +v24.6.4.42-stable 2024-08-23 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 From 6ba686d2510a2d95ab4332560163d0b4600533a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 23 Aug 2024 09:20:40 +0000 Subject: [PATCH 091/114] Split test case and reduce number of random runs to reduce time necessary to run the test --- .../01395_limit_more_cases.reference | 1 - .../0_stateless/01395_limit_more_cases.sh | 24 ++++--------------- .../01395_limit_more_cases_random.reference | 1 + .../01395_limit_more_cases_random.sh | 22 +++++++++++++++++ 4 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/01395_limit_more_cases_random.reference create mode 100755 tests/queries/0_stateless/01395_limit_more_cases_random.sh diff --git a/tests/queries/0_stateless/01395_limit_more_cases.reference b/tests/queries/0_stateless/01395_limit_more_cases.reference index c9d0dd73ab8..d68b987ea19 100644 --- a/tests/queries/0_stateless/01395_limit_more_cases.reference +++ b/tests/queries/0_stateless/01395_limit_more_cases.reference @@ -254,4 +254,3 @@ 15 13 0 0 0 0 0 0 15 14 0 0 0 0 0 0 15 15 0 0 0 0 0 0 -0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases.sh b/tests/queries/0_stateless/01395_limit_more_cases.sh index 177147d2142..9709bd74f26 100755 --- a/tests/queries/0_stateless/01395_limit_more_cases.sh +++ b/tests/queries/0_stateless/01395_limit_more_cases.sh @@ -9,8 +9,11 @@ SIZE=13 for OFFSET in {0..15}; do for LIMIT in {0..15}; do echo "SELECT - $OFFSET, $LIMIT, - count() AS c, min(number) AS first, max(number) AS last, + $OFFSET, + $LIMIT, + count() AS c, + min(number) AS first, + max(number) AS last, throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? 
least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) @@ -18,20 +21,3 @@ for OFFSET in {0..15}; do " done done | $CLICKHOUSE_CLIENT -n --max_block_size 5 - -# Randomized test - -ITERATIONS=1000 -for _ in $(seq $ITERATIONS); do - SIZE=$(($RANDOM % 100)) - OFFSET=$(($RANDOM % 111)) - LIMIT=$(($RANDOM % 111)) - - echo "WITH count() AS c, min(number) AS first, max(number) AS last - SELECT - throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), - throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), - throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) - FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); - " -done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.reference b/tests/queries/0_stateless/01395_limit_more_cases_random.reference new file mode 100644 index 00000000000..06b63ea6c2f --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.reference @@ -0,0 +1 @@ +0 0 0 diff --git a/tests/queries/0_stateless/01395_limit_more_cases_random.sh b/tests/queries/0_stateless/01395_limit_more_cases_random.sh new file mode 100755 index 00000000000..c2f6b060aab --- /dev/null +++ b/tests/queries/0_stateless/01395_limit_more_cases_random.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +SIZE=13 +ITERATIONS=300 +for _ in $(seq $ITERATIONS); do + SIZE=$(($RANDOM % 100)) + OFFSET=$(($RANDOM % 111)) + LIMIT=$(($RANDOM % 111)) + + echo "WITH count() AS c, min(number) AS first, max(number) AS last + SELECT + throwIf(first != ($OFFSET < $SIZE AND $LIMIT > 0 ? $OFFSET : 0)), + throwIf(last != ($OFFSET < $SIZE AND $LIMIT > 0 ? least($SIZE - 1, $OFFSET + $LIMIT - 1) : 0)), + throwIf((c != 0 OR first != 0 OR last != 0) AND (c != last - first + 1)) + FROM (SELECT * FROM numbers($SIZE) LIMIT $OFFSET, $LIMIT); + " +done | $CLICKHOUSE_CLIENT -n --max_block_size $(($RANDOM % 20 + 1)) | uniq From 8c4329964f597b1eb8139990a41360243f9337f9 Mon Sep 17 00:00:00 2001 From: Maxim Dergousov Date: Fri, 23 Aug 2024 12:50:18 +0300 Subject: [PATCH 092/114] small cosmetic changes in docs --- docs/en/sql-reference/functions/hash-functions.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 55126640e34..908e288cf59 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -695,7 +695,7 @@ Produces [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value. **Syntax** ```sql -ripeMD160('input') +ripeMD160(input) ``` **Parameters** @@ -707,6 +707,7 @@ ripeMD160('input') - A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded. **Example** + Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. 
Query:

From 8cc5d766b5d70f22646e6dc2832f806736c76311 Mon Sep 17 00:00:00 2001
From: Maxim Dergousov
Date: Fri, 23 Aug 2024 12:52:55 +0300
Subject: [PATCH 093/114] small cosmetic changes in docs

---
 docs/ru/sql-reference/functions/hash-functions.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md
index b7adcfc1829..d7b90b09122 100644
--- a/docs/ru/sql-reference/functions/hash-functions.md
+++ b/docs/ru/sql-reference/functions/hash-functions.md
@@ -131,7 +131,7 @@ SELECT hex(sipHash128('foo', '\x01', 3));
 **Синтаксис**
 
 ```sql
-ripeMD160('input')
+ripeMD160(input)
 ```
 
 **Аргументы**
@@ -143,6 +143,7 @@ ripeMD160('input')
 - [UInt256](../data-types/int-uint.md), где 160-битный хеш RIPEMD-160 хранится в первых 20 байтах. Оставшиеся 12 байт заполняются нулями.
 
 **Пример**
+
 Используйте функцию [hex](../functions/encoding-functions.md#hex) для представления результата в виде строки с шестнадцатеричной кодировкой
 
 Запрос:

From b0894bffe62722acee2fa5d832ceda9a75754bde Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:01:17 +0200
Subject: [PATCH 094/114] change test file location

---
 .../sample.parquet | Bin
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/{column0=Elizabeth => column0=Elizabeth1}/sample.parquet (100%)

diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
similarity index 100%
rename from tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet
rename to tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet

From 1165ae756d3a6ca1b9b7c7e9be77f1812390c527 Mon Sep 17 00:00:00 2001
From: avogar
Date: Fri, 23 Aug 2024 12:16:16 +0000
Subject: [PATCH 095/114] Make dynamic structure selection more consistent

---
 src/Columns/ColumnDynamic.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index 1f37add9d2d..efb835b2e17 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -1182,12 +1182,13 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
     if (!canAddNewVariants(0, all_variants.size()))
     {
         /// Create list of variants with their sizes and sort it.
-        std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
+        std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
         variants_with_sizes.reserve(all_variants.size());
         for (const auto & variant : all_variants)
         {
-            if (variant->getName() != getSharedVariantTypeName())
-                variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+            auto variant_name = variant->getName();
+            if (variant_name != getSharedVariantTypeName())
+                variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant);
         }
         std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
@@ -1196,14 +1197,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
         result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
         /// Add shared variant.
result_variants.push_back(getSharedVariantDataType());
-        for (const auto & [size, variant] : variants_with_sizes)
+        for (const auto & [size, variant_name, variant_type] : variants_with_sizes)
         {
             /// Add variant to the resulting variants list until we reach max_dynamic_types.
             if (canAddNewVariant(result_variants.size()))
-                result_variants.push_back(variant);
+                result_variants.push_back(variant_type);
             /// Add all remaining variants into shared_variants_statistics until we reach its max size.
             else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
-                new_statistics.shared_variants_statistics[variant->getName()] = size;
+                new_statistics.shared_variants_statistics[variant_name] = size;
             else
                 break;
         }

From 6f5210644b95b41cc9d490d4e117c81bd61a1d06 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 23 Aug 2024 14:43:09 +0200
Subject: [PATCH 096/114] Update src/Columns/ColumnObject.cpp

Co-authored-by: Alexander Gololobov
---
 src/Columns/ColumnObject.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 999c0f6088e..e397b03b69e 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -1045,7 +1045,7 @@ void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColu
 
 bool ColumnObject::structureEquals(const IColumn & rhs) const
 {
-    /// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types.
+    /// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types.
     const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs);
     if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
         return false;

From 2b20b2d4de78acf4fbb08b3f106ebdf410e4587d Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 23 Aug 2024 15:02:43 +0200
Subject: [PATCH 097/114] Update src/Columns/ColumnDynamic.cpp

Co-authored-by: Dmitry Novik
---
 src/Columns/ColumnDynamic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index efb835b2e17..ef6cd7dcea2 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -1181,7 +1181,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
     /// Check if the number of all dynamic types exceeds the limit.
     if (!canAddNewVariants(0, all_variants.size()))
     {
-        /// Create list of variants with their sizes and sort it.
+        /// Create a list of variants with their sizes and names and then sort it.
std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
         variants_with_sizes.reserve(all_variants.size());
         for (const auto & variant : all_variants)

From 5d6b861ff055de0d04e0c574bf2ebb1e51215ace Mon Sep 17 00:00:00 2001
From: avogar
Date: Fri, 23 Aug 2024 13:49:36 +0000
Subject: [PATCH 098/114] Fix index with limit=0

---
 src/Columns/ColumnVariant.cpp | 2 +-
 .../03228_variant_permutation_issue.reference | 4 ++++
 .../03228_variant_permutation_issue.sql | 15 +++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp
index 2fea3eca123..c6511695f5c 100644
--- a/src/Columns/ColumnVariant.cpp
+++ b/src/Columns/ColumnVariant.cpp
@@ -953,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const
 {
     /// If we have only NULLs, index will take no effect, just return resized column.
     if (hasOnlyNulls())
-        return cloneResized(limit);
+        return cloneResized(limit == 0 ? indexes.size(): limit);
 
     /// Optimization when we have only one non empty variant and no NULLs.
     /// In this case local_discriminators column is filled with identical values and offsets column

diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference
index 10688253e15..be9cdedaf07 100644
--- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference
+++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference
@@ -2,3 +2,7 @@
 3 {"foo2":"bar"} 1
 2 {"foo2":"baz"} 2
 3 {"foo2":"bar"} 1
+2 {"foo2":"bar"} 1
+3 {"foo2":"bar"} 1
+2 {"foo2":"baz"} 2
+3 {"foo2":"bar"} 1

diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql
index 088361d6430..81eb2ed69af 100644
--- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql
+++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql
@@ -16,3 +16,18 @@ SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
 
 DROP TABLE test_new_json_type;
 
+CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings allow_nullable_key=1;
+INSERT INTO test_new_json_type format JSONEachRow
+{"id":1,"data":{"foo1":"bar"},"version":1}
+{"id":2,"data":{"foo2":"bar"},"version":1}
+{"id":3,"data":{"foo2":"bar"},"version":1}
+;
+
+SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
+
+INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2;
+
+SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST;
+
+DROP TABLE test_new_json_type;
+

From 61fa4e7a476b3db31c22030470341b131501f3b6 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Fri, 23 Aug 2024 16:38:48 +0200
Subject: [PATCH 099/114] fix logical err of modify statistics

---
 src/Storages/AlterCommands.cpp | 2 +-
 src/Storages/StatisticsDescription.cpp | 6 ++++--
 .../integration/test_manipulate_statistics/test.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index d5780e32db3..67b18217767 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -734,7 +734,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         {
             if (!metadata.columns.has(statistics_column_name))
             {
-                throw
Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name); + throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name); } } diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp index 63c849e3806..acf600dd6f7 100644 --- a/src/Storages/StatisticsDescription.cpp +++ b/src/Storages/StatisticsDescription.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include @@ -115,8 +114,11 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other) { + /// If the statistics is empty, it's possible that we have not assign a column_name. + if (empty() && column_name == "") + column_name = other.column_name; if (other.column_name != column_name) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", other.column_name, column_name); types_to_desc = other.types_to_desc; data_type = other.data_type; diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index 2541c9b946f..ab5559e18fa 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -6,11 +6,13 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", user_configs=["config/config.xml"], with_zookeeper=True + "node1", user_configs=["config/config.xml"], with_zookeeper=True, + macros={"replica": "a", "shard": "shard1"} ) node2 = cluster.add_instance( - "node2", user_configs=["config/config.xml"], with_zookeeper=True + "node2", user_configs=["config/config.xml"], with_zookeeper=True, + macros={"replica": "b", "shard": "shard1"} ) @@ -183,3 +185,11 @@ def test_replicated_table_ddl(started_cluster): ) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True) check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True) + + +def test_replicated_db(started_cluster): + node1.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") + node2.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") + node1.query("CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()") + node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64") + node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest") From 7aabd7d2fd4a03ddea5ef311cf89b2eb7520674c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 23 Aug 2024 15:11:51 +0000 Subject: [PATCH 100/114] Fix resolving dynamic subcolumns from subqueries in analyzer --- src/Analyzer/Resolve/IdentifierResolver.cpp | 2 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 3 +++ src/Analyzer/Resolve/TableExpressionData.h | 1 + .../03228_dynamic_subcolumns_from_subquery.reference | 4 ++++ .../03228_dynamic_subcolumns_from_subquery.sql | 9 +++++++++ 5 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference create mode 100644 tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql diff --git 
a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp
index 14d4acc7c9b..80e7d1e4445 100644
--- a/src/Analyzer/Resolve/IdentifierResolver.cpp
+++ b/src/Analyzer/Resolve/IdentifierResolver.cpp
@@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
             result_column_node = it->second;
         }
         /// Check if it's a dynamic subcolumn
-        else
+        else if (table_expression_data.supports_subcolumns)
         {
             auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name);
             auto jt = table_expression_data.column_name_to_column_node.find(column_name);

diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp
index 004da5ed341..a18c2901a58 100644
--- a/src/Analyzer/Resolve/QueryAnalyzer.cpp
+++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp
@@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
 
         auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
         if (storage_snapshot->storage.supportsSubcolumns())
+        {
             get_column_options.withSubcolumns();
+            table_expression_data.supports_subcolumns = true;
+        }
 
         auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
         table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end());

diff --git a/src/Analyzer/Resolve/TableExpressionData.h b/src/Analyzer/Resolve/TableExpressionData.h
index 18cbfa32366..6770672d0c2 100644
--- a/src/Analyzer/Resolve/TableExpressionData.h
+++ b/src/Analyzer/Resolve/TableExpressionData.h
@@ -36,6 +36,7 @@ struct AnalysisTableExpressionData
     std::string database_name;
     std::string table_name;
     bool should_qualify_columns = true;
+    bool supports_subcolumns = false;
     NamesAndTypes column_names_and_types;
     ColumnNameToColumnNodeMap column_name_to_column_node;
     std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns

diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference
new file mode 100644
index 00000000000..153ad78f694
--- /dev/null
+++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.reference
@@ -0,0 +1,4 @@
+str
+42
+42
+42

diff --git a/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql
new file mode 100644
index 00000000000..a10b0cb2809
--- /dev/null
+++ b/tests/queries/0_stateless/03228_dynamic_subcolumns_from_subquery.sql
@@ -0,0 +1,9 @@
+set allow_experimental_dynamic_type=1;
+set allow_experimental_json_type=1;
+set allow_experimental_analyzer=1;
+
+select d.String from (select 'str'::Dynamic as d);
+select json.a from (select '{"a" : 42}'::JSON as json);
+select json.a from (select '{"a" : 42}'::JSON(a UInt32) as json);
+select json.a.:Int64 from (select materialize('{"a" : 42}')::JSON as json);
+

From 80504e7b9b52fec79a89e2fff5881ca397022107 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 23 Aug 2024 19:07:25 +0000
Subject: [PATCH 101/114] fix test 03228_virtual_column_merge_dist

---
 .../queries/0_stateless/03228_virtual_column_merge_dist.sql | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql
index caf00a2e407..e58c7f38d3b 100644
---
a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -1,3 +1,6 @@ +-- There is a bug in old analyzer with currentDatabase() and distributed query. +SET enable_analyzer = 1; + DROP TABLE IF EXISTS t_local_1; DROP TABLE IF EXISTS t_local_2; DROP TABLE IF EXISTS t_merge; @@ -10,7 +13,7 @@ INSERT INTO t_local_1 VALUES (1); INSERT INTO t_local_2 VALUES (2); CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); -CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); +CREATE TABLE t_distr AS t_local_1 ENGINE = Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); SELECT a, _table FROM t_merge ORDER BY a; SELECT a, _table FROM t_distr ORDER BY a; From a82421719383041a839289093d1882265a068cd1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 23 Aug 2024 20:29:04 +0000 Subject: [PATCH 102/114] Done --- ..._rewrite_sum_column_and_constant.reference | 26 +++++++++---------- ...alyzer_rewrite_sum_column_and_constant.sql | 11 ++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference index 802d920aaef..b41635f014e 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference @@ -1635,21 +1635,21 @@ QUERY id: 0 JOIN TREE TABLE id: 10, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) From test_table; -26.5 +26.875 SELECT sum(2 + float64) From test_table; -26.5 +26.875 SELECT sum(float64 - 2) From test_table; -6.5 +6.875 SELECT sum(2 - float64) From test_table; --6.5 +-6.875 SELECT sum(float64) + 2 * count(float64) From test_table; -26.5 +26.875 SELECT 2 * count(float64) + sum(float64) From test_table; -26.5 +26.875 SELECT sum(float64) - 2 * count(float64) From test_table; -6.5 +6.875 SELECT 2 * count(float64) - sum(float64) From test_table; --6.5 +-6.875 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); QUERY id: 0 PROJECTION COLUMNS @@ -2463,25 +2463,25 @@ QUERY id: 0 JOIN TREE TABLE id: 12, alias: __table1, table_name: default.test_table SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; -58 +58.75 SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; -5 SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; -8 +8.75 SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; 5 SELECT sum(2 - float64) - sum(3 - float64) From test_table; -5 SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; -58 +58.75 SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; -5 SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; -8 +8.75 SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; 5 SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; --8 +-8.75 EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); QUERY id: 0 PROJECTION COLUMNS diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql index 
5492d061c12..b6fa097abe9 100644 --- a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql @@ -25,11 +25,12 @@ CREATE TABLE test_table decimal32 Decimal32(5), ) ENGINE=MergeTree ORDER BY uint64; -INSERT INTO test_table VALUES (1, 1.1, 1.11); -INSERT INTO test_table VALUES (2, 2.2, 2.22); -INSERT INTO test_table VALUES (3, 3.3, 3.33); -INSERT INTO test_table VALUES (4, 4.4, 4.44); -INSERT INTO test_table VALUES (5, 5.5, 5.55); +-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order. +INSERT INTO test_table VALUES (1, 1.125, 1.11); +INSERT INTO test_table VALUES (2, 2.250, 2.22); +INSERT INTO test_table VALUES (3, 3.375, 3.33); +INSERT INTO test_table VALUES (4, 4.500, 4.44); +INSERT INTO test_table VALUES (5, 5.625, 5.55); -- { echoOn } SELECT sum(uint64 + 1 AS i) from test_table where i > 0; From 0f265ce33d857a9c7446698629b6517b71b4a71d Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 23 Aug 2024 23:13:53 +0200 Subject: [PATCH 103/114] address comments --- src/Interpreters/InterpreterCreateQuery.cpp | 1 - src/Storages/AlterCommands.cpp | 10 ++++------ src/Storages/ColumnsDescription.cpp | 4 ---- src/Storages/MergeTree/MergeTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- src/Storages/Statistics/Statistics.cpp | 16 ++++++++-------- src/Storages/Statistics/Statistics.h | 6 ++++-- src/Storages/StatisticsDescription.cpp | 21 +++++---------------- src/Storages/StatisticsDescription.h | 4 ++-- 9 files changed, 26 insertions(+), 42 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 95143031707..467547e6c9e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -700,7 +700,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); } - column.statistics.column_name = column.name; /// We assign column name here for better exception error message. 
if (col_decl.statistics_desc)
         {
             if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index 67b18217767..07bc87b0162 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -705,9 +705,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         }
 
         auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-        for (const auto & stats : stats_vec)
+        for (const auto & [stats_column_name, stats] : stats_vec)
         {
-            metadata.columns.modify(stats.column_name,
+            metadata.columns.modify(stats_column_name,
                 [&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
         }
     }
@@ -739,9 +739,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
         }
 
         auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-        for (const auto & stats : stats_vec)
+        for (const auto & [stats_column_name, stats] : stats_vec)
        {
-            metadata.columns.modify(stats.column_name,
+            metadata.columns.modify(stats_column_name,
                 [&](ColumnDescription & column) { column.statistics.assign(stats); });
        }
     }
@@ -866,8 +866,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
                 rename_visitor.visit(column_to_modify.default_desc.expression);
             if (column_to_modify.ttl)
                 rename_visitor.visit(column_to_modify.ttl);
-            if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
-                column_to_modify.statistics.column_name = rename_to;
         });
     }
     if (metadata.table_ttl.definition_ast)

diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 0d724245b49..fdc3446aa46 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -209,11 +209,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
             settings = col_ast->settings->as<ASTSetQuery &>().changes;
 
         if (col_ast->statistics_desc)
-        {
             statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
-            /// every column has name `x` here, so we have to set the name manually.
- statistics.column_name = name; - } } else throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ce06adf110c..0d34eb7f630 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -62,7 +62,7 @@ static ColumnsStatistics getStatisticsForColumns( const auto * desc = all_columns.tryGet(column.name); if (desc && !desc->statistics.empty()) { - auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics); + auto statistics = MergeTreeStatisticsFactory::instance().get(*desc); all_statistics.push_back(std::move(statistics)); } } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b9b5333a61c..1119ca324d6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -546,7 +546,7 @@ static std::set getStatisticsToRecalculate(const StorageMet { if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name)) { - stats_to_recalc.insert(stats_factory.get(col_desc.statistics)); + stats_to_recalc.insert(stats_factory.get(col_desc)); } } return stats_to_recalc; @@ -1530,7 +1530,7 @@ private: if (ctx->materialized_statistics.contains(col.name)) { - stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics)); + stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col)); } else { diff --git a/src/Storages/Statistics/Statistics.cpp b/src/Storages/Statistics/Statistics.cpp index fd686c5f0aa..6372c804e0e 100644 --- a/src/Storages/Statistics/Statistics.cpp +++ b/src/Storages/Statistics/Statistics.cpp @@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_) { } -ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_) - : stats_desc(stats_desc_) +ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_) + : stats_desc(stats_desc_), column_name(column_name_) { } @@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const const String & ColumnStatistics::columnName() const { - return stats_desc.column_name; + return column_name; } UInt64 ColumnStatistics::rowCount() const @@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st } } -ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const +ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const { - ColumnStatisticsPtr column_stat = std::make_shared(stats); - for (const auto & [type, desc] : stats.types_to_desc) + ColumnStatisticsPtr column_stat = std::make_shared(column_desc.statistics, column_desc.name); + for (const auto & [type, desc] : column_desc.statistics.types_to_desc) { auto it = creators.find(type); if (it == creators.end()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. 
Available types: 'tdigest' 'uniq' and 'count_min'", type);
-        auto stat_ptr = (it->second)(desc, stats.data_type);
+        auto stat_ptr = (it->second)(desc, column_desc.type);
         column_stat->stats[type] = stat_ptr;
     }
     return column_stat;
@@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
     ColumnsStatistics result;
     for (const auto & col : columns)
         if (!col.statistics.empty())
-            result.push_back(get(col.statistics));
+            result.push_back(get(col));
     return result;
 }

diff --git a/src/Storages/Statistics/Statistics.h b/src/Storages/Statistics/Statistics.h
index 2a30c0de315..98666ed73df 100644
--- a/src/Storages/Statistics/Statistics.h
+++ b/src/Storages/Statistics/Statistics.h
@@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
 class ColumnStatistics
 {
 public:
-    explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
+    explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
 
     void serialize(WriteBuffer & buf);
     void deserialize(ReadBuffer & buf);
@@ -73,10 +73,12 @@ public:
 private:
     friend class MergeTreeStatisticsFactory;
     ColumnStatisticsDescription stats_desc;
+    String column_name;
     std::map<StatisticsType, StatisticsPtr> stats;
     UInt64 rows = 0; /// the number of rows in the column
 };

+struct ColumnDescription;
 class ColumnsDescription;
 using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
 using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;

@@ -91,7 +93,7 @@ public:
     using Validator = std::function<void(const SingleStatisticsDescription &, const DataTypePtr &)>;
     using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription &, const DataTypePtr &)>;

-    ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
+    ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
     ColumnsStatistics getMany(const ColumnsDescription & columns) const;

     void registerValidator(StatisticsType type, Validator validator);

diff --git a/src/Storages/StatisticsDescription.cpp b/src/Storages/StatisticsDescription.cpp
index acf600dd6f7..64634124758 100644
--- a/src/Storages/StatisticsDescription.cpp
+++ b/src/Storages/StatisticsDescription.cpp
@@ -96,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
 {
     chassert(merging_column_type);
 
-    if (column_name.empty())
-        column_name = merging_column_name;
-
     data_type = merging_column_type;
 
     for (const auto & [stats_type, stats_desc]: other.types_to_desc)
     {
         if (!if_not_exists && types_to_desc.contains(stats_type))
         {
-            throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name);
+            throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
         }
         else if (!types_to_desc.contains(stats_type))
             types_to_desc.emplace(stats_type, stats_desc);
@@ -114,12 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
 
 void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
 {
-    /// If the statistics is empty, it's possible that we have not assign a column_name.
-    if (empty() && column_name == "")
-        column_name = other.column_name;
-    if (other.column_name != column_name)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", other.column_name, column_name);
-
     types_to_desc = other.types_to_desc;
     data_type = other.data_type;
 }
@@ -129,7 +120,7 @@ void ColumnStatisticsDescription::clear()
     types_to_desc.clear();
 }
 
-std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
+std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
 {
     const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
     if (!stat_definition_ast)
@@ -147,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
         statistics_types.emplace(stat.type, stat);
     }
 
-    std::vector<ColumnStatisticsDescription> result;
+    std::vector<std::pair<String, ColumnStatisticsDescription>> result;
     result.reserve(stat_definition_ast->columns->children.size());
 
     for (const auto & column_ast : stat_definition_ast->columns->children)
@@ -159,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
             throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
 
         const auto & column = columns.getPhysical(physical_column_name);
-        stats.column_name = column.name;
         stats.data_type = column.type;
         stats.types_to_desc = statistics_types;
-        result.push_back(stats);
+        result.emplace_back(physical_column_name, stats);
     }
 
     if (result.empty())
@@ -177,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
     if (stat_type_list_ast->children.empty())
         throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
     ColumnStatisticsDescription stats;
-    stats.column_name = column.name;
     for (const auto & ast : stat_type_list_ast->children)
     {
         const auto & stat_type = ast->as<ASTFunction &>().name;
 
         SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
         if (stats.types_to_desc.contains(stat.type))
-            throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
         stats.types_to_desc.emplace(stat.type, std::move(stat));
     }
     stats.data_type = data_type;

diff --git a/src/Storages/StatisticsDescription.h b/src/Storages/StatisticsDescription.h
index 03b8fb0d583..46927f1418c 100644
--- a/src/Storages/StatisticsDescription.h
+++ b/src/Storages/StatisticsDescription.h
@@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
 
     ASTPtr getAST() const;
 
-    static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
+    /// get a vector of <column name, statistics description> pair
+    static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
     static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
 
     using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
     StatisticsTypeDescMap types_to_desc;
     DataTypePtr data_type;
 };

From 6fb8f2b4ee10a95104bf6f8880471d24d39095dc Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Fri, 23 Aug 2024 23:19:03 +0200
Subject: [PATCH 104/114] fix black

---
 .../test_manipulate_statistics/test.py | 24 +++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py
index ab5559e18fa..3a1c5ad5b96
100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -6,13 +6,17 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", user_configs=["config/config.xml"], with_zookeeper=True, - macros={"replica": "a", "shard": "shard1"} + "node1", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "a", "shard": "shard1"}, ) node2 = cluster.add_instance( - "node2", user_configs=["config/config.xml"], with_zookeeper=True, - macros={"replica": "b", "shard": "shard1"} + "node2", + user_configs=["config/config.xml"], + with_zookeeper=True, + macros={"replica": "b", "shard": "shard1"}, ) @@ -188,8 +192,14 @@ def test_replicated_table_ddl(started_cluster): def test_replicated_db(started_cluster): - node1.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") - node2.query("CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')") - node1.query("CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()") + node1.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node2.query( + "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')" + ) + node1.query( + "CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()" + ) node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64") node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest") From 0a35b111ffb34f3d6a8a9e9bfa712b57b722c447 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 23 Aug 2024 20:03:38 +0000 Subject: [PATCH 105/114] fix test 03221_mutation_analyzer_skip_part --- .../03221_mutation_analyzer_skip_part.sh | 46 +++++++++++++++++++ .../03221_mutation_analyzer_skip_part.sql | 21 --------- 2 files changed, 46 insertions(+), 21 deletions(-) create mode 100755 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh delete mode 100644 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh new file mode 100755 index 00000000000..03fd15f54e2 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query " + DROP TABLE IF EXISTS t_mutate_skip_part; + + CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) + ENGINE = MergeTree ORDER BY id PARTITION BY key + SETTINGS min_bytes_for_wide_part = 0; + + INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); + INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); + + SET mutations_sync = 2; + ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; + ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0; +" + +# Mutation query may return before the entry is added to part log. +# So, we may have to retry the flush of logs until all entries are actually flushed. 
+for _ in {1..10}; do + ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + res=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.part_log WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'") + + if [[ $res -eq 4 ]]; then + break + fi + + sleep 2.0 +done + +${CLICKHOUSE_CLIENT} --query " + SYSTEM FLUSH LOGS; + + -- If part is skipped in mutation and hardlinked then read_rows must be 0. + SELECT part_name, read_rows + FROM system.part_log + WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' + ORDER BY part_name; + + DROP TABLE IF EXISTS t_mutate_skip_part; +" diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql deleted file mode 100644 index bf9a10e2af4..00000000000 --- a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql +++ /dev/null @@ -1,21 +0,0 @@ -DROP TABLE IF EXISTS t_mutate_skip_part; - -CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) ENGINE = MergeTree ORDER BY id PARTITION BY key; - -INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); -INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); - -SET mutations_sync = 2; - -ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; -ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0; - -SYSTEM FLUSH LOGS; - --- If part is skipped in mutation and hardlinked then read_rows must be 0. -SELECT part_name, read_rows -FROM system.part_log -WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' -ORDER BY part_name; - -DROP TABLE IF EXISTS t_mutate_skip_part; From 080b8f74be186738813ca9d9e12ed3e327129c33 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Fri, 23 Aug 2024 15:50:56 -0700 Subject: [PATCH 106/114] Update README.md Add Austin meetup --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 546f08afd3d..ba212852ea8 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ Other upcoming meetups * [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 * [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 * [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10 +* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17 * [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17 ## Recent Recordings From 5fe151529ab58112f8fa8491d2bfff24562ff624 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 24 Aug 2024 07:33:18 +0200 Subject: [PATCH 107/114] fix flacky although that is not actually flacky --- tests/integration/test_manipulate_statistics/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_manipulate_statistics/test.py b/tests/integration/test_manipulate_statistics/test.py index aff943e4d20..3a1c5ad5b96 100644 --- a/tests/integration/test_manipulate_statistics/test.py +++ b/tests/integration/test_manipulate_statistics/test.py @@ -135,8 +135,8 @@ def test_single_node_normal(started_cluster): def test_replicated_table_ddl(started_cluster): - node1.query("DROP TABLE IF EXISTS test_stat") - node2.query("DROP TABLE IF EXISTS test_stat") + 
+    node2.query("DROP TABLE IF EXISTS test_stat SYNC")
 
     node1.query(
         """
@@ -192,6 +192,8 @@ def test_replicated_table_ddl(started_cluster):
 
 
 def test_replicated_db(started_cluster):
+    node1.query("DROP DATABASE IF EXISTS test SYNC")
+    node2.query("DROP DATABASE IF EXISTS test SYNC")
     node1.query(
         "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
     )

From e2aa953e700bfbabbfe69a5749f4d2806bd3610f Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Sat, 24 Aug 2024 20:45:10 +0800
Subject: [PATCH 108/114] Fix empty tuple in array

---
 src/Functions/array/arrayElement.cpp                           | 3 +++
 tests/queries/0_stateless/03229_empty_tuple_in_array.reference | 1 +
 tests/queries/0_stateless/03229_empty_tuple_in_array.sql       | 1 +
 3 files changed, 5 insertions(+)
 create mode 100644 tests/queries/0_stateless/03229_empty_tuple_in_array.reference
 create mode 100644 tests/queries/0_stateless/03229_empty_tuple_in_array.sql

diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp
index 81f3f97979b..d0b2b49cc1c 100644
--- a/src/Functions/array/arrayElement.cpp
+++ b/src/Functions/array/arrayElement.cpp
@@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
     const auto & tuple_columns = col_nested->getColumns();
     size_t tuple_size = tuple_columns.size();
 
+    if (tuple_size == 0)
+        return ColumnTuple::create(input_rows_count);
+
     const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(
         *typeid_cast<const DataTypeArray &>(*arguments[0].type).getNestedType()).getElements();
 
diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.reference b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference
new file mode 100644
index 00000000000..6a452c185a8
--- /dev/null
+++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.reference
@@ -0,0 +1 @@
+()
diff --git a/tests/queries/0_stateless/03229_empty_tuple_in_array.sql b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql
new file mode 100644
index 00000000000..09ba3595a5a
--- /dev/null
+++ b/tests/queries/0_stateless/03229_empty_tuple_in_array.sql
@@ -0,0 +1 @@
+select [()][0];

From 78c175225b9b4c929ed918e718351c18a166458a Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Thu, 22 Aug 2024 14:50:10 +0000
Subject: [PATCH 109/114] Done

---
 contrib/replxx                  |  2 +-
 src/Client/ReplxxLineReader.cpp | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/contrib/replxx b/contrib/replxx
index 5d04501f93a..5f696c6eb9a 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b
+Subproject commit 5f696c6eb9a88eb9784e8ff1d68bd5f70285dcc5
diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp
index 78ae6c5eb15..37ceb471e5b 100644
--- a/src/Client/ReplxxLineReader.cpp
+++ b/src/Client/ReplxxLineReader.cpp
@@ -299,13 +299,14 @@ ReplxxLineReader::ReplxxLineReader(
     Patterns delimiters_,
     const char word_break_characters_[],
     replxx::Replxx::highlighter_callback_t highlighter_,
-    [[ maybe_unused ]] std::istream & input_stream_,
-    [[ maybe_unused ]] std::ostream & output_stream_,
-    [[ maybe_unused ]] int in_fd_,
-    [[ maybe_unused ]] int out_fd_,
-    [[ maybe_unused ]] int err_fd_
+    std::istream & input_stream_,
+    std::ostream & output_stream_,
+    int in_fd_,
+    int out_fd_,
+    int err_fd_
 )
     : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
+    , rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_)
     , highlighter(std::move(highlighter_))
     , word_break_characters(word_break_characters_)
     , editor(getEditor())
@@ -516,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line)
     rx.history_add(line);
 
     // flush changes to the disk
-    if (!rx.history_save(history_file_path))
+    if (history_file_fd >= 0 && !rx.history_save(history_file_path))
         rx.print("Saving history failed: %s\n", errnoToString().c_str());
 
     if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))

From 01523cce2a4ba21c9855ab4eb1398986cf66c64b Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Fri, 23 Aug 2024 12:14:40 +0000
Subject: [PATCH 110/114] Bump replxx

---
 contrib/replxx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/replxx b/contrib/replxx
index 5f696c6eb9a..711c18e7f4d 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit 5f696c6eb9a88eb9784e8ff1d68bd5f70285dcc5
+Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb

From 385c8127cf4b7018a964705d0bdcaf17bdf494e4 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Fri, 23 Aug 2024 17:25:34 +0200
Subject: [PATCH 111/114] Fix FreeBSD build

---
 cmake/freebsd/toolchain-x86_64.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake
index 4635880b4a6..4d814693b39 100644
--- a/cmake/freebsd/toolchain-x86_64.cmake
+++ b/cmake/freebsd/toolchain-x86_64.cmake
@@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
 
+# dprintf is used in a patched version of replxx
+add_compile_definitions(_WITH_DPRINTF)
+
 set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake

From d16388000497251856f62e8ac67ade58c29f8e85 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Sun, 25 Aug 2024 00:11:31 -0400
Subject: [PATCH 112/114] process possible SSL error on connection reset

---
 base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
index 4873d259ae5..14c877b30af 100644
--- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
@@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
     while (mustRetry(rc, remaining_time));
     if (rc <= 0)
     {
+        // At this stage we still can have last not yet recieved SSL message containing SSL error
+        // so make a read to force SSL to process possible SSL error
+        if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
+        {
+            char c = 0;
+            SSL_read(_pSSL, &c, 1);
+        }
+
         rc = handleError(rc);
         if (rc == 0) throw SSLConnectionUnexpectedlyClosedException();
     }

From f7cc3e9c59947af5b753b154f5b1b59d26fe67d4 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Sun, 25 Aug 2024 00:13:12 -0400
Subject: [PATCH 113/114] postpone SSL handshake

---
 src/Client/Connection.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index e89bd7a2bf5..da6e5baa3ad 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
             /// work we need to pass host name separately. It will be send into TLS Hello packet to let
             /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
             static_cast<Poco::Net::SecureStreamSocket *>(socket.get())->setPeerHostName(host);
+            /// we want to postpone SSL handshake until first read or write operation
+            /// so any errors during negotiation would be properly processed
+            static_cast<Poco::Net::SecureStreamSocket *>(socket.get())->setLazyHandshake(true);
 #else
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support.");
 #endif

From f38f95a144fa8840bc19647af3be9aa83a505196 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Sun, 25 Aug 2024 14:26:21 +0200
Subject: [PATCH 114/114] Update base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp

---
 base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
index 14c877b30af..eaf267d8a8b 100644
--- a/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
+++ b/base/poco/NetSSL_OpenSSL/src/SecureSocketImpl.cpp
@@ -311,7 +311,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
     while (mustRetry(rc, remaining_time));
     if (rc <= 0)
     {
-        // At this stage we still can have last not yet recieved SSL message containing SSL error
+        // At this stage we still can have last not yet received SSL message containing SSL error
         // so make a read to force SSL to process possible SSL error
         if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
         {