From ca0aa935c1d364eaf04c7fab227731d882d852a1 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Mon, 27 Nov 2023 23:56:33 +0100 Subject: [PATCH 01/98] Add Enums support to `substring` function --- src/DataTypes/IDataType.h | 2 + src/Functions/GatherUtils/Sources.h | 75 +++++++++ src/Functions/substring.cpp | 58 ++++--- .../00493_substring_of_enum.reference | 148 ++++++++++++++++++ .../0_stateless/00493_substring_of_enum.sql | 24 +++ 5 files changed, 285 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/00493_substring_of_enum.reference create mode 100644 tests/queries/0_stateless/00493_substring_of_enum.sql diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 4ee615f5f70..98f7e0cb06f 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -447,6 +447,8 @@ template inline bool isNativeNumber(const T & data_type) { return W template inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); } template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } +template inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); } +template inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); } template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } template inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index fad2be1f622..a3b5c21b89c 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -11,6 +11,8 @@ #include #include +#include + #include "IArraySource.h" #include "IValueSource.h" #include "Slices.h" @@ -313,6 +315,79 @@ struct StringSource } }; +template +struct EnumSource { + using Column = ColumnVector; + using Slice = NumericArraySlice; + + using SinkType = StringSink; + + const typename ColumnVector::Container & data; + const DataTypeEnum & data_type; + + size_t row_num = 0; + + explicit EnumSource(const Column & col, const DataTypeEnum & data_type_) : data(col.getData()), data_type(data_type_) { } + + void next() { ++row_num; } + + bool isEnd() const { return row_num == data.size(); } + + size_t rowNum() const { return row_num; } + + size_t getSizeForReserve() const { return data.size(); } + + size_t getElementSize() const + { + StringRef name = data_type.getNameForValue(data[row_num]); + return name.size; + } + + size_t getColumnSize() const { return data.size(); } + + Slice getWhole() const { + StringRef name = data_type.getNameForValue(data[row_num]); + const UInt8 * name_data = reinterpret_cast(name.data); + return {name_data, name.size}; + } + + Slice getSliceFromLeft(size_t offset) const + { + StringRef name = data_type.getNameForValue(data[row_num]); + if (offset >= name.size) + return {nullptr, 0}; + const UInt8 * name_data = reinterpret_cast(name.data); + return {name_data + offset, name.size - offset}; + } + + Slice getSliceFromLeft(size_t offset, size_t length) const + { + StringRef name = data_type.getNameForValue(data[row_num]); + if (offset >= name.size) + return {nullptr, 0}; + const UInt8 * name_data = reinterpret_cast(name.data); + return {name_data + offset, std::min(length, name.size - offset)}; + } + + Slice getSliceFromRight(size_t offset) const + { + StringRef name = data_type.getNameForValue(data[row_num]); + const UInt8 * name_data = reinterpret_cast(name.data); + if (offset > 
name.size) + return {name_data, name.size}; + return {name_data + name.size - offset, offset}; + } + + Slice getSliceFromRight(size_t offset, size_t length) const + { + StringRef name = data_type.getNameForValue(data[row_num]); + const UInt8 * name_data = reinterpret_cast(name.data); + if (offset > name.size) + return {name_data, length + name.size > offset ? std::min(name.size, length + name.size - offset) : 0}; + return {name_data + name.size - offset, std::min(length, offset)}; + } +}; + /// Differs to StringSource by having 'offset' and 'length' in code points instead of bytes in getSlice* methods. /** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size: diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index 7678692f612..f42452c9d99 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -20,10 +21,10 @@ using namespace GatherUtils; namespace ErrorCodes { - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ZERO_ARRAY_OR_TUPLE_INDEX; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ZERO_ARRAY_OR_TUPLE_INDEX; } namespace @@ -61,7 +62,7 @@ public: throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: " "passed {}, should be 2 or 3", getName(), number_of_arguments); - if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0])) + if ((is_utf8 && !isString(arguments[0])) || (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0]))) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); @@ -124,7 +125,7 @@ public: { size_t number_of_arguments = arguments.size(); - ColumnPtr column_string = arguments[0].column; + ColumnPtr column_arg0 = arguments[0].column; ColumnPtr column_start = arguments[1].column; ColumnPtr column_length; @@ -147,33 +148,46 @@ public: if constexpr (is_utf8) { - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const ColumnString * col = checkAndGetColumn(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, UTF8StringSource(*col), input_rows_count); - else if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) + if (const ColumnConst * col_const = checkAndGetColumnConst(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, ConstSource(*col_const), input_rows_count); - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } else { - if (const ColumnString * col = checkAndGetColumn(column_string.get())) + if (const ColumnString * col = checkAndGetColumn(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, StringSource(*col), input_rows_count); - else if (const ColumnFixedString * col_fixed = 
checkAndGetColumn(column_string.get())) + length_value, StringSource(*col), input_rows_count); + if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, FixedStringSource(*col_fixed), input_rows_count); - else if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) + length_value, FixedStringSource(*col_fixed), input_rows_count); + if (const ColumnConst * col_const = checkAndGetColumnConst(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, ConstSource(*col_const), input_rows_count); - else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_string.get())) + length_value, ConstSource(*col_const), input_rows_count); + if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_arg0.get())) return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, ConstSource(*col_const_fixed), input_rows_count); - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + length_value, ConstSource(*col_const_fixed), input_rows_count); + if (isEnum8(arguments[0].type)) + if (const ColumnVector * col_enum8 = checkAndGetColumn>(column_arg0.get())) + { + const auto * enum_type = typeid_cast *>(arguments[0].type.get()); + return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, EnumSource(*col_enum8, *enum_type), input_rows_count); + } + if (isEnum16(arguments[0].type)) + if (const ColumnVector * col_enum16 = checkAndGetColumn>(column_arg0.get())) + { + const auto * enum_type = typeid_cast *>(arguments[0].type.get()); + return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, + length_value, EnumSource(*col_enum16, *enum_type), input_rows_count); + } + + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } } }; diff --git a/tests/queries/0_stateless/00493_substring_of_enum.reference b/tests/queries/0_stateless/00493_substring_of_enum.reference new file mode 100644 index 00000000000..427d9c9eafb --- /dev/null +++ b/tests/queries/0_stateless/00493_substring_of_enum.reference @@ -0,0 +1,148 @@ +-- Positive offsets (slice from left) +Offset: 1 Length: 1 hello shark h s +Offset: 1 Length: 2 hello shark he sh +Offset: 1 Length: 3 hello shark hel sha +Offset: 1 Length: 4 hello shark hell shar +Offset: 1 Length: 5 hello shark hello shark +Offset: 1 Length: 6 hello shark hello shark +Offset: 2 Length: 1 ello hark e h +Offset: 2 Length: 2 ello hark el ha +Offset: 2 Length: 3 ello hark ell har +Offset: 2 Length: 4 ello hark ello hark +Offset: 2 Length: 5 ello hark ello hark +Offset: 2 Length: 6 ello hark ello hark +Offset: 3 Length: 1 llo ark l a +Offset: 3 Length: 2 llo ark ll ar +Offset: 3 Length: 3 llo ark llo ark +Offset: 3 Length: 4 llo ark llo ark +Offset: 3 Length: 5 llo ark llo ark +Offset: 3 Length: 6 llo ark llo ark +Offset: 4 Length: 1 lo rk l r +Offset: 4 Length: 2 lo rk lo rk +Offset: 4 Length: 3 lo rk lo rk +Offset: 4 Length: 4 lo rk lo rk +Offset: 4 Length: 5 lo rk lo rk +Offset: 4 Length: 6 lo rk lo rk +Offset: 5 Length: 1 o k o k +Offset: 5 Length: 2 o k o 
k +Offset: 5 Length: 3 o k o k +Offset: 5 Length: 4 o k o k +Offset: 5 Length: 5 o k o k +Offset: 5 Length: 6 o k o k +Offset: 6 Length: 1 +Offset: 6 Length: 2 +Offset: 6 Length: 3 +Offset: 6 Length: 4 +Offset: 6 Length: 5 +Offset: 6 Length: 6 +Offset: 1 Length: 1 world eagle w e +Offset: 1 Length: 2 world eagle wo ea +Offset: 1 Length: 3 world eagle wor eag +Offset: 1 Length: 4 world eagle worl eagl +Offset: 1 Length: 5 world eagle world eagle +Offset: 1 Length: 6 world eagle world eagle +Offset: 2 Length: 1 orld agle o a +Offset: 2 Length: 2 orld agle or ag +Offset: 2 Length: 3 orld agle orl agl +Offset: 2 Length: 4 orld agle orld agle +Offset: 2 Length: 5 orld agle orld agle +Offset: 2 Length: 6 orld agle orld agle +Offset: 3 Length: 1 rld gle r g +Offset: 3 Length: 2 rld gle rl gl +Offset: 3 Length: 3 rld gle rld gle +Offset: 3 Length: 4 rld gle rld gle +Offset: 3 Length: 5 rld gle rld gle +Offset: 3 Length: 6 rld gle rld gle +Offset: 4 Length: 1 ld le l l +Offset: 4 Length: 2 ld le ld le +Offset: 4 Length: 3 ld le ld le +Offset: 4 Length: 4 ld le ld le +Offset: 4 Length: 5 ld le ld le +Offset: 4 Length: 6 ld le ld le +Offset: 5 Length: 1 d e d e +Offset: 5 Length: 2 d e d e +Offset: 5 Length: 3 d e d e +Offset: 5 Length: 4 d e d e +Offset: 5 Length: 5 d e d e +Offset: 5 Length: 6 d e d e +Offset: 6 Length: 1 +Offset: 6 Length: 2 +Offset: 6 Length: 3 +Offset: 6 Length: 4 +Offset: 6 Length: 5 +Offset: 6 Length: 6 +-- Negative offsets (slice from right) +Offset: -1 Length: 1 o k o k +Offset: -1 Length: 2 o k o k +Offset: -1 Length: 3 o k o k +Offset: -1 Length: 4 o k o k +Offset: -1 Length: 5 o k o k +Offset: -1 Length: 6 o k o k +Offset: -2 Length: 1 lo rk l r +Offset: -2 Length: 2 lo rk lo rk +Offset: -2 Length: 3 lo rk lo rk +Offset: -2 Length: 4 lo rk lo rk +Offset: -2 Length: 5 lo rk lo rk +Offset: -2 Length: 6 lo rk lo rk +Offset: -3 Length: 1 llo ark l a +Offset: -3 Length: 2 llo ark ll ar +Offset: -3 Length: 3 llo ark llo ark +Offset: -3 Length: 4 llo ark llo ark +Offset: -3 Length: 5 llo ark llo ark +Offset: -3 Length: 6 llo ark llo ark +Offset: -4 Length: 1 ello hark e h +Offset: -4 Length: 2 ello hark el ha +Offset: -4 Length: 3 ello hark ell har +Offset: -4 Length: 4 ello hark ello hark +Offset: -4 Length: 5 ello hark ello hark +Offset: -4 Length: 6 ello hark ello hark +Offset: -5 Length: 1 hello shark h s +Offset: -5 Length: 2 hello shark he sh +Offset: -5 Length: 3 hello shark hel sha +Offset: -5 Length: 4 hello shark hell shar +Offset: -5 Length: 5 hello shark hello shark +Offset: -5 Length: 6 hello shark hello shark +Offset: -6 Length: 1 hello shark +Offset: -6 Length: 2 hello shark h s +Offset: -6 Length: 3 hello shark he sh +Offset: -6 Length: 4 hello shark hel sha +Offset: -6 Length: 5 hello shark hell shar +Offset: -6 Length: 6 hello shark hello shark +Offset: -1 Length: 1 d e d e +Offset: -1 Length: 2 d e d e +Offset: -1 Length: 3 d e d e +Offset: -1 Length: 4 d e d e +Offset: -1 Length: 5 d e d e +Offset: -1 Length: 6 d e d e +Offset: -2 Length: 1 ld le l l +Offset: -2 Length: 2 ld le ld le +Offset: -2 Length: 3 ld le ld le +Offset: -2 Length: 4 ld le ld le +Offset: -2 Length: 5 ld le ld le +Offset: -2 Length: 6 ld le ld le +Offset: -3 Length: 1 rld gle r g +Offset: -3 Length: 2 rld gle rl gl +Offset: -3 Length: 3 rld gle rld gle +Offset: -3 Length: 4 rld gle rld gle +Offset: -3 Length: 5 rld gle rld gle +Offset: -3 Length: 6 rld gle rld gle +Offset: -4 Length: 1 orld agle o a +Offset: -4 Length: 2 orld agle or ag +Offset: -4 Length: 3 orld agle orl agl +Offset: -4 
Length: 4 orld agle orld agle +Offset: -4 Length: 5 orld agle orld agle +Offset: -4 Length: 6 orld agle orld agle +Offset: -5 Length: 1 world eagle w e +Offset: -5 Length: 2 world eagle wo ea +Offset: -5 Length: 3 world eagle wor eag +Offset: -5 Length: 4 world eagle worl eagl +Offset: -5 Length: 5 world eagle world eagle +Offset: -5 Length: 6 world eagle world eagle +Offset: -6 Length: 1 world eagle +Offset: -6 Length: 2 world eagle w e +Offset: -6 Length: 3 world eagle wo ea +Offset: -6 Length: 4 world eagle wor eag +Offset: -6 Length: 5 world eagle worl eagl +Offset: -6 Length: 6 world eagle world eagle +-- Constant enums +f fo diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql new file mode 100644 index 00000000000..39d0014bde9 --- /dev/null +++ b/tests/queries/0_stateless/00493_substring_of_enum.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS substring_enums_test; +CREATE TABLE substring_enums_test(e8 Enum('hello' = -5, 'world' = 15), e16 Enum('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple(); +INSERT INTO TABLE substring_enums_test VALUES ('hello', 'shark'), ('world', 'eagle'); + +SELECT '-- Positive offsets (slice from left)'; +WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), + permutations AS (SELECT c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2) +SELECT 'Offset: ', p.offset, 'Length: ', p.length, + substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2, + substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4 +FROM substring_enums_test LEFT JOIN permutations AS p ON true; + +SELECT '-- Negative offsets (slice from right)'; +WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), + permutations AS (SELECT -c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2) +SELECT 'Offset: ', p.offset, 'Length: ', p.length, + substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2, + substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4 +FROM substring_enums_test LEFT JOIN permutations AS p ON true; + +SELECT '-- Constant enums'; +SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2); + +DROP TABLE substring_enums_test; From 497e5abc4793a8c486192593fc48c0b67db4d483 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Tue, 28 Nov 2023 00:09:36 +0100 Subject: [PATCH 02/98] Add a doc entry. --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 1940993ce0b..8ecd5af9258 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -579,7 +579,7 @@ A function is called injective if it returns for different arguments different r ## substring(s, offset, length) -Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. +Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. Can be also used with [Enum](../../sql-reference/data-types/enum.md) types. 
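For illustration, a minimal example of the new Enum support, mirroring the constant-Enum case in the accompanying test `00493_substring_of_enum.sql` (the slice is taken over the Enum value's name):

```sql
SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2);
```

Per the test reference, this returns `f` and `fo`.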
**Syntax**

From ff6bdfe8576b632b68700a8b8b220602fbc6b041 Mon Sep 17 00:00:00 2001
From: slvrtrn
Date: Tue, 28 Nov 2023 00:43:29 +0100
Subject: [PATCH 03/98] Add more substring with enums tests

---
 .../queries/0_stateless/00493_substring_of_enum.reference | 7 +++++++
 tests/queries/0_stateless/00493_substring_of_enum.sql | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/tests/queries/0_stateless/00493_substring_of_enum.reference b/tests/queries/0_stateless/00493_substring_of_enum.reference
index 427d9c9eafb..17bc960b318 100644
--- a/tests/queries/0_stateless/00493_substring_of_enum.reference
+++ b/tests/queries/0_stateless/00493_substring_of_enum.reference
@@ -144,5 +144,12 @@ Offset: -6 Length: 3 world eagle wo ea
 Offset: -6 Length: 4 world eagle wor eag
 Offset: -6 Length: 5 world eagle worl eagl
 Offset: -6 Length: 6 world eagle world eagle
+-- Zero offset/length
+Offset: 0 Length: 0
+Offset: 0 Length: 1
+Offset: 1 Length: 0 hello shark
+Offset: 0 Length: 0
+Offset: 0 Length: 1
+Offset: 1 Length: 0 world eagle
 -- Constant enums
 f fo
diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql
index 39d0014bde9..ba9fc630490 100644
--- a/tests/queries/0_stateless/00493_substring_of_enum.sql
+++ b/tests/queries/0_stateless/00493_substring_of_enum.sql
@@ -18,6 +18,14 @@ SELECT 'Offset: ', p.offset, 'Length: ', p.length,
     substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
 FROM substring_enums_test LEFT JOIN permutations AS p ON true;

+SELECT '-- Zero offset/length';
+WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2),
+     permutations AS (SELECT c1.n AS offset, c2.n AS length FROM cte AS c1 CROSS JOIN cte AS c2 LIMIT 3)
+SELECT 'Offset: ', p.offset, 'Length: ', p.length,
+       substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2,
+       substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4
+FROM substring_enums_test LEFT JOIN permutations AS p ON true;
+
 SELECT '-- Constant enums';
 SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2);

From b56b48d2de0aced8c40c3e00591d3e0c8448812f Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 28 Nov 2023 14:59:20 +0000
Subject: [PATCH 04/98] Update docs

---
 .../functions/string-functions.md | 36 ++++++++++++++++---
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 8ecd5af9258..baf08e18f11 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -577,26 +577,52 @@ Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, exp

 A function is called injective if it returns for different arguments different results. In other words: different arguments never produce identical result.

-## substring(s, offset, length)
+## substring

-Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. Can be also used with [Enum](../../sql-reference/data-types/enum.md) types.
+Returns the substring of a string `s` which starts at the specified byte index `offset`. Byte counting starts from 1. If `offset` is 0, an empty string is returned. If `offset` is negative, the substring starts `offset` characters from the end of the string, rather than from the beginning.
An optional argument `length` specifies the maximum number of bytes the returned substring may have. **Syntax** ```sql -substring(s, offset, length) +substring(s, offset[, length]) ``` Alias: - `substr` - `mid` +**Arguments** + +- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. + +**Returned value** + +A substring of `s` with `length` many bytes, starting at index `offset`. + +Type: `String`. + +**Example** + +``` sql +SELECT 'database' AS db, substr(db, 5), substr(db, 5, 1) +``` + +Result: + +```result +┌─db───────┬─substring('database', 5)─┬─substring('database', 5, 1)─┐ +│ database │ base │ b │ +└──────────┴──────────────────────────┴─────────────────────────────┘ +``` + ## substringUTF8 Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## substringIndex(s, delim, count) +## substringIndex Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL. @@ -627,7 +653,7 @@ Result: └──────────────────────────────────────────────┘ ``` -## substringIndexUTF8(s, delim, count) +## substringIndexUTF8 Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
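A minimal sketch for the UTF-8 variant; the example string and expected result are illustrative assumptions rather than part of the referenced documentation page:

```sql
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2);
```

This would return `www.straßen-in-europa`.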
From 157555a45bcd11949a496603b224b5f07883fad8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 28 Nov 2023 16:59:12 +0000 Subject: [PATCH 05/98] Some fixups and consistency fixes --- src/DataTypes/IDataType.h | 2 +- src/Functions/GatherUtils/Sources.h | 113 +++++++----- src/Functions/substring.cpp | 161 ++++++++---------- .../0_stateless/00493_substring_of_enum.sql | 14 +- 4 files changed, 150 insertions(+), 140 deletions(-) diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 98f7e0cb06f..e287b5879a2 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -446,9 +446,9 @@ template inline bool isFloat(const T & data_type) { return WhichDat template inline bool isNativeNumber(const T & data_type) { return WhichDataType(data_type).isNativeNumber(); } template inline bool isNumber(const T & data_type) { return WhichDataType(data_type).isNumber(); } -template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } template inline bool isEnum8(const T & data_type) { return WhichDataType(data_type).isEnum8(); } template inline bool isEnum16(const T & data_type) { return WhichDataType(data_type).isEnum16(); } +template inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } template inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } template inline bool isDate32(const T & data_type) { return WhichDataType(data_type).isDate32(); } diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index a3b5c21b89c..329f71203bf 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -58,8 +58,8 @@ struct NumericArraySource : public ArraySourceImpl> } explicit NumericArraySource(const ColumnArray & arr) - : column(typeid_cast(arr.getData())) - , elements(typeid_cast(arr.getData()).getData()), offsets(arr.getOffsets()) + : column(typeid_cast(arr.getData())) + , elements(typeid_cast(arr.getData()).getData()), offsets(arr.getOffsets()) { } @@ -156,17 +156,22 @@ struct ConstSource : public Base size_t row_num = 0; explicit ConstSource(const ColumnConst & col_) - : Base(static_cast(col_.getDataColumn())), total_rows(col_.size()) + : Base(static_cast(col_.getDataColumn())) + , total_rows(col_.size()) { } template - ConstSource(const ColumnType & col_, size_t total_rows_) : Base(col_), total_rows(total_rows_) + ConstSource(const ColumnType & col_, size_t total_rows_) + : Base(col_) + , total_rows(total_rows_) { } template - ConstSource(const ColumnType & col_, const NullMap & null_map_, size_t total_rows_) : Base(col_, null_map_), total_rows(total_rows_) + ConstSource(const ColumnType & col_, const NullMap & null_map_, size_t total_rows_) + : Base(col_, null_map_) + , total_rows(total_rows_) { } @@ -242,7 +247,8 @@ struct StringSource ColumnString::Offset prev_offset = 0; explicit StringSource(const ColumnString & col) - : elements(col.getChars()), offsets(col.getOffsets()) + : elements(col.getChars()) + , offsets(col.getOffsets()) { } @@ -315,76 +321,91 @@ struct StringSource } }; -template +/// Treats Enum values as Strings, modeled after StringSource +template struct EnumSource { - using Column = ColumnVector; + using Column = typename EnumDataType::ColumnType; using Slice = NumericArraySlice; using SinkType = StringSink; - const typename ColumnVector::Container & data; - const DataTypeEnum & data_type; + const typename Column::Container & data; + const EnumDataType & data_type; size_t row_num = 0; - 
explicit EnumSource(const Column & col, const DataTypeEnum & data_type_) : data(col.getData()), data_type(data_type_) { } + EnumSource(const Column & col, const EnumDataType & data_type_) + : data(col.getData()) + , data_type(data_type_) + { + } - void next() { ++row_num; } + void next() + { + ++row_num; + } - bool isEnd() const { return row_num == data.size(); } + bool isEnd() const + { + return row_num == data.size(); + } - size_t rowNum() const { return row_num; } + size_t rowNum() const + { + return row_num; + } - size_t getSizeForReserve() const { return data.size(); } + size_t getSizeForReserve() const + { + return data.size(); + } size_t getElementSize() const { - StringRef name = data_type.getNameForValue(data[row_num]); - return name.size; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + return name.size(); } - size_t getColumnSize() const { return data.size(); } + size_t getColumnSize() const + { + return data.size(); + } Slice getWhole() const { - StringRef name = data_type.getNameForValue(data[row_num]); - const UInt8 * name_data = reinterpret_cast(name.data); - return {name_data, name.size}; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + return {reinterpret_cast(name.data()), name.size()}; } Slice getSliceFromLeft(size_t offset) const { - StringRef name = data_type.getNameForValue(data[row_num]); - if (offset >= name.size) - return {nullptr, 0}; - const UInt8 * name_data = reinterpret_cast(name.data); - return {name_data + offset, name.size - offset}; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + if (offset >= name.size()) + return {reinterpret_cast(name.data()), 0}; + return {reinterpret_cast(name.data()) + offset, name.size() - offset}; } Slice getSliceFromLeft(size_t offset, size_t length) const { - StringRef name = data_type.getNameForValue(data[row_num]); - if (offset >= name.size) - return {nullptr, 0}; - const UInt8 * name_data = reinterpret_cast(name.data); - return {name_data + offset, std::min(length, name.size - offset)}; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + if (offset >= name.size()) + return {reinterpret_cast(name.data()), 0}; + return {reinterpret_cast(name.data()) + offset, std::min(length, name.size() - offset)}; } Slice getSliceFromRight(size_t offset) const { - StringRef name = data_type.getNameForValue(data[row_num]); - const UInt8 * name_data = reinterpret_cast(name.data); - if (offset > name.size) - return {name_data, name.size}; - return {name_data + name.size - offset, offset}; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + if (offset > name.size()) + return {reinterpret_cast(name.data()), name.size()}; + return {reinterpret_cast(name.data()) + name.size() - offset, offset}; } Slice getSliceFromRight(size_t offset, size_t length) const { - StringRef name = data_type.getNameForValue(data[row_num]); - const UInt8 * name_data = reinterpret_cast(name.data); - if (offset > name.size) - return {name_data, length + name.size > offset ? std::min(name.size, length + name.size - offset) : 0}; - return {name_data + name.size - offset, std::min(length, offset)}; + std::string_view name = data_type.getNameForValue(data[row_num]).toView(); + if (offset > name.size()) + return {reinterpret_cast(name.data()), length + name.size() > offset ? 
std::min(name.size(), length + name.size() - offset) : 0}; + return {reinterpret_cast(name.data()) + name.size() - offset, std::min(length, offset)}; } }; @@ -494,7 +515,7 @@ struct FixedStringSource size_t column_size = 0; explicit FixedStringSource(const ColumnFixedString & col) - : string_size(col.getN()) + : string_size(col.getN()) { const auto & chars = col.getChars(); pos = chars.data(); @@ -628,7 +649,8 @@ struct GenericArraySource : public ArraySourceImpl } explicit GenericArraySource(const ColumnArray & arr) - : elements(arr.getData()), offsets(arr.getOffsets()) + : elements(arr.getData()) + , offsets(arr.getOffsets()) { } @@ -888,7 +910,10 @@ struct NullableValueSource : public ValueSource const NullMap & null_map; template - explicit NullableValueSource(const Column & col, const NullMap & null_map_) : ValueSource(col), null_map(null_map_) {} + NullableValueSource(const Column & col, const NullMap & null_map_) + : ValueSource(col) + , null_map(null_map_) + {} void accept(ValueSourceVisitor & visitor) override { visitor.visit(*this); } diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index f42452c9d99..ac6a24fbc11 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -1,16 +1,16 @@ -#include -#include -#include -#include #include +#include +#include +#include +#include #include #include -#include +#include #include -#include #include #include -#include +#include +#include #include @@ -31,40 +31,40 @@ namespace { /// If 'is_utf8' - measure offset and length in code points instead of bytes. -/// UTF8 variant is not available for FixedString arguments. template class FunctionSubstring : public IFunction { public: static constexpr auto name = is_utf8 ? "substringUTF8" : "substring"; - static FunctionPtr create(ContextPtr) - { - return std::make_shared(); - } - - String getName() const override - { - return name; - } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - size_t number_of_arguments = arguments.size(); + const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 2 || number_of_arguments > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: " "passed {}, should be 2 or 3", getName(), number_of_arguments); - if ((is_utf8 && !isString(arguments[0])) || (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0]))) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0]->getName(), getName()); + if constexpr (is_utf8) + { + /// UTF8 variant is not available for FixedString and Enum arguments. 
+ if (!isString(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", + arguments[0]->getName(), getName()); + } + else + { + if (!isStringOrFixedString(arguments[0]) && !isEnum(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", + arguments[0]->getName(), getName()); + } if (!isNativeNumber(arguments[1])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", @@ -78,44 +78,40 @@ public: } template - ColumnPtr executeForSource(const ColumnPtr & column_start, const ColumnPtr & column_length, - const ColumnConst * column_start_const, const ColumnConst * column_length_const, - Int64 start_value, Int64 length_value, Source && source, - size_t input_rows_count) const + ColumnPtr executeForSource(const ColumnPtr & column_offset, const ColumnPtr & column_length, + bool column_offset_const, bool column_length_const, + Int64 offset, Int64 length, + Source && source, size_t input_rows_count) const { auto col_res = ColumnString::create(); if (!column_length) { - if (column_start_const) + if (column_offset_const) { - if (start_value > 0) - sliceFromLeftConstantOffsetUnbounded( - source, StringSink(*col_res, input_rows_count), static_cast(start_value - 1)); - else if (start_value < 0) - sliceFromRightConstantOffsetUnbounded( - source, StringSink(*col_res, input_rows_count), -static_cast(start_value)); + if (offset > 0) + sliceFromLeftConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), static_cast(offset - 1)); + else if (offset < 0) + sliceFromRightConstantOffsetUnbounded(source, StringSink(*col_res, input_rows_count), -static_cast(offset)); else throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based"); } else - sliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start); + sliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_offset); } else { - if (column_start_const && column_length_const) + if (column_offset_const && column_length_const) { - if (start_value > 0) - sliceFromLeftConstantOffsetBounded( - source, StringSink(*col_res, input_rows_count), static_cast(start_value - 1), length_value); - else if (start_value < 0) - sliceFromRightConstantOffsetBounded( - source, StringSink(*col_res, input_rows_count), -static_cast(start_value), length_value); + if (offset > 0) + sliceFromLeftConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), static_cast(offset - 1), length); + else if (offset < 0) + sliceFromRightConstantOffsetBounded(source, StringSink(*col_res, input_rows_count), -static_cast(offset), length); else throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based"); } else - sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length); + sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_offset, *column_length); } return col_res; @@ -123,71 +119,60 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - size_t number_of_arguments = arguments.size(); + const size_t number_of_arguments = arguments.size(); - ColumnPtr column_arg0 = arguments[0].column; - ColumnPtr column_start = arguments[1].column; + ColumnPtr column_string = arguments[0].column; + ColumnPtr column_offset = arguments[1].column; 
ColumnPtr column_length; - if (number_of_arguments == 3) column_length = arguments[2].column; - const ColumnConst * column_start_const = checkAndGetColumn(column_start.get()); + const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (number_of_arguments == 3) column_length_const = checkAndGetColumn(column_length.get()); - Int64 start_value = 0; - Int64 length_value = 0; + Int64 offset = 0; + Int64 length = 0; - if (column_start_const) - start_value = column_start_const->getInt(0); + if (column_offset_const) + offset = column_offset_const->getInt(0); if (column_length_const) - length_value = column_length_const->getInt(0); + length = column_length_const->getInt(0); if constexpr (is_utf8) { - if (const ColumnString * col = checkAndGetColumn(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, UTF8StringSource(*col), input_rows_count); - if (const ColumnConst * col_const = checkAndGetColumnConst(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, ConstSource(*col_const), input_rows_count); - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + if (const ColumnString * col = checkAndGetColumn(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, UTF8StringSource(*col), input_rows_count); + if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); } else { - if (const ColumnString * col = checkAndGetColumn(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, StringSource(*col), input_rows_count); - if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, FixedStringSource(*col_fixed), input_rows_count); - if (const ColumnConst * col_const = checkAndGetColumnConst(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, ConstSource(*col_const), input_rows_count); - if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_arg0.get())) - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, ConstSource(*col_const_fixed), input_rows_count); - if (isEnum8(arguments[0].type)) - if (const ColumnVector * col_enum8 = checkAndGetColumn>(column_arg0.get())) + if (const ColumnString * col = checkAndGetColumn(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, StringSource(*col), input_rows_count); + if (const ColumnFixedString * col_fixed = checkAndGetColumn(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, 
column_length_const, offset, length, FixedStringSource(*col_fixed), input_rows_count); + if (const ColumnConst * col_const = checkAndGetColumnConst(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const), input_rows_count); + if (const ColumnConst * col_const_fixed = checkAndGetColumnConst(column_string.get())) + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, ConstSource(*col_const_fixed), input_rows_count); + if (isEnum(arguments[0].type)) + { + if (const typename DataTypeEnum8::ColumnType * col_enum8 = checkAndGetColumn(column_string.get())) { - const auto * enum_type = typeid_cast *>(arguments[0].type.get()); - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, EnumSource(*col_enum8, *enum_type), input_rows_count); + const auto * type_enum8 = assert_cast(arguments[0].type.get()); + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, EnumSource(*col_enum8, *type_enum8), input_rows_count); } - if (isEnum16(arguments[0].type)) - if (const ColumnVector * col_enum16 = checkAndGetColumn>(column_arg0.get())) + if (const typename DataTypeEnum16::ColumnType * col_enum16 = checkAndGetColumn(column_string.get())) { - const auto * enum_type = typeid_cast *>(arguments[0].type.get()); - return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, - length_value, EnumSource(*col_enum16, *enum_type), input_rows_count); + const auto * type_enum16 = assert_cast(arguments[0].type.get()); + return executeForSource(column_offset, column_length, column_offset_const, column_length_const, offset, length, EnumSource(*col_enum16, *type_enum16), input_rows_count); } + } - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); } } }; @@ -197,8 +182,8 @@ public: REGISTER_FUNCTION(Substring) { factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive); - factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// from MySQL dialect + factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive); // MySQL alias + factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// MySQL alias factory.registerFunction>({}, FunctionFactory::CaseSensitive); } diff --git a/tests/queries/0_stateless/00493_substring_of_enum.sql b/tests/queries/0_stateless/00493_substring_of_enum.sql index ba9fc630490..cdda76e88a7 100644 --- a/tests/queries/0_stateless/00493_substring_of_enum.sql +++ b/tests/queries/0_stateless/00493_substring_of_enum.sql @@ -1,6 +1,6 @@ -DROP TABLE IF EXISTS substring_enums_test; -CREATE TABLE substring_enums_test(e8 Enum('hello' = -5, 'world' = 15), e16 Enum('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple(); -INSERT INTO TABLE substring_enums_test VALUES ('hello', 'shark'), ('world', 'eagle'); +DROP TABLE IF EXISTS tab; +CREATE TABLE tab(e8 Enum8('hello' = -5, 'world' = 15), e16 Enum16('shark' = -999, 'eagle' = 9999)) ENGINE MergeTree ORDER BY tuple(); +INSERT INTO TABLE tab VALUES 
('hello', 'shark'), ('world', 'eagle'); SELECT '-- Positive offsets (slice from left)'; WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), @@ -8,7 +8,7 @@ WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), SELECT 'Offset: ', p.offset, 'Length: ', p.length, substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2, substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4 -FROM substring_enums_test LEFT JOIN permutations AS p ON true; +FROM tab LEFT JOIN permutations AS p ON true; SELECT '-- Negative offsets (slice from right)'; WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), @@ -16,7 +16,7 @@ WITH cte AS (SELECT number + 1 AS n FROM system.numbers LIMIT 6), SELECT 'Offset: ', p.offset, 'Length: ', p.length, substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2, substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4 -FROM substring_enums_test LEFT JOIN permutations AS p ON true; +FROM tab LEFT JOIN permutations AS p ON true; SELECT '-- Zero offset/length'; WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2), @@ -24,9 +24,9 @@ WITH cte AS (SELECT number AS n FROM system.numbers LIMIT 2), SELECT 'Offset: ', p.offset, 'Length: ', p.length, substring(e8, p.offset) AS s1, substring(e16, p.offset) AS s2, substring(e8, p.offset, p.length) AS s3, substring(e16, p.offset, p.length) AS s4 -FROM substring_enums_test LEFT JOIN permutations AS p ON true; +FROM tab LEFT JOIN permutations AS p ON true; SELECT '-- Constant enums'; SELECT substring(CAST('foo', 'Enum8(\'foo\' = 1)'), 1, 1), substring(CAST('foo', 'Enum16(\'foo\' = 1111)'), 1, 2); -DROP TABLE substring_enums_test; +DROP TABLE tab; From 1751524dafc04f5bca14b8ea9434fd870ab6cc57 Mon Sep 17 00:00:00 2001 From: slvrtrn Date: Fri, 1 Dec 2023 03:34:54 +0100 Subject: [PATCH 06/98] Fix style --- src/Functions/GatherUtils/Sources.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 329f71203bf..222f9f19168 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -323,7 +323,8 @@ struct StringSource /// Treats Enum values as Strings, modeled after StringSource template -struct EnumSource { +struct EnumSource +{ using Column = typename EnumDataType::ColumnType; using Slice = NumericArraySlice; @@ -371,7 +372,8 @@ struct EnumSource { return data.size(); } - Slice getWhole() const { + Slice getWhole() const + { std::string_view name = data_type.getNameForValue(data[row_num]).toView(); return {reinterpret_cast(name.data()), name.size()}; } From a90458eb65f3743f262df1241915d3046b163a26 Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Thu, 30 Nov 2023 23:46:42 -0800 Subject: [PATCH 07/98] Fix several issues regarding PostgreSQL `array_ndims` usage. 1. Properly quote column identifiers. 2. Handle empty tables that have array columns. 3. Throw a more user friendly error when column value = NULL when calling array_dims(column) 4. 
Handle column value being the empty array {} --- .../fetchPostgreSQLTableStructure.cpp | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index dec3f1ffe5a..9890936007f 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -158,6 +158,17 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn auto && r return res; } +/// Check if PostgreSQL relation is empty. +/// postgres_table must be already quoted + schema-qualified. +template +bool isTableEmpty(T &tx, const String & postgres_table) { + auto query = fmt::format( + "SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);", + postgres_table + ); + pqxx::result result{tx.exec(query)}; + return result[0][0].as(); +} template PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( @@ -213,10 +224,39 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( { const auto & name_and_type = columns[i]; - /// All rows must contain the same number of dimensions, so limit 1 is ok. If number of dimensions in all rows is not the same - - /// such arrays are not able to be used as ClickHouse Array at all. - pqxx::result result{tx.exec(fmt::format("SELECT array_ndims({}) FROM {} LIMIT 1", name_and_type.name, postgres_table))}; - auto dimensions = result[0][0].as(); + /// NOTE: If the relation is empty, then array_ndims returns NULL. + /// If this is the case, then assume dimensions=1. This covers most + /// use cases, but will be incorrect for empty tables with + /// multi-dimension arrays. The other solutions would be to drop + /// support for empty tables OR attempt fallback to a discovered + /// array_ndims CHECK constraint. + int dimensions; + if (isTableEmpty(tx, postgres_table)) { + dimensions = 1; + } else { + /// All rows must contain the same number of dimensions. + /// 1 is ok. If number of dimensions in all rows is not the same - + /// such arrays are not able to be used as ClickHouse Array at all. + /// + /// Assume dimensions=1 for empty arrays. + auto postgres_column = doubleQuoteString(name_and_type.name); + pqxx::result result{tx.exec(fmt::format( + "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) " + "FROM {} LIMIT 1;", + postgres_column, postgres_column, postgres_table + ))}; + + /// Nullable(Array) is not supported. + auto is_null = result[0][0].as(); + if (is_null) { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "PostgreSQL array cannot be NULL. Column: {}", postgres_column + ); + } + + dimensions = result[0][1].as(); + } /// It is always 1d array if it is in recheck. DataTypePtr type = assert_cast(name_and_type.type.get())->getNestedType(); From 17aacda9e53bfa9359ad808c85b81bc299c86694 Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Sat, 2 Dec 2023 06:44:34 -0800 Subject: [PATCH 08/98] fix: format code for stylecheck --- .../fetchPostgreSQLTableStructure.cpp | 60 +++++++++---------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 9890936007f..0a35bc8c2b5 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -160,14 +160,12 @@ static DataTypePtr convertPostgreSQLDataType(String & type, Fn auto && r /// Check if PostgreSQL relation is empty. 
/// postgres_table must be already quoted + schema-qualified. -template -bool isTableEmpty(T &tx, const String & postgres_table) { - auto query = fmt::format( - "SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);", - postgres_table - ); - pqxx::result result{tx.exec(query)}; - return result[0][0].as(); +template +bool isTableEmpty(T & tx, const String & postgres_table) +{ + auto query = fmt::format("SELECT NOT EXISTS (SELECT * FROM {} LIMIT 1);", postgres_table); + pqxx::result result{tx.exec(query)}; + return result[0][0].as(); } template @@ -231,31 +229,31 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( /// support for empty tables OR attempt fallback to a discovered /// array_ndims CHECK constraint. int dimensions; - if (isTableEmpty(tx, postgres_table)) { - dimensions = 1; - } else { - /// All rows must contain the same number of dimensions. - /// 1 is ok. If number of dimensions in all rows is not the same - - /// such arrays are not able to be used as ClickHouse Array at all. - /// - /// Assume dimensions=1 for empty arrays. - auto postgres_column = doubleQuoteString(name_and_type.name); - pqxx::result result{tx.exec(fmt::format( - "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) " - "FROM {} LIMIT 1;", - postgres_column, postgres_column, postgres_table - ))}; + if (isTableEmpty(tx, postgres_table)) + { + dimensions = 1; + } + else + { + /// All rows must contain the same number of dimensions. + /// 1 is ok. If number of dimensions in all rows is not the same - + /// such arrays are not able to be used as ClickHouse Array at all. + /// + /// Assume dimensions=1 for empty arrays. + auto postgres_column = doubleQuoteString(name_and_type.name); + pqxx::result result{tx.exec(fmt::format( + "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) " + "FROM {} LIMIT 1;", + postgres_column, + postgres_column, + postgres_table))}; - /// Nullable(Array) is not supported. - auto is_null = result[0][0].as(); - if (is_null) { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "PostgreSQL array cannot be NULL. Column: {}", postgres_column - ); - } + /// Nullable(Array) is not supported. + auto is_null = result[0][0].as(); + if (is_null) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL. Column: {}", postgres_column); - dimensions = result[0][1].as(); + dimensions = result[0][1].as(); } /// It is always 1d array if it is in recheck. From b57340bde214855c9e04f77483bdf571d122d822 Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Wed, 6 Dec 2023 11:54:45 -0800 Subject: [PATCH 09/98] postgresql integration: Throw errors instead of assuming array_ndim == 1 --- .../fetchPostgreSQLTableStructure.cpp | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 0a35bc8c2b5..6c25514418e 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -222,40 +222,42 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( { const auto & name_and_type = columns[i]; - /// NOTE: If the relation is empty, then array_ndims returns NULL. - /// If this is the case, then assume dimensions=1. This covers most - /// use cases, but will be incorrect for empty tables with - /// multi-dimension arrays. The other solutions would be to drop - /// support for empty tables OR attempt fallback to a discovered - /// array_ndims CHECK constraint. 
- int dimensions; + /// If the relation is empty, then array_ndims returns NULL. + /// ClickHouse cannot support this use case. if (isTableEmpty(tx, postgres_table)) - { - dimensions = 1; - } - else - { - /// All rows must contain the same number of dimensions. - /// 1 is ok. If number of dimensions in all rows is not the same - - /// such arrays are not able to be used as ClickHouse Array at all. - /// - /// Assume dimensions=1 for empty arrays. - auto postgres_column = doubleQuoteString(name_and_type.name); - pqxx::result result{tx.exec(fmt::format( - "SELECT {} IS NULL, COALESCE(array_ndims({}), 1) " - "FROM {} LIMIT 1;", - postgres_column, - postgres_column, - postgres_table))}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL relation containing arrays cannot be empty: {}", postgres_table); - /// Nullable(Array) is not supported. - auto is_null = result[0][0].as(); - if (is_null) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL. Column: {}", postgres_column); + /// All rows must contain the same number of dimensions. + /// 1 is ok. If number of dimensions in all rows is not the same - + /// such arrays are not able to be used as ClickHouse Array at all. + /// + /// For empty arrays, array_ndims([]) will return NULL. + auto postgres_column = doubleQuoteString(name_and_type.name); + pqxx::result result{tx.exec(fmt::format( + "SELECT {} IS NULL, array_ndims({}) " + "FROM {} LIMIT 1;", + postgres_column, + postgres_column, + postgres_table))}; - dimensions = result[0][1].as(); + /// Nullable(Array) is not supported. + auto is_null_array = result[0][0].as(); + if (is_null_array) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL array cannot be NULL: {}.{}", postgres_table, postgres_column); + + /// Cannot infer dimension of empty arrays. + auto is_empty_array = result[0][1].is_null(); + if (is_empty_array) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "PostgreSQL cannot infer dimensions of an empty array: {}.{}", + postgres_table, + postgres_column); } + int dimensions = result[0][1].as(); + /// It is always 1d array if it is in recheck. 
DataTypePtr type = assert_cast(name_and_type.type.get())->getNestedType(); while (dimensions--) From 119c2864a07e7ef83a68add87020bbfad869a237 Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Wed, 6 Dec 2023 12:59:28 -0800 Subject: [PATCH 10/98] test_storage_postgresql: mixed-case identifier on array column --- .../test_storage_postgresql/test.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 11729a5ab18..39896c57b59 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -90,20 +90,20 @@ def test_postgres_conversions(started_cluster): cursor.execute( """CREATE TABLE test_types ( a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, - h timestamp, i date, j decimal(5, 3), k numeric, l boolean)""" + h timestamp, i date, j decimal(5, 3), k numeric, l boolean, "M" integer)""" ) node1.query( """ INSERT INTO TABLE FUNCTION postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword') VALUES - (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 22.222, 22.222, 1)""" + (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 22.222, 22.222, 1, 42)""" ) result = node1.query( """ - SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')""" + SELECT a, b, c, d, e, f, g, h, i, j, toDecimal128(k, 3), l, "M" FROM postgresql('postgres1:5432', 'postgres', 'test_types', 'postgres', 'mysecretpassword')""" ) assert ( result - == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t22.222\t22.222\t1\n" + == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t22.222\t22.222\t1\t42\n" ) cursor.execute( @@ -132,7 +132,8 @@ def test_postgres_conversions(started_cluster): i Char(2)[][][][], -- Nullable(String) j Char(2)[], -- Nullable(String) k UUID[], -- Nullable(UUID) - l UUID[][] -- Nullable(UUID) + l UUID[][], -- Nullable(UUID) + "M" integer[] NOT NULL -- Int32 (mixed-case identifier) )""" ) @@ -152,7 +153,8 @@ def test_postgres_conversions(started_cluster): "i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n" "j\tArray(Nullable(String))\t\t\t\t\t\n" "k\tArray(Nullable(UUID))\t\t\t\t\t\n" - "l\tArray(Array(Nullable(UUID)))" + "l\tArray(Array(Nullable(UUID)))\t\t\t\t\t\n" + "M\tArray(Int32)" "" ) assert result.rstrip() == expected @@ -171,7 +173,8 @@ def test_postgres_conversions(started_cluster): "[[[[NULL]]]], " "[], " "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', '42209d53-d641-4d73-a8b6-c038db1e75d6', NULL], " - "[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]]" + "[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]]," + "[42, 42, 42]" ")" ) @@ -191,7 +194,8 @@ def test_postgres_conversions(started_cluster): "[[[[NULL]]]]\t" "[]\t" "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a','42209d53-d641-4d73-a8b6-c038db1e75d6',NULL]\t" - 
"[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\n" + "[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\t" + "[42,42,42]\n" ) assert result == expected From 7d4142693513b0cccfedc0d1398e849e53f36107 Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Wed, 6 Dec 2023 18:25:45 -0800 Subject: [PATCH 11/98] test_storage_postgresql: Add test_postgres_array_ndim_error_messges() Tests: 1. View with array column cannot be empty --> error message 2. View cannot have empty array --> error message 3. View cannot have NULL array value --> error message 4. Ensures PG identifiers that require quoting do not crash ClickHouse. These apply to views that contain arrays. --- .../test_storage_postgresql/test.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 39896c57b59..8e1be600687 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -202,6 +202,53 @@ def test_postgres_conversions(started_cluster): cursor.execute(f"DROP TABLE test_types") cursor.execute(f"DROP TABLE test_array_dimensions") +def test_postgres_array_ndim_error_messges(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + + # cleanup + cursor.execute('DROP VIEW IF EXISTS array_ndim_view;') + cursor.execute('DROP TABLE IF EXISTS array_ndim_table;') + + # setup + cursor.execute('CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);') + cursor.execute('CREATE VIEW array_ndim_view AS SELECT * FROM array_ndim_table;') + describe_table = """ + DESCRIBE TABLE postgresql( + 'postgres1:5432', 'postgres', 'array_ndim_view', + 'postgres', 'mysecretpassword' + ) + """ + + # View with array column cannot be empty. Should throw a useful error message. + # (Cannot infer array dimension.) + try: + node1.query(describe_table) + assert False + except Exception as error: + assert ('PostgreSQL relation containing arrays cannot be empty: array_ndim_view' in str(error)) + + # View cannot have empty array. Should throw useful error message. + # (Cannot infer array dimension.) + cursor.execute('TRUNCATE array_ndim_table;') + cursor.execute("INSERT INTO array_ndim_table VALUES (1234, '{}');") + try: + node1.query(describe_table) + assert False + except Exception as error: + assert ('PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"' in str(error)) + + # View cannot have NULL array value. Should throw useful error message. 
+ cursor.execute('TRUNCATE array_ndim_table;') + cursor.execute('INSERT INTO array_ndim_table VALUES (1234, NULL);') + try: + node1.query(describe_table) + assert False + except Exception as error: + assert ('PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"' in str(error)) + + # cleanup + cursor.execute('DROP VIEW IF EXISTS array_ndim_view;') + cursor.execute('DROP TABLE IF EXISTS array_ndim_table;') def test_non_default_schema(started_cluster): node1.query("DROP TABLE IF EXISTS test_pg_table_schema") From 6a698d94804c2fd165c29ea168ec64383e333d3e Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Wed, 6 Dec 2023 18:56:54 -0800 Subject: [PATCH 12/98] clang-tidy --- .../PostgreSQL/fetchPostgreSQLTableStructure.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 6c25514418e..d4f9bb6dcf4 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -233,12 +233,8 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( /// /// For empty arrays, array_ndims([]) will return NULL. auto postgres_column = doubleQuoteString(name_and_type.name); - pqxx::result result{tx.exec(fmt::format( - "SELECT {} IS NULL, array_ndims({}) " - "FROM {} LIMIT 1;", - postgres_column, - postgres_column, - postgres_table))}; + pqxx::result result{tx.exec( + fmt::format("SELECT {} IS NULL, array_ndims({}) FROM {} LIMIT 1;", postgres_column, postgres_column, postgres_table))}; /// Nullable(Array) is not supported. auto is_null_array = result[0][0].as(); From ca790b6eecade9f23a7cbbfaf755332182e263ce Mon Sep 17 00:00:00 2001 From: Ryan Jacobs Date: Wed, 6 Dec 2023 19:26:50 -0800 Subject: [PATCH 13/98] apply python style check --- .../test_storage_postgresql/test.py | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 8e1be600687..a1b13739b5b 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -202,16 +202,19 @@ def test_postgres_conversions(started_cluster): cursor.execute(f"DROP TABLE test_types") cursor.execute(f"DROP TABLE test_array_dimensions") + def test_postgres_array_ndim_error_messges(started_cluster): cursor = started_cluster.postgres_conn.cursor() # cleanup - cursor.execute('DROP VIEW IF EXISTS array_ndim_view;') - cursor.execute('DROP TABLE IF EXISTS array_ndim_table;') + cursor.execute("DROP VIEW IF EXISTS array_ndim_view;") + cursor.execute("DROP TABLE IF EXISTS array_ndim_table;") # setup - cursor.execute('CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);') - cursor.execute('CREATE VIEW array_ndim_view AS SELECT * FROM array_ndim_table;') + cursor.execute( + 'CREATE TABLE array_ndim_table (x INTEGER, "Mixed-case with spaces" INTEGER[]);' + ) + cursor.execute("CREATE VIEW array_ndim_view AS SELECT * FROM array_ndim_table;") describe_table = """ DESCRIBE TABLE postgresql( 'postgres1:5432', 'postgres', 'array_ndim_view', @@ -225,30 +228,40 @@ def test_postgres_array_ndim_error_messges(started_cluster): node1.query(describe_table) assert False except Exception as error: - assert ('PostgreSQL relation containing arrays cannot be empty: array_ndim_view' in str(error)) + assert ( + "PostgreSQL relation containing arrays 
cannot be empty: array_ndim_view" + in str(error) + ) # View cannot have empty array. Should throw useful error message. # (Cannot infer array dimension.) - cursor.execute('TRUNCATE array_ndim_table;') + cursor.execute("TRUNCATE array_ndim_table;") cursor.execute("INSERT INTO array_ndim_table VALUES (1234, '{}');") try: node1.query(describe_table) assert False except Exception as error: - assert ('PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"' in str(error)) + assert ( + 'PostgreSQL cannot infer dimensions of an empty array: array_ndim_view."Mixed-case with spaces"' + in str(error) + ) # View cannot have NULL array value. Should throw useful error message. - cursor.execute('TRUNCATE array_ndim_table;') - cursor.execute('INSERT INTO array_ndim_table VALUES (1234, NULL);') + cursor.execute("TRUNCATE array_ndim_table;") + cursor.execute("INSERT INTO array_ndim_table VALUES (1234, NULL);") try: node1.query(describe_table) assert False except Exception as error: - assert ('PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"' in str(error)) + assert ( + 'PostgreSQL array cannot be NULL: array_ndim_view."Mixed-case with spaces"' + in str(error) + ) # cleanup - cursor.execute('DROP VIEW IF EXISTS array_ndim_view;') - cursor.execute('DROP TABLE IF EXISTS array_ndim_table;') + cursor.execute("DROP VIEW IF EXISTS array_ndim_view;") + cursor.execute("DROP TABLE IF EXISTS array_ndim_table;") + def test_non_default_schema(started_cluster): node1.query("DROP TABLE IF EXISTS test_pg_table_schema") From 2c6604ec24db0909e7d9273cf5fc3202120e4abc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 7 Dec 2023 17:39:09 +0100 Subject: [PATCH 14/98] Allow nodes in config with from_env/from_zk and non empty element with replace=1 Such nodes in config are useful as a nodes with default values, that can be overwritten from ZooKeeper/env. 
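A sketch of the two cases (the element and environment variable names here are illustrative, not copied from this patch; what decides validity is whether a value-carrying element also has the `replace` attribute next to `from_env`/`from_zk`):

```xml
<!-- Accepted after this patch: 86400 is the default and can be overridden via MAX_THREAD_POOL_SIZE -->
<max_thread_pool_size replace="replace" from_env="MAX_THREAD_POOL_SIZE">86400</max_thread_pool_size>

<!-- Still rejected: the element has a value but no replace attribute -->
<max_thread_pool_size from_env="MAX_THREAD_POOL_SIZE">86400</max_thread_pool_size>
```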
So after this patch the following is valid, and is interpreted as default value 86400, and can be overwritten via env: ```xml 86400 ``` While the following is not: ```xml 86400 ``` Signed-off-by: Azat Khuzhin --- src/Common/Config/ConfigProcessor.cpp | 10 +-- .../configs/000-config_with_env_subst.xml | 1 + .../test_config_substitutions/test.py | 68 ++++++++++++++++++- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c213b7257d9..92e66fee489 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -519,8 +519,9 @@ void ConfigProcessor::doIncludesRecursive( if (attr_nodes["from_zk"]) /// we have zookeeper subst { - if (node->hasChildNodes()) /// only allow substitution for nodes with no value - throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_zk substitution"); + /// only allow substitution for nodes with no value and without "replace" + if (node->hasChildNodes() && !replace) + throw Poco::Exception("Element <" + node->nodeName() + "> has value and does not have 'replace' attribute, can't process from_zk substitution"); contributing_zk_paths.insert(attr_nodes["from_zk"]->getNodeValue()); @@ -544,8 +545,9 @@ void ConfigProcessor::doIncludesRecursive( if (attr_nodes["from_env"]) /// we have env subst { - if (node->hasChildNodes()) /// only allow substitution for nodes with no value - throw Poco::Exception("Element <" + node->nodeName() + "> has value, can't process from_env substitution"); + /// only allow substitution for nodes with no value and without "replace" + if (node->hasChildNodes() && !replace) + throw Poco::Exception("Element <" + node->nodeName() + "> has value and does not have 'replace' attribute, can't process from_env substitution"); XMLDocumentPtr env_document; auto get_env_node = [&](const std::string & name) -> const Node * diff --git a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml b/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml index ffa26488874..b029dd3bd2e 100644 --- a/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml +++ b/tests/integration/test_config_substitutions/configs/000-config_with_env_subst.xml @@ -2,6 +2,7 @@ + 1 diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 46961e5da71..564985b2f50 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -1,6 +1,7 @@ import pytest from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( @@ -36,9 +37,13 @@ node7 = cluster.add_instance( "configs/000-config_with_env_subst.xml", "configs/010-env_subst_override.xml", ], - env_variables={"MAX_QUERY_SIZE": "121212"}, + env_variables={ + # overridden with 424242 + "MAX_QUERY_SIZE": "121212", + "MAX_THREADS": "2", + }, instance_env_variables=True, -) # overridden with 424242 +) @pytest.fixture(scope="module") @@ -91,6 +96,65 @@ def test_config(start_cluster): node7.query("select value from system.settings where name = 'max_query_size'") == "424242\n" ) + assert ( + node7.query("select value from system.settings where name = 'max_threads'") + == "2\n" + ) + + +def test_config_invalid_overrides(start_cluster): + node7.replace_config( + 
"/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + """ + + + + + 100 + + + + + + default + default + + + + + + +""", + ) + with pytest.raises( + QueryRuntimeException, + match="Failed to preprocess config '/etc/clickhouse-server/users.xml': Exception: Element has value and does not have 'replace' attribute, can't process from_env substitution", + ): + node7.query("SYSTEM RELOAD CONFIG") + node7.replace_config( + "/etc/clickhouse-server/users.d/000-config_with_env_subst.xml", + """ + + + + + 1 + + + + + + default + default + + + + + + +""", + ) + node7.query("SYSTEM RELOAD CONFIG") def test_include_config(start_cluster): From 3ac3a06561f8f98fd3f38b9048a5b1a44c263377 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Dec 2023 23:31:05 +0100 Subject: [PATCH 15/98] Add ClickBench to CI --- .github/workflows/pull_request.yml | 25 ++ docker/images.json | 5 + docker/test/clickbench/Dockerfile | 10 + docker/test/clickbench/create.sql | 112 +++++++++ docker/test/clickbench/queries.sql | 43 ++++ docker/test/clickbench/run.sh | 52 ++++ tests/ci/clickbench.py | 261 ++++++++++++++++++++ tests/ci/fast_test_check.py | 2 +- tests/ci/functional_test_check.py | 2 +- tests/ci/integration_test_check.py | 2 +- tests/ci/test_docker.py | 6 + tests/ci/tests/docker_images_for_tests.json | 5 + 12 files changed, 522 insertions(+), 3 deletions(-) create mode 100644 docker/test/clickbench/Dockerfile create mode 100644 docker/test/clickbench/create.sql create mode 100644 docker/test/clickbench/queries.sql create mode 100755 docker/test/clickbench/run.sh create mode 100644 tests/ci/clickbench.py diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a6631a93766..3db382b1fd6 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -701,6 +701,31 @@ jobs: cd "$REPO_COPY/tests/ci" python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" ############################################################################################## +########################### ClickBench ####################################################### +############################################################################################## + ClickBenchAMD64: + needs: [BuilderDebRelease] + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickBench (amd64) + runner_type: func-tester + additional_envs: | + KILL_TIMEOUT=1800 + run_command: | + cd "$REPO_COPY/tests/ci" + python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT" + ClickBenchAarch64: + needs: [ BuilderDebAarch64 ] + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickBench (aarch64) + runner_type: func-tester-aarch64 + additional_envs: | + KILL_TIMEOUT=1800 + run_command: | + cd "$REPO_COPY/tests/ci" + python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT" +############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## StressTestAsan: diff --git a/docker/images.json b/docker/images.json index 1535715648c..d2f098f53d7 100644 --- a/docker/images.json +++ b/docker/images.json @@ -125,6 +125,7 @@ "docker/test/server-jepsen", "docker/test/sqllogic", "docker/test/sqltest", + "docker/test/clickbench", "docker/test/stateless" ] }, @@ -145,6 +146,10 @@ "name": "clickhouse/server-jepsen-test", "dependent": [] }, + "docker/test/clickbench": { + "name": 
"clickhouse/clickbench", + "dependent": [] + }, "docker/test/install/deb": { "name": "clickhouse/install-deb-test", "dependent": [] diff --git a/docker/test/clickbench/Dockerfile b/docker/test/clickbench/Dockerfile new file mode 100644 index 00000000000..0b6b1736e03 --- /dev/null +++ b/docker/test/clickbench/Dockerfile @@ -0,0 +1,10 @@ +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +ENV TZ=Europe/Amsterdam +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY *.sh / +COPY *.sql / + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql new file mode 100644 index 00000000000..620bdf09331 --- /dev/null +++ b/docker/test/clickbench/create.sql @@ -0,0 +1,112 @@ +ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955' +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming 
INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +) +ENGINE = MergeTree +SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G', + disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/')); diff --git a/docker/test/clickbench/queries.sql b/docker/test/clickbench/queries.sql new file mode 100644 index 00000000000..31f65fc898d --- /dev/null +++ b/docker/test/clickbench/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE 
SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, 
COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh new file mode 100755 index 00000000000..6e02a346f04 --- /dev/null +++ b/docker/test/clickbench/run.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# shellcheck disable=SC1091 +source /setup_export_logs.sh + +# fail on errors, verbose and export all env variables +set -e -x -a + +dpkg -i package_folder/clickhouse-common-static_*.deb +dpkg -i package_folder/clickhouse-server_*.deb +dpkg -i package_folder/clickhouse-client_*.deb + +# A directory for cache +sudo mkdir /dev/shm/clickhouse +sudo chown clickhouse:clickhouse /dev/shm/clickhouse + +sudo clickhouse start + +# Wait for the server to start, but not for too long. +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + +setup_logs_replication + +# Load the data + +clickhouse-client --time < /create.sql + +# Run the queries + +TRIES=3 +QUERY_NUM=1 +cat /queries.sql | while read query; do + echo -n "[" + for i in $(seq 1 $TRIES); do + RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:) + [[ "$?" 
== "0" ]] && echo -n "${RES}" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + + echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv + done + echo "]," + + QUERY_NUM=$((QUERY_NUM + 1)) +done + +clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" + +echo -e "success\tClickBench finished" > /test_output/check_status.tsv diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py new file mode 100644 index 00000000000..0d3452b7a64 --- /dev/null +++ b/tests/ci/clickbench.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 + +import argparse +import csv +import logging +import os +import re +import subprocess +import sys +import atexit +from pathlib import Path +from typing import List, Tuple + +from github import Github + +from build_download_helper import download_all_deb_packages +from clickhouse_helper import ( + CiLogsCredentials, + ClickHouseHelper, + prepare_tests_results_for_clickhouse, +) +from commit_status_helper import ( + NotSet, + RerunHelper, + get_commit, + override_status, + post_commit_status, + post_commit_status_to_file, + update_mergeable_check, +) +from docker_pull_helper import DockerImage, get_image_with_version +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from s3_helper import S3Helper +from stopwatch import Stopwatch +from tee_popen import TeePopen +from upload_result_helper import upload_results + +NO_CHANGES_MSG = "Nothing to run" + + +def get_image_name(check_name: str) -> str: + return "clickhouse/clickbench" + + +def get_run_command( + check_name: str, + builds_path: Path, + result_path: Path, + server_log_path: Path, + additional_envs: List[str], + ci_logs_args: str, + image: DockerImage, +) -> str: + + envs = [f"-e {e}" for e in additional_envs] + + env_str = " ".join(envs) + + return ( + f"docker run --volume={builds_path}:/package_folder " + f"{ci_logs_args}" + f"--volume={result_path}:/test_output " + f"--volume={server_log_path}:/var/log/clickhouse-server " + f"--cap-add=SYS_PTRACE {env_str} {image}" + ) + +def process_results( + result_directory: Path, + server_log_path: Path, +) -> Tuple[str, str, TestResults, List[Path]]: + test_results = [] # type: TestResults + additional_files = [] + # Just upload all files from result_directory. + # If task provides processed results, then it's responsible for content of result_directory. 
+ if result_directory.exists(): + additional_files = [p for p in result_directory.iterdir() if p.is_file()] + + if server_log_path.exists(): + additional_files = additional_files + [ + p for p in server_log_path.iterdir() if p.is_file() + ] + + status = [] + status_path = result_directory / "check_status.tsv" + if status_path.exists(): + logging.info("Found check_status.tsv") + with open(status_path, "r", encoding="utf-8") as status_file: + status = list(csv.reader(status_file, delimiter="\t")) + + if len(status) != 1 or len(status[0]) != 2: + logging.info("Files in result folder %s", os.listdir(result_directory)) + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + try: + results_path = result_directory / "test_results.tsv" + + if results_path.exists(): + logging.info("Found test_results.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_directory)) + return "error", "Not found test_results.tsv", test_results, additional_files + + except Exception as e: + return ( + "error", + f"Cannot parse test_results.tsv ({e})", + test_results, + additional_files, + ) + + return state, description, test_results, additional_files + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("check_name") + parser.add_argument("kill_timeout", type=int) + parser.add_argument( + "--post-commit-status", + default="commit_status", + choices=["commit_status", "file"], + help="Where to public post commit status", + ) + return parser.parse_args() + + +def main(): + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = Path(TEMP_PATH) + temp_path.mkdir(parents=True, exist_ok=True) + + repo_path = Path(REPO_COPY) + reports_path = Path(REPORTS_PATH) + post_commit_path = temp_path / "clickbench_status.tsv" + + args = parse_args() + check_name = args.check_name + kill_timeout = args.kill_timeout + + gh = Github(get_best_robot_token(), per_page=100) + + pr_info = PRInfo() + + commit = get_commit(gh, pr_info.sha) + atexit.register(update_mergeable_check, gh, pr_info, check_name) + + rerun_helper = RerunHelper(commit, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + image_name = get_image_name(check_name) + docker_image = get_image_with_version(reports_path, image_name) + + packages_path = temp_path / "packages" + packages_path.mkdir(parents=True, exist_ok=True) + + download_all_deb_packages(check_name, reports_path, packages_path) + + server_log_path = temp_path / "server_log" + server_log_path.mkdir(parents=True, exist_ok=True) + + result_path = temp_path / "result_path" + result_path.mkdir(parents=True, exist_ok=True) + + run_log_path = result_path / "run.log" + + additional_envs = [] + + ci_logs_credentials = CiLogsCredentials(temp_path / "export-logs-config.sh") + ci_logs_args = ci_logs_credentials.get_docker_arguments( + pr_info, stopwatch.start_time_str, check_name + ) + + run_command = get_run_command( + check_name, + packages_path, + repo_path, + result_path, + server_log_path, + kill_timeout, + additional_envs, + ci_logs_args, + docker_image, + ) + logging.info("Going to run ClickBench: %s", run_command) + + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + try: + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu 
{temp_path}", shell=True) + except subprocess.CalledProcessError: + logging.warning("Failed to change files owner in %s, ignoring it", temp_path) + + ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) + s3_helper = S3Helper() + + state, description, test_results, additional_logs = process_results( + result_path, server_log_path + ) + state = override_status(state, check_name) + + ch_helper = ClickHouseHelper() + + report_url = upload_results( + s3_helper, + pr_info.number, + pr_info.sha, + test_results, + [run_log_path] + additional_logs, + check_name, + ) + + print(f"::notice:: {check_name} Report url: {report_url}") + if args.post_commit_status == "commit_status": + post_commit_status( + commit, state, report_url, description, check_name_with_group, pr_info + ) + elif args.post_commit_status == "file": + post_commit_status_to_file( + post_commit_path, + description, + state, + report_url, + ) + else: + raise Exception( + f'Unknown post_commit_status option "{args.post_commit_status}"' + ) + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + state, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + if state != "success": + if FORCE_TESTS_LABEL in pr_info.labels: + print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index ee046d6dfef..f5c7342d6f4 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -72,7 +72,7 @@ def process_results(result_directory: Path) -> Tuple[str, str, TestResults]: status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index f85c44f617d..c8b3e42eed4 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -169,7 +169,7 @@ def process_results( status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 8535d71c5c4..b22aa08354c 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -118,7 +118,7 @@ def process_results( status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) diff --git a/tests/ci/test_docker.py b/tests/ci/test_docker.py index 8aab50ed082..f15fd1ae63f 100644 --- a/tests/ci/test_docker.py +++ b/tests/ci/test_docker.py @@ -64,6 +64,12 @@ class TestDockerImageCheck(unittest.TestCase): False, "clickhouse/test-base", # type: ignore ), + di.DockerImage( + "docker/test/clickbench", + "clickhouse/clickbench", + False, + "clickhouse/test-base", # type: ignore + ), 
di.DockerImage( "docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test", diff --git a/tests/ci/tests/docker_images_for_tests.json b/tests/ci/tests/docker_images_for_tests.json index 70db8760561..19e4b94bcdf 100644 --- a/tests/ci/tests/docker_images_for_tests.json +++ b/tests/ci/tests/docker_images_for_tests.json @@ -120,6 +120,7 @@ "docker/test/integration/base", "docker/test/fuzzer", "docker/test/keeper-jepsen", + "docker/test/clickbench", "docker/test/sqltest" ] }, @@ -131,6 +132,10 @@ "name": "clickhouse/sqlancer-test", "dependent": [] }, + "docker/test/clickbench": { + "name": "clickhouse/clickbench", + "dependent": [] + }, "docker/test/keeper-jepsen": { "name": "clickhouse/keeper-jepsen-test", "dependent": [] From f8f42d82ddcdb3f48f4ceab0154486ad4b80a8b0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 9 Dec 2023 22:52:58 +0000 Subject: [PATCH 16/98] Automatic style fix --- tests/ci/clickbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 0d3452b7a64..2b320d69e7d 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -54,7 +54,6 @@ def get_run_command( ci_logs_args: str, image: DockerImage, ) -> str: - envs = [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) @@ -67,6 +66,7 @@ def get_run_command( f"--cap-add=SYS_PTRACE {env_str} {image}" ) + def process_results( result_directory: Path, server_log_path: Path, From 17772dc41c3098c5a8411e80cd970e6d73c45ce2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:03:33 +0100 Subject: [PATCH 17/98] Recommendation from @felixoid --- tests/ci/tests/docker_images_for_tests.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/ci/tests/docker_images_for_tests.json b/tests/ci/tests/docker_images_for_tests.json index 19e4b94bcdf..70db8760561 100644 --- a/tests/ci/tests/docker_images_for_tests.json +++ b/tests/ci/tests/docker_images_for_tests.json @@ -120,7 +120,6 @@ "docker/test/integration/base", "docker/test/fuzzer", "docker/test/keeper-jepsen", - "docker/test/clickbench", "docker/test/sqltest" ] }, @@ -132,10 +131,6 @@ "name": "clickhouse/sqlancer-test", "dependent": [] }, - "docker/test/clickbench": { - "name": "clickhouse/clickbench", - "dependent": [] - }, "docker/test/keeper-jepsen": { "name": "clickhouse/keeper-jepsen-test", "dependent": [] From f2875068be79c2b1838aad693e338e0bbf8b3b96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:19:30 +0100 Subject: [PATCH 18/98] Style --- docker/test/clickbench/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 6e02a346f04..eadaa2b5939 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -33,19 +33,19 @@ clickhouse-client --time < /create.sql TRIES=3 QUERY_NUM=1 -cat /queries.sql | while read query; do +while read query; do echo -n "[" for i in $(seq 1 $TRIES); do RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:) - [[ "$?" 
== "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " + echo -n "${RES}" + [[ "$i" != "$TRIES" ]] && echo -n ", " echo "${QUERY_NUM},${i},${RES}" >> /test_output/test_results.tsv done echo "]," QUERY_NUM=$((QUERY_NUM + 1)) -done +done < /queries.sql clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" From 2602a7ba817b23668f46aade018601da3214669b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:22:19 +0100 Subject: [PATCH 19/98] Python --- tests/ci/clickbench.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 2b320d69e7d..5f41d3d497f 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -41,12 +41,11 @@ from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" -def get_image_name(check_name: str) -> str: +def get_image_name() -> str: return "clickhouse/clickbench" def get_run_command( - check_name: str, builds_path: Path, result_path: Path, server_log_path: Path, @@ -70,8 +69,8 @@ def get_run_command( def process_results( result_directory: Path, server_log_path: Path, -) -> Tuple[str, str, TestResults, List[Path]]: - test_results = [] # type: TestResults +) -> Tuple[str, str, List[str], List[Path]]: + test_results = [] additional_files = [] # Just upload all files from result_directory. # If task provides processed results, then it's responsible for content of result_directory. @@ -156,7 +155,7 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - image_name = get_image_name(check_name) + image_name = get_image_name() docker_image = get_image_with_version(reports_path, image_name) packages_path = temp_path / "packages" @@ -180,9 +179,7 @@ def main(): ) run_command = get_run_command( - check_name, packages_path, - repo_path, result_path, server_log_path, kill_timeout, From ea31c8471f11ccc4c753ae98643fbac3715c2b29 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:26:44 +0100 Subject: [PATCH 20/98] Python --- tests/ci/clickbench.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 5f41d3d497f..82209acd2c4 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -4,7 +4,6 @@ import argparse import csv import logging import os -import re import subprocess import sys import atexit @@ -20,7 +19,6 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - NotSet, RerunHelper, get_commit, override_status, @@ -29,7 +27,6 @@ from commit_status_helper import ( update_mergeable_check, ) from docker_pull_helper import DockerImage, get_image_with_version -from download_release_packages import download_last_release from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo @@ -38,8 +35,6 @@ from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results -NO_CHANGES_MSG = "Nothing to run" - def get_image_name() -> str: return "clickhouse/clickbench" From 9a3860581c4a8e3b01026c6355cb9ea6110a7ea1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:27:11 +0100 Subject: [PATCH 21/98] YAML --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 3db382b1fd6..5bb62b04c32 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -715,7 +715,7 @@ jobs: cd "$REPO_COPY/tests/ci" python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT" ClickBenchAarch64: - needs: [ BuilderDebAarch64 ] + needs: [BuilderDebAarch64] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickBench (aarch64) From af48cb97322541495e496bfc1a346186584d936e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 00:35:04 +0100 Subject: [PATCH 22/98] Python --- tests/ci/test_docker.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/ci/test_docker.py b/tests/ci/test_docker.py index f15fd1ae63f..8aab50ed082 100644 --- a/tests/ci/test_docker.py +++ b/tests/ci/test_docker.py @@ -64,12 +64,6 @@ class TestDockerImageCheck(unittest.TestCase): False, "clickhouse/test-base", # type: ignore ), - di.DockerImage( - "docker/test/clickbench", - "clickhouse/clickbench", - False, - "clickhouse/test-base", # type: ignore - ), di.DockerImage( "docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test", From 0eb7a41babe9148dc2b246d25717f4008900661e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 7 Dec 2023 13:54:28 +0300 Subject: [PATCH 23/98] CHJIT add assembly printer --- src/Interpreters/JIT/CHJIT.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 63fe4f44f5f..8db50d73193 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -244,6 +244,33 @@ private: } }; +#define PRINT_ASSEMBLY + +#ifdef PRINT_ASSEMBLY + +class AssemblyPrinter +{ +public: + explicit AssemblyPrinter(llvm::TargetMachine &target_machine_) + : target_machine(target_machine_) + { + } + + void print(llvm::Module & module) + { + llvm::legacy::PassManager pass_manager; + target_machine.Options.MCOptions.AsmVerbose = true; + if (target_machine.addPassesToEmitFile(pass_manager, llvm::errs(), nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile)) + throw Exception(ErrorCodes::CANNOT_COMPILE_CODE, "MachineCode cannot be printed"); + + pass_manager.run(module); + } +private: + llvm::TargetMachine & target_machine; +}; + +#endif + /** MemoryManager for module. * Keep total allocated size during RuntimeDyld linker execution. 
*/ @@ -375,6 +402,11 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr module) { runOptimizationPassesOnModule(*module); +#ifdef PRINT_ASSEMBLY + AssemblyPrinter assembly_printer(*machine); + assembly_printer.print(*module); +#endif + auto buffer = compiler->compile(*module); llvm::Expected> object = llvm::object::ObjectFile::createObjectFile(*buffer); From cdf6da88fe6e5dcbc0341e535a180d7169d25b59 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 10 Dec 2023 16:09:55 +0300 Subject: [PATCH 24/98] Fixed code review issues --- src/Interpreters/JIT/CHJIT.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 8db50d73193..fc1decc8482 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -244,8 +244,6 @@ private: } }; -#define PRINT_ASSEMBLY - #ifdef PRINT_ASSEMBLY class AssemblyPrinter From bb501a0e136c41c5ac8573c010822ecaa30d136d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 19:06:38 +0100 Subject: [PATCH 25/98] Shellcheck --- docker/test/clickbench/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index eadaa2b5939..8244497a36c 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -33,7 +33,7 @@ clickhouse-client --time < /create.sql TRIES=3 QUERY_NUM=1 -while read query; do +while read -r query; do echo -n "[" for i in $(seq 1 $TRIES); do RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:) From 11dda11f22f252d4996171bcc2887a6664878588 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 19:08:50 +0100 Subject: [PATCH 26/98] Python --- tests/ci/clickbench.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 82209acd2c4..661a6ba50dc 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -130,7 +130,6 @@ def main(): temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) - repo_path = Path(REPO_COPY) reports_path = Path(REPORTS_PATH) post_commit_path = temp_path / "clickbench_status.tsv" @@ -177,7 +176,6 @@ def main(): packages_path, result_path, server_log_path, - kill_timeout, additional_envs, ci_logs_args, docker_image, @@ -218,7 +216,7 @@ def main(): print(f"::notice:: {check_name} Report url: {report_url}") if args.post_commit_status == "commit_status": post_commit_status( - commit, state, report_url, description, check_name_with_group, pr_info + commit, state, report_url, description, check_name, pr_info ) elif args.post_commit_status == "file": post_commit_status_to_file( @@ -239,6 +237,7 @@ def main(): stopwatch.duration_seconds, stopwatch.start_time_str, report_url, + check_name ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) From 99c8430c3e3b25ef4098e7006ec751fa34cabdd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 19:10:44 +0100 Subject: [PATCH 27/98] mypy --- tests/ci/clickbench.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 661a6ba50dc..657695a57a1 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -65,8 +65,8 @@ def process_results( result_directory: Path, server_log_path: Path, ) -> Tuple[str, str, List[str], List[Path]]: - test_results = [] - additional_files = [] + test_results = [] # type: List[str] + additional_files 
= [] # type: List[str] # Just upload all files from result_directory. # If task provides processed results, then it's responsible for content of result_directory. if result_directory.exists(): From c1e387d682df837782ea1b5dde828e38f6d18795 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 10 Dec 2023 18:32:09 +0000 Subject: [PATCH 28/98] Automatic style fix --- tests/ci/clickbench.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 657695a57a1..c2efcacefad 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -215,9 +215,7 @@ def main(): print(f"::notice:: {check_name} Report url: {report_url}") if args.post_commit_status == "commit_status": - post_commit_status( - commit, state, report_url, description, check_name, pr_info - ) + post_commit_status(commit, state, report_url, description, check_name, pr_info) elif args.post_commit_status == "file": post_commit_status_to_file( post_commit_path, @@ -237,7 +235,7 @@ def main(): stopwatch.duration_seconds, stopwatch.start_time_str, report_url, - check_name + check_name, ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) From 75df8db29870de080c60684889f9a7440492d248 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 19:52:52 +0100 Subject: [PATCH 29/98] Python --- tests/ci/clickbench.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 657695a57a1..8e6b9a621fe 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -27,7 +27,7 @@ from commit_status_helper import ( update_mergeable_check, ) from docker_pull_helper import DockerImage, get_image_with_version -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from env_helper import TEMP_PATH, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo from s3_helper import S3Helper @@ -135,7 +135,6 @@ def main(): args = parse_args() check_name = args.check_name - kill_timeout = args.kill_timeout gh = Github(get_best_robot_token(), per_page=100) From ce894c28f3c22ac52f359442c6f583608f771503 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 19:54:59 +0100 Subject: [PATCH 30/98] mypy --- tests/ci/clickbench.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 8e6b9a621fe..870d83ecded 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -65,8 +65,8 @@ def process_results( result_directory: Path, server_log_path: Path, ) -> Tuple[str, str, List[str], List[Path]]: - test_results = [] # type: List[str] - additional_files = [] # type: List[str] + test_results = [] # type: List[TestResult] + additional_files = [] # type: List[Path] # Just upload all files from result_directory. # If task provides processed results, then it's responsible for content of result_directory. 
if result_directory.exists(): @@ -164,7 +164,7 @@ def main(): run_log_path = result_path / "run.log" - additional_envs = [] + additional_envs = [] # type: List[str] ci_logs_credentials = CiLogsCredentials(temp_path / "export-logs-config.sh") ci_logs_args = ci_logs_credentials.get_docker_arguments( From 6b55c16b4ed40864aa0577fa61a9a6a41c12912d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 20:49:03 +0100 Subject: [PATCH 31/98] mypy --- tests/ci/clickbench.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 3cce429a4e3..9c700adb398 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -34,6 +34,7 @@ from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen from upload_result_helper import upload_results +from report import TestResults def get_image_name() -> str: @@ -64,8 +65,8 @@ def get_run_command( def process_results( result_directory: Path, server_log_path: Path, -) -> Tuple[str, str, List[str], List[Path]]: - test_results = [] # type: List[TestResult] +) -> Tuple[str, str, TestResults, List[Path]]: + test_results = [] # type: TestResults additional_files = [] # type: List[Path] # Just upload all files from result_directory. # If task provides processed results, then it's responsible for content of result_directory. From 4b13a6d08f35daf09ee1f6ef951f260f68523ea5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 22:02:05 +0100 Subject: [PATCH 32/98] Fix CI --- tests/ci/ci_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index aa3aa5654aa..e9f75d66b2e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -329,6 +329,8 @@ CI_CONFIG = CiConfig( "SQLancer (debug)": TestConfig("package_debug"), "Sqllogic test (release)": TestConfig("package_release"), "SQLTest": TestConfig("package_release"), + "ClickBench (amd64)": TestConfig("package_release"), + "ClickBench (aarch64)": TestConfig("package_aarch64"), "libFuzzer tests": TestConfig("fuzzers"), }, ) @@ -507,6 +509,11 @@ CHECK_DESCRIPTIONS = [ "successfully startup without any errors, crashes or sanitizer asserts", lambda x: x.startswith("Upgrade check ("), ), + CheckDescription( + "ClickBench", + "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", + lambda x: x.startswith("Upgrade check ("), + ), CheckDescription( "Falback for unknown", "There's no description for the check yet, please add it to " From 7df7793724c7ff30c0f6c0d475eeda121d9c3cab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Dec 2023 23:53:22 +0100 Subject: [PATCH 33/98] Docker --- docker/test/clickbench/run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 8244497a36c..82eb06dffcd 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -11,10 +11,10 @@ dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb # A directory for cache -sudo mkdir /dev/shm/clickhouse -sudo chown clickhouse:clickhouse /dev/shm/clickhouse +mkdir /dev/shm/clickhouse +chown clickhouse:clickhouse /dev/shm/clickhouse -sudo clickhouse start +clickhouse start # Wait for the server to start, but not for too long. 
for _ in {1..100} From 740ceea108f61d8cda725d6a1a5c0f26b4b399cd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Dec 2023 03:10:17 +0100 Subject: [PATCH 34/98] Docker --- docker/test/base/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index b55baa0e0fc..b48017fdacc 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -12,6 +12,7 @@ RUN apt-get update \ ripgrep \ zstd \ locales \ + sudo \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) From 89ba4a845232849b7bd2dc807b63328d853f0451 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 7 Dec 2023 07:28:41 +0100 Subject: [PATCH 35/98] Add HTTP readiness endpoint --- programs/keeper/Keeper.cpp | 23 +++++ programs/server/Server.cpp | 29 ++++++ src/Server/HTTPHandlerFactory.h | 7 ++ src/Server/KeeperReadinessHandler.cpp | 95 +++++++++++++++++++ src/Server/KeeperReadinessHandler.h | 31 ++++++ tests/integration/helpers/keeper_utils.py | 5 + .../test_keeper_http_control/__init__.py | 0 .../configs/enable_keeper1.xml | 37 ++++++++ .../configs/enable_keeper2.xml | 37 ++++++++ .../configs/enable_keeper3.xml | 37 ++++++++ .../test_keeper_http_control/test.py | 62 ++++++++++++ 11 files changed, 363 insertions(+) create mode 100644 src/Server/KeeperReadinessHandler.cpp create mode 100644 src/Server/KeeperReadinessHandler.h create mode 100644 tests/integration/test_keeper_http_control/__init__.py create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper2.xml create mode 100644 tests/integration/test_keeper_http_control/configs/enable_keeper3.xml create mode 100644 tests/integration/test_keeper_http_control/test.py diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 1acf7e39b04..4f45b09b682 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -466,6 +466,29 @@ try std::make_unique( std::move(my_http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) mutable + { + auto my_http_context = httpContext(); + Poco::Timespan my_keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr my_http_params = new Poco::Net::HTTPServerParams; + my_http_params->setTimeout(my_http_context->getReceiveTimeout()); + my_http_params->setKeepAliveTimeout(my_keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(my_http_context->getReceiveTimeout()); + socket.setSendTimeout(my_http_context->getSendTimeout()); + servers->emplace_back( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(*this, config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params) + ); + }); } for (auto & server : *servers) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8076d108083..ee3c7a72edc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1551,6 +1551,35 @@ try throw 
Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = httpContext(); + Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + *this, + config_getter(), + global_context->getKeeperDispatcher(), + "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); + }); } #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 94b02e52277..fd927d480fd 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -136,6 +137,12 @@ createPrometheusMainHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name); +HTTPRequestHandlerFactoryPtr +createKeeperHTTPControlMainHandlerFactory(IServer & server, + const Poco::Util::AbstractConfiguration & config, + std::shared_ptr keeper_dispatcher, + const std::string & name); + /// @param server - used in handlers to check IServer::isCancelled() /// @param config - not the same as server.config(), since it can be newer /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true) diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp new file mode 100644 index 00000000000..f69f6cb377a --- /dev/null +++ b/src/Server/KeeperReadinessHandler.cpp @@ -0,0 +1,95 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTPServerResponse & response) +{ + try + { + auto is_leader = keeper_dispatcher->isLeader(); + auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader(); + + auto status = is_leader || is_follower; + + Poco::JSON::Object json, details; + + details.set("leader", is_leader); + details.set("follower", is_follower); + json.set("details", details); + json.set("status", status ? "ok": "fail"); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + + if (!status) + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE); + + *response.send() << oss.str(); + } + catch (...) 
+ { + tryLogCurrentException("KeeperReadinessHandler"); + + try + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + + if (!response.sent()) + { + /// We have not sent anything yet and we don't even know if we need to compress response. + *response.send() << getCurrentExceptionMessage(false) << std::endl; + } + } + catch (...) + { + LOG_ERROR((&Poco::Logger::get("KeeperReadinessHandler")), "Cannot send exception to client"); + } + } +} + + +HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory( + IServer & server, + const Poco::Util::AbstractConfiguration & config, + std::shared_ptr keeper_dispatcher, + const std::string & name) +{ + auto factory = std::make_shared(name); + using Factory = HandlingRuleHTTPHandlerFactory; + Factory::Creator creator = [&server, keeper_dispatcher]() -> std::unique_ptr + { + return std::make_unique(server, keeper_dispatcher); + }; + + auto readiness_handler = std::make_shared(std::move(creator)); + + readiness_handler->attachStrictPath(config.getString("keeper_server.http_control.readiness.endpoint", "/ready")); + readiness_handler->allowGetAndHeadRequest(); + factory->addHandler(readiness_handler); + + return factory; +} + +} diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h new file mode 100644 index 00000000000..143751c5d67 --- /dev/null +++ b/src/Server/KeeperReadinessHandler.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class Context; +class IServer; + +class KeeperReadinessHandler : public HTTPRequestHandler, WithContext +{ +private: + IServer & server; + std::shared_ptr keeper_dispatcher; + +public: + explicit KeeperReadinessHandler(IServer & server_, std::shared_ptr keeper_dispatcher_) + : server(server_) + , keeper_dispatcher(keeper_dispatcher_) + { + } + + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; +}; + + +} diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 1ca17e923e4..6e4c1c1d417 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -278,6 +278,11 @@ def get_leader(cluster, nodes): return node raise Exception("No leader in Keeper cluster.") +def get_follower(cluster, nodes): + for node in nodes: + if is_follower(cluster, node): + return node + raise Exception("No followers in Keeper cluster.") def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: _fake = KazooClient( diff --git a/tests/integration/test_keeper_http_control/__init__.py b/tests/integration/test_keeper_http_control/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml new file mode 100644 index 00000000000..20e3c307f31 --- /dev/null +++ b/tests/integration/test_keeper_http_control/configs/enable_keeper1.xml @@ -0,0 +1,37 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 9182 + + + diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml new file mode 100644 index 00000000000..b9002eb2436 --- /dev/null +++ 
b/tests/integration/test_keeper_http_control/configs/enable_keeper2.xml @@ -0,0 +1,37 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 9182 + + + diff --git a/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml b/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml new file mode 100644 index 00000000000..6e4e17399f7 --- /dev/null +++ b/tests/integration/test_keeper_http_control/configs/enable_keeper3.xml @@ -0,0 +1,37 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 9182 + + + diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py new file mode 100644 index 00000000000..04d84671de2 --- /dev/null +++ b/tests/integration/test_keeper_http_control/test.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import os +import pytest +import requests + +import helpers.keeper_utils as keeper_utils +from kazoo.client import KazooClient +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +node1 = cluster.add_instance( + "node1", main_configs=["configs/enable_keeper1.xml"], stay_alive=True +) +node2 = cluster.add_instance( + "node2", main_configs=["configs/enable_keeper2.xml"], stay_alive=True +) +node3 = cluster.add_instance( + "node3", main_configs=["configs/enable_keeper3.xml"], stay_alive=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def get_fake_zk(node, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_http_readiness(started_cluster): + leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) + response = requests.get( + "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182) + ) + assert(response.status_code == 200) + + readiness_data = response.json() + assert(readiness_data["status"] == "ok") + assert(readiness_data["details"]["leader"] == True) + assert(readiness_data["details"]["follower"] == False) + + follower = keeper_utils.get_follower(cluster, [node1, node2, node3]) + response = requests.get( + "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) + ) + assert(response.status_code == 200) + + readiness_data = response.json() + assert(readiness_data["status"] == "ok") + assert(readiness_data["details"]["leader"] == False) + assert(readiness_data["details"]["follower"] == True) From 3adb83e406027edc92a9792aa0f5587e33451765 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 7 Dec 2023 06:49:10 +0000 Subject: [PATCH 36/98] Automatic style fix --- tests/integration/helpers/keeper_utils.py | 2 ++ .../test_keeper_http_control/test.py | 21 ++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 6e4c1c1d417..e07bce901d2 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -278,12 +278,14 
@@ def get_leader(cluster, nodes): return node raise Exception("No leader in Keeper cluster.") + def get_follower(cluster, nodes): for node in nodes: if is_follower(cluster, node): return node raise Exception("No followers in Keeper cluster.") + def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: _fake = KazooClient( hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py index 04d84671de2..b415a03a5c4 100644 --- a/tests/integration/test_keeper_http_control/test.py +++ b/tests/integration/test_keeper_http_control/test.py @@ -38,25 +38,26 @@ def get_fake_zk(node, timeout=30.0): _fake_zk_instance.start() return _fake_zk_instance + def test_http_readiness(started_cluster): leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) response = requests.get( - "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182) + "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182) ) - assert(response.status_code == 200) + assert response.status_code == 200 readiness_data = response.json() - assert(readiness_data["status"] == "ok") - assert(readiness_data["details"]["leader"] == True) - assert(readiness_data["details"]["follower"] == False) + assert readiness_data["status"] == "ok" + assert readiness_data["details"]["leader"] == True + assert readiness_data["details"]["follower"] == False follower = keeper_utils.get_follower(cluster, [node1, node2, node3]) response = requests.get( - "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) + "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) ) - assert(response.status_code == 200) + assert response.status_code == 200 readiness_data = response.json() - assert(readiness_data["status"] == "ok") - assert(readiness_data["details"]["leader"] == False) - assert(readiness_data["details"]["follower"] == True) + assert readiness_data["status"] == "ok" + assert readiness_data["details"]["leader"] == False + assert readiness_data["details"]["follower"] == True From 771d5c58ea49271b96e70fbfe5c992362eae6c13 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 8 Dec 2023 05:28:01 +0100 Subject: [PATCH 37/98] Fix Keeper standalone build --- programs/keeper/CMakeLists.txt | 1 + programs/keeper/Keeper.cpp | 3 ++- programs/server/Server.cpp | 2 +- src/Server/HTTPHandlerFactory.h | 6 ------ src/Server/KeeperReadinessHandler.cpp | 17 ++++------------- src/Server/KeeperReadinessHandler.h | 15 +++++++++------ 6 files changed, 17 insertions(+), 27 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index cdb1d89b18e..f3d82b6029b 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -68,6 +68,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperReadinessHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 4f45b09b682..7585f147161 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -32,6 +32,7 
@@ #include #include #include +#include #include "Core/Defines.h" #include "config.h" @@ -486,7 +487,7 @@ try port_name, "HTTP Control: http://" + address.toString(), std::make_unique( - std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(*this, config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params) + std::move(my_http_context), createKeeperHTTPControlMainHandlerFactory(config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params) ); }); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ee3c7a72edc..5abaf67e3d8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -1575,7 +1576,6 @@ try std::make_unique( std::move(http_context), createKeeperHTTPControlMainHandlerFactory( - *this, config_getter(), global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index fd927d480fd..459d54e27a5 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -137,12 +137,6 @@ createPrometheusMainHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name); -HTTPRequestHandlerFactoryPtr -createKeeperHTTPControlMainHandlerFactory(IServer & server, - const Poco::Util::AbstractConfiguration & config, - std::shared_ptr keeper_dispatcher, - const std::string & name); - /// @param server - used in handlers to check IServer::isCancelled() /// @param config - not the same as server.config(), since it can be newer /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true) diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index f69f6cb377a..9ba4fe44ab4 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -1,18 +1,11 @@ #include -#include -#include #include -#include -#include +#include +#include #include #include -#include -#include -#include -#include #include - #include #include #include @@ -69,18 +62,16 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP } } - HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, std::shared_ptr keeper_dispatcher, const std::string & name) { auto factory = std::make_shared(name); using Factory = HandlingRuleHTTPHandlerFactory; - Factory::Creator creator = [&server, keeper_dispatcher]() -> std::unique_ptr + Factory::Creator creator = [keeper_dispatcher]() -> std::unique_ptr { - return std::make_unique(server, keeper_dispatcher); + return std::make_unique(keeper_dispatcher); }; auto readiness_handler = std::make_shared(std::move(creator)); diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h index 143751c5d67..03fd58e831f 100644 --- a/src/Server/KeeperReadinessHandler.h +++ b/src/Server/KeeperReadinessHandler.h @@ -1,8 +1,8 @@ #pragma once -#include +#include #include -#include +#include #include namespace DB @@ -14,18 +14,21 @@ class IServer; class KeeperReadinessHandler : public HTTPRequestHandler, WithContext { private: - IServer & server; std::shared_ptr keeper_dispatcher; public: - explicit KeeperReadinessHandler(IServer & server_, 
std::shared_ptr keeper_dispatcher_) - : server(server_) - , keeper_dispatcher(keeper_dispatcher_) + explicit KeeperReadinessHandler(std::shared_ptr keeper_dispatcher_) + : keeper_dispatcher(keeper_dispatcher_) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; }; +HTTPRequestHandlerFactoryPtr +createKeeperHTTPControlMainHandlerFactory( + const Poco::Util::AbstractConfiguration & config, + std::shared_ptr keeper_dispatcher, + const std::string & name); } From 76966818bade118a0e003469314fbb196f806c66 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 8 Dec 2023 13:57:22 +0100 Subject: [PATCH 38/98] Fix FreeBSD build --- src/Server/KeeperReadinessHandler.cpp | 7 ++++++- src/Server/KeeperReadinessHandler.h | 10 ++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index 9ba4fe44ab4..37afd8e9898 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -1,8 +1,11 @@ +#include + +#if USE_NURAFT + #include #include #include -#include #include #include #include @@ -84,3 +87,5 @@ HTTPRequestHandlerFactoryPtr createKeeperHTTPControlMainHandlerFactory( } } + +#endif diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h index 03fd58e831f..caa59098427 100644 --- a/src/Server/KeeperReadinessHandler.h +++ b/src/Server/KeeperReadinessHandler.h @@ -1,6 +1,9 @@ #pragma once -#include +#include "config.h" + +#if USE_NURAFT + #include #include #include @@ -8,9 +11,6 @@ namespace DB { -class Context; -class IServer; - class KeeperReadinessHandler : public HTTPRequestHandler, WithContext { private: @@ -32,3 +32,5 @@ createKeeperHTTPControlMainHandlerFactory( const std::string & name); } + +#endif From 7172a8ec9a87b43097db50f64914f2991329856c Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 8 Dec 2023 13:22:46 +0100 Subject: [PATCH 39/98] Remove redundant include --- src/Server/HTTPHandlerFactory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 459d54e27a5..94b02e52277 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include From 4ccf4e11a8ee0e1a7504701f0c77792064bcb5a1 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 11 Dec 2023 13:00:08 +0000 Subject: [PATCH 40/98] Parallel replicas (perf): announcement response handling improvement --- .../MergeTree/ParallelReplicasReadingCoordinator.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 9137dc89705..44d10eda21e 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -161,11 +161,10 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc PartRefs parts_diff; /// To get rid of duplicates - for (auto && part: announcement.description) + for (auto && part_ranges: announcement.description) { - auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(), - [&part] (const Part & other) { return other.description.info.getPartNameV1() == part.info.getPartNameV1(); }); - + Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}}; + auto 
the_same_it = all_parts_to_read.find(part); /// We have the same part - add the info about presence on current replica to it if (the_same_it != all_parts_to_read.end()) { @@ -174,13 +173,13 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc } auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(), - [&part] (const Part & other) { return !other.description.info.isDisjoint(part.info); }); + [&part] (const Part & other) { return !other.description.info.isDisjoint(part.description.info); }); /// It is covering part or we have covering - skip it if (covering_or_the_same_it != all_parts_to_read.end()) continue; - auto [insert_it, _] = all_parts_to_read.emplace(Part{.description = std::move(part), .replicas = {announcement.replica_num}}); + auto [insert_it, _] = all_parts_to_read.emplace(part); parts_diff.push_back(insert_it); } From c76ceb29fd3619b755c631d0d0aa4a60a602a97b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Dec 2023 17:21:02 +0100 Subject: [PATCH 41/98] Minor changes --- docker/test/clickbench/run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 82eb06dffcd..7357fa6df86 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -31,6 +31,8 @@ clickhouse-client --time < /create.sql # Run the queries +set +x + TRIES=3 QUERY_NUM=1 while read -r query; do @@ -47,6 +49,8 @@ while read -r query; do QUERY_NUM=$((QUERY_NUM + 1)) done < /queries.sql +set -x + clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" echo -e "success\tClickBench finished" > /test_output/check_status.tsv From 30dabd42d35e4dea6805b6340f3fd5edd8561d59 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 11 Dec 2023 19:37:42 +0000 Subject: [PATCH 42/98] One lookup for 2 checks (1) if it's the same part (2) if annouced part has intersections with parts in working set --- .../ParallelReplicasReadingCoordinator.cpp | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 44d10eda21e..d81f5dd41ce 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -164,22 +164,34 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc for (auto && part_ranges: announcement.description) { Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}}; - auto the_same_it = all_parts_to_read.find(part); - /// We have the same part - add the info about presence on current replica to it - if (the_same_it != all_parts_to_read.end()) + + auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part); + if (it != all_parts_to_read.cend()) { - the_same_it->replicas.insert(announcement.replica_num); - continue; + const MergeTreePartInfo & announced_part = part.description.info; + const MergeTreePartInfo & found_part = it->description.info; + if (found_part == announced_part) + { + /// We have the same part - add the info about presence on current replica + it->replicas.insert(announcement.replica_num); + continue; + } + else + { + /// check if it is covering or covered part + /// need to compare with 2 nearest parts in set, - lesser and greater than the part from the announcement 
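The "one lookup for two checks" rework above relies on the working set being ordered, so an announced part only has to be compared with its two neighbours instead of the whole set. A toy model of that idea, with parts reduced to (min_block, max_block) ranges inside a single partition (an assumption; the real code compares MergeTreePartInfo objects and covering parts by name):

    import bisect
    from typing import List, Set, Tuple

    Part = Tuple[int, int]  # (min_block, max_block), simplified stand-in for a data part

    def add_announced_part(parts: List[Part], replicas: List[Set[int]],
                           part: Part, replica: int) -> bool:
        """parts stays sorted; replicas[i] is the set of replicas that announced parts[i]."""
        pos = bisect.bisect_left(parts, part)
        # Exactly the same part is already known: just record one more replica.
        if pos < len(parts) and parts[pos] == part:
            replicas[pos].add(replica)
            return False
        # In a sorted, pairwise-disjoint working set only the two neighbours can intersect.
        for neighbour in (pos - 1, pos):
            if 0 <= neighbour < len(parts):
                lo, hi = parts[neighbour]
                if not (hi < part[0] or part[1] < lo):  # overlap: covered or covering part
                    return False
        parts.insert(pos, part)
        replicas.insert(pos, {replica})
        return True

Compared with the previous linear scan over every known part, the same two questions (already present? intersecting?) are now answered with a single ordered lookup.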
+ bool is_disjoint = found_part.isDisjoint(announced_part); + if (it != all_parts_to_read.cbegin() && is_disjoint) + { + const MergeTreePartInfo & lesser_part_info = (--it)->description.info; + is_disjoint &= lesser_part_info.isDisjoint(announced_part); + } + if (!is_disjoint) + continue; + } } - auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(), - [&part] (const Part & other) { return !other.description.info.isDisjoint(part.description.info); }); - - /// It is covering part or we have covering - skip it - if (covering_or_the_same_it != all_parts_to_read.end()) - continue; - - auto [insert_it, _] = all_parts_to_read.emplace(part); + auto [insert_it, _] = all_parts_to_read.emplace(std::move(part)); parts_diff.push_back(insert_it); } From fcb8ab9b6375907cf0b0e5ee498f11972925d503 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 00:42:23 +0300 Subject: [PATCH 43/98] Update tests/ci/functional_test_check.py Co-authored-by: Mikhail f. Shiryaev --- tests/ci/functional_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index c8b3e42eed4..0dea2c5476f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -169,7 +169,7 @@ def process_results( status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found check_status.tsv") + logging.info("Found %s", status_path.name) with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) From b9b6e7584e547a358098a6a174f302cb2d7b7774 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 00:42:41 +0300 Subject: [PATCH 44/98] Update tests/ci/fast_test_check.py Co-authored-by: Mikhail f. Shiryaev --- tests/ci/fast_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index f5c7342d6f4..265fc81ccb3 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -72,7 +72,7 @@ def process_results(result_directory: Path) -> Tuple[str, str, TestResults]: status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found check_status.tsv") + logging.info("Found %s", status_path.name) with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: From a3262003f3bba9921e160d1abf8a05027b92a69d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 00:42:47 +0300 Subject: [PATCH 45/98] Update tests/ci/integration_test_check.py Co-authored-by: Mikhail f. 
Shiryaev --- tests/ci/integration_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index b22aa08354c..e49cec6d694 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -118,7 +118,7 @@ def process_results( status = [] status_path = result_directory / "check_status.tsv" if status_path.exists(): - logging.info("Found check_status.tsv") + logging.info("Found %s", status_path.name) with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) From 0c81892463428e06b30b1d7a3a9729f86618a3a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 00:43:06 +0300 Subject: [PATCH 46/98] Update tests/ci/clickbench.py Co-authored-by: Mikhail f. Shiryaev --- tests/ci/clickbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 9c700adb398..359c10eeb9d 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -94,7 +94,7 @@ def process_results( results_path = result_directory / "test_results.tsv" if results_path.exists(): - logging.info("Found test_results.tsv") + logging.info("Found %s", results_path.name) else: logging.info("Files in result folder %s", os.listdir(result_directory)) return "error", "Not found test_results.tsv", test_results, additional_files From 16afd81322a15e4cdde07ea0007d45bbdbccd1b9 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 12 Dec 2023 00:19:16 +0100 Subject: [PATCH 47/98] Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER. --- src/Backups/BackupCoordinationRemote.cpp | 6 +- src/Backups/BackupCoordinationStageSync.cpp | 107 ++++++++------------ src/Backups/BackupCoordinationStageSync.h | 2 +- 3 files changed, 43 insertions(+), 72 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 2633e1bedd2..7319b1aba58 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -184,11 +184,9 @@ BackupCoordinationRemote::BackupCoordinationRemote( if (my_is_internal) { String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host; + zk->createAncestors(alive_node_path); auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); - - if (code == Coordination::Error::ZNODEEXISTS) - zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, ""); - else if (code != Coordination::Error::ZOK) + if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) throw zkutil::KeeperException::fromPath(code, alive_node_path); } }) diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp index 9b9ddc8515c..e4dac7dbbe9 100644 --- a/src/Backups/BackupCoordinationStageSync.cpp +++ b/src/Backups/BackupCoordinationStageSync.cpp @@ -60,12 +60,6 @@ void BackupCoordinationStageSync::set(const String & current_host, const String } else { - /// Make an ephemeral node so the initiator can track if the current host is still working. 
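The hunk above registers an ephemeral "alive" marker so the initiator can tell whether a host is still connected; the node disappears automatically when the session dies. The same pattern expressed with kazoo, the client the integration tests in this series already use (address and path are placeholders):

    from kazoo.client import KazooClient
    from kazoo.exceptions import NodeExistsError

    zk = KazooClient(hosts="localhost:9181")  # placeholder Keeper address
    zk.start()

    alive_node = "/clickhouse/backups/backup-1/stage/alive|host-1"  # illustrative path
    try:
        # makepath=True plays the role of createAncestors(); the leaf node is ephemeral.
        zk.create(alive_node, b"", ephemeral=True, makepath=True)
    except NodeExistsError:
        pass  # tolerated here, like the ZNODEEXISTS branch above
    zk.stop()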
- String alive_node_path = zookeeper_path + "/alive|" + current_host; - auto code = zookeeper->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); - if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS) - throw zkutil::KeeperException::fromPath(code, alive_node_path); - zookeeper->createIfNotExists(zookeeper_path + "/started|" + current_host, ""); zookeeper->createIfNotExists(zookeeper_path + "/current|" + current_host + "|" + new_stage, message); } @@ -118,27 +112,24 @@ struct BackupCoordinationStageSync::State Strings results; std::map unready_hosts; std::optional> error; - std::optional host_terminated; + std::optional disconnected_host; }; BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState( - const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const + WithRetries::RetriesControlHolder & retries_control_holder, + const Strings & zk_nodes, + const Strings & all_hosts, + const String & stage_to_wait) const { + auto zookeeper = retries_control_holder.faulty_zookeeper; + auto & retries_ctl = retries_control_holder.retries_ctl; + std::unordered_set zk_nodes_set{zk_nodes.begin(), zk_nodes.end()}; State state; if (zk_nodes_set.contains("error")) { - String errors; - { - auto holder = with_retries.createRetriesControlHolder("readCurrentState"); - holder.retries_ctl.retryLoop( - [&, &zookeeper = holder.faulty_zookeeper]() - { - with_retries.renewZooKeeper(zookeeper); - errors = zookeeper->get(zookeeper_path + "/error"); - }); - } + String errors = zookeeper->get(zookeeper_path + "/error"); ReadBufferFromOwnString buf{errors}; String host; readStringBinary(host, buf); @@ -150,60 +141,40 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState { if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait)) { - UnreadyHostState unready_host_state; const String started_node_name = "started|" + host; const String alive_node_name = "alive|" + host; const String alive_node_path = zookeeper_path + "/" + alive_node_name; + + UnreadyHostState unready_host_state; unready_host_state.started = zk_nodes_set.contains(started_node_name); - - /// Because we do retries everywhere we can't fully rely on ephemeral nodes anymore. - /// Though we recreate "alive" node when reconnecting it might be not enough and race condition is possible. - /// And everything we can do here - just retry. - /// In worst case when we won't manage to see the alive node for a long time we will just abort the backup. unready_host_state.alive = zk_nodes_set.contains(alive_node_name); - if (!unready_host_state.alive) - { - LOG_TRACE(log, "Seems like host ({}) is dead. Will retry the check to confirm", host); - auto holder = with_retries.createRetriesControlHolder("readCurrentState::checkAliveNode"); - holder.retries_ctl.retryLoop( - [&, &zookeeper = holder.faulty_zookeeper]() - { - with_retries.renewZooKeeper(zookeeper); - - if (zookeeper->existsNoFailureInjection(alive_node_path)) - { - unready_host_state.alive = true; - return; - } - - // Retry with backoff. We also check whether it is last retry or no, because we won't to rethrow an exception. - if (!holder.retries_ctl.isLastRetry()) - holder.retries_ctl.setKeeperError(Coordination::Error::ZNONODE, "There is no alive node for host {}. Will retry", host); - }); - } - LOG_TRACE(log, "Host ({}) appeared to be {}", host, unready_host_state.alive ? 
"alive" : "dead"); - state.unready_hosts.emplace(host, unready_host_state); - if (!unready_host_state.alive && unready_host_state.started && !state.host_terminated) - state.host_terminated = host; + + if (!unready_host_state.alive && !state.disconnected_host) + { + /// If the "alive" node doesn't exist then we don't have connection to the corresponding host. + /// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait. + /// In worst case when we won't manage to see the alive node for a long time we will just abort the backup. + state.disconnected_host = host; + String message; + if (unready_host_state.started) + message = fmt::format("Lost connection to host {}", host); + else + message = fmt::format("No connection to host {} yet", host); + if (!retries_ctl.isLastRetry()) + message += ", will retry"; + retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message); + } } } - if (state.host_terminated || !state.unready_hosts.empty()) + if (state.disconnected_host || !state.unready_hosts.empty()) return state; - auto holder = with_retries.createRetriesControlHolder("waitImpl::collectStagesToWait"); - holder.retries_ctl.retryLoop( - [&, &zookeeper = holder.faulty_zookeeper]() - { - with_retries.renewZooKeeper(zookeeper); - Strings results; - - for (const auto & host : all_hosts) - results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait)); - - state.results = std::move(results); - }); + Strings results; + for (const auto & host : all_hosts) + results.emplace_back(zookeeper->get(zookeeper_path + "/current|" + host + "|" + stage_to_wait)); + state.results = std::move(results); return state; } @@ -229,7 +200,7 @@ Strings BackupCoordinationStageSync::waitImpl( auto watch = std::make_shared(); Strings zk_nodes; { - auto holder = with_retries.createRetriesControlHolder("waitImpl::getChildren"); + auto holder = with_retries.createRetriesControlHolder("waitImpl"); holder.retries_ctl.retryLoop( [&, &zookeeper = holder.faulty_zookeeper]() { @@ -237,12 +208,14 @@ Strings BackupCoordinationStageSync::waitImpl( watch->reset(); /// Get zk nodes and subscribe on their changes. zk_nodes = zookeeper->getChildren(zookeeper_path, nullptr, watch); + + /// Read the current state of zk nodes. + state = readCurrentState(holder, zk_nodes, all_hosts, stage_to_wait); }); } - /// Read and analyze the current state of zk nodes. - state = readCurrentState(zk_nodes, all_hosts, stage_to_wait); - if (state.error || state.host_terminated || state.unready_hosts.empty()) + /// Analyze the current state of zk nodes. + if (state.error || state.disconnected_host || state.unready_hosts.empty()) break; /// Error happened or everything is ready. /// Log that we will wait @@ -270,8 +243,8 @@ Strings BackupCoordinationStageSync::waitImpl( state.error->second.rethrow(); /// Another host terminated without errors. - if (state.host_terminated) - throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Host {} suddenly stopped working", *state.host_terminated); + if (state.disconnected_host) + throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host); /// Something's unready, timeout is probably not enough. 
if (!state.unready_hosts.empty()) diff --git a/src/Backups/BackupCoordinationStageSync.h b/src/Backups/BackupCoordinationStageSync.h index 2efaec46b3a..e34fbcc099b 100644 --- a/src/Backups/BackupCoordinationStageSync.h +++ b/src/Backups/BackupCoordinationStageSync.h @@ -29,7 +29,7 @@ private: void createRootNodes(); struct State; - State readCurrentState(const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const; + State readCurrentState(WithRetries::RetriesControlHolder & retries_control_holder, const Strings & zk_nodes, const Strings & all_hosts, const String & stage_to_wait) const; Strings waitImpl(const Strings & all_hosts, const String & stage_to_wait, std::optional timeout) const; From 4240e48a5e0230316c928a5999646a37b10137b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 04:55:16 +0100 Subject: [PATCH 48/98] Sending the logs better --- docker/test/clickbench/run.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 7357fa6df86..e3c56ed9a75 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -53,4 +53,7 @@ set -x clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" +clickhouse-client -q "system flush logs" ||: +stop_logs_replication + echo -e "success\tClickBench finished" > /test_output/check_status.tsv From b40e04a8beafb013e1dadd81971764313bb6d5fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 05:04:37 +0100 Subject: [PATCH 49/98] Export the logs --- docker/test/clickbench/run.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index e3c56ed9a75..b3b4ea85e24 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -14,6 +14,15 @@ dpkg -i package_folder/clickhouse-client_*.deb mkdir /dev/shm/clickhouse chown clickhouse:clickhouse /dev/shm/clickhouse +# Allow introspection functions, needed for sending the logs +echo " +profiles: + default: + allow_introspection_functions: 1 +" > /etc/clickhouse-server/allow_introspection_functions.yaml + +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + clickhouse start # Wait for the server to start, but not for too long. 
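Back to the BACKUP/RESTORE stage synchronization reworked a few hunks above: readCurrentState now derives everything from the names of the stage znodes and reports the first host whose ephemeral "alive" node is missing, leaving the waiting to the Keeper retry loop. A simplified model of that decision (znode names as plain strings; the real code also handles the "error" node and sets a retryable user error):

    from dataclasses import dataclass, field
    from typing import Dict, List, Optional, Set

    @dataclass
    class SyncState:
        unready_hosts: Dict[str, bool] = field(default_factory=dict)  # host -> has "started" node
        disconnected_host: Optional[str] = None

    def read_state(zk_nodes: Set[str], all_hosts: List[str], stage: str) -> SyncState:
        state = SyncState()
        for host in all_hosts:
            if f"current|{host}|{stage}" in zk_nodes:
                continue  # this host already reached the awaited stage
            state.unready_hosts[host] = f"started|{host}" in zk_nodes
            if f"alive|{host}" not in zk_nodes and state.disconnected_host is None:
                # Missing "alive" node: no connection to that host yet (or anymore).
                state.disconnected_host = host
        return state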
From 9789c2caa214e17bb8323c9d67b6cc62c56eb350 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 05:48:09 +0100 Subject: [PATCH 50/98] Review fixes --- .github/workflows/pull_request.yml | 8 ++----- docker/test/clickbench/run.sh | 2 ++ src/Common/parseRemoteDescription.cpp | 2 +- src/Common/parseRemoteDescription.h | 4 ++++ .../Cached/registerDiskCache.cpp | 3 +-- tests/ci/clickbench.py | 22 +------------------ 6 files changed, 11 insertions(+), 30 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 5bb62b04c32..0be703e1196 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -709,22 +709,18 @@ jobs: with: test_name: ClickBench (amd64) runner_type: func-tester - additional_envs: | - KILL_TIMEOUT=1800 run_command: | cd "$REPO_COPY/tests/ci" - python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT" + python3 clickbench.py "$CHECK_NAME" ClickBenchAarch64: needs: [BuilderDebAarch64] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickBench (aarch64) runner_type: func-tester-aarch64 - additional_envs: | - KILL_TIMEOUT=1800 run_command: | cd "$REPO_COPY/tests/ci" - python3 clickbench.py "$CHECK_NAME" "$KILL_TIMEOUT" + python3 clickbench.py "$CHECK_NAME" ############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index b3b4ea85e24..a344e0ec27c 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -65,4 +65,6 @@ clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'h clickhouse-client -q "system flush logs" ||: stop_logs_replication +mv /var/log/clickhouse-server/* /test_output/ + echo -e "success\tClickBench finished" > /test_output/check_status.tsv diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 8ea3f4a0aa5..7b2045b9de1 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -184,7 +184,7 @@ std::vector> parseRemoteDescriptionForExternalDataba } else { - result.emplace_back(std::make_pair(address.substr(0, colon), DB::parseFromString(address.substr(colon + 1)))); + result.emplace_back(std::make_pair(address.substr(0, colon), parseFromString(address.substr(colon + 1)))); } } diff --git a/src/Common/parseRemoteDescription.h b/src/Common/parseRemoteDescription.h index d97558c4728..12435bc68a0 100644 --- a/src/Common/parseRemoteDescription.h +++ b/src/Common/parseRemoteDescription.h @@ -1,8 +1,12 @@ #pragma once + #include #include + + namespace DB { + /* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ',' * depending on whether shards or replicas are generated. 
* For example: diff --git a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp index 182326bbdc3..99fd2c932af 100644 --- a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp +++ b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp @@ -6,9 +6,8 @@ #include #include #include -#include #include -#include + namespace DB { diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 359c10eeb9d..061d36f02fa 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -23,7 +23,6 @@ from commit_status_helper import ( get_commit, override_status, post_commit_status, - post_commit_status_to_file, update_mergeable_check, ) from docker_pull_helper import DockerImage, get_image_with_version @@ -113,13 +112,6 @@ def process_results( def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("check_name") - parser.add_argument("kill_timeout", type=int) - parser.add_argument( - "--post-commit-status", - default="commit_status", - choices=["commit_status", "file"], - help="Where to public post commit status", - ) return parser.parse_args() @@ -214,19 +206,7 @@ def main(): ) print(f"::notice:: {check_name} Report url: {report_url}") - if args.post_commit_status == "commit_status": - post_commit_status(commit, state, report_url, description, check_name, pr_info) - elif args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - description, - state, - report_url, - ) - else: - raise Exception( - f'Unknown post_commit_status option "{args.post_commit_status}"' - ) + post_commit_status(commit, state, report_url, description, check_name, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, From 8a68a4247e164dd822460fc73e03f4f2ad8b8a2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 07:58:54 +0100 Subject: [PATCH 51/98] Style --- tests/ci/clickbench.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 061d36f02fa..096309eaf92 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -124,7 +124,6 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) reports_path = Path(REPORTS_PATH) - post_commit_path = temp_path / "clickbench_status.tsv" args = parse_args() check_name = args.check_name From be9fac3a55392da05dba36b7a8adc949ae5da593 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 09:26:06 +0100 Subject: [PATCH 52/98] Lint includes Co-authored-by: alesapin --- src/Server/KeeperReadinessHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/KeeperReadinessHandler.h b/src/Server/KeeperReadinessHandler.h index caa59098427..00b51b886f9 100644 --- a/src/Server/KeeperReadinessHandler.h +++ b/src/Server/KeeperReadinessHandler.h @@ -1,6 +1,6 @@ #pragma once -#include "config.h" +#include #if USE_NURAFT From 1f9c7336a97b88a070d0ce783ff5e687c8abcfb7 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 09:32:16 +0100 Subject: [PATCH 53/98] Fix test helper naming --- tests/integration/helpers/keeper_utils.py | 2 +- tests/integration/test_keeper_http_control/test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index e07bce901d2..39fa0d0f074 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -279,7 +279,7 @@ def get_leader(cluster, nodes): raise Exception("No 
leader in Keeper cluster.") -def get_follower(cluster, nodes): +def get_any_follower(cluster, nodes): for node in nodes: if is_follower(cluster, node): return node diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py index b415a03a5c4..8bffaa6763c 100644 --- a/tests/integration/test_keeper_http_control/test.py +++ b/tests/integration/test_keeper_http_control/test.py @@ -51,7 +51,7 @@ def test_http_readiness(started_cluster): assert readiness_data["details"]["leader"] == True assert readiness_data["details"]["follower"] == False - follower = keeper_utils.get_follower(cluster, [node1, node2, node3]) + follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3]) response = requests.get( "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) ) From 69a022f72a35b214b8305ae2cd5bca90dcb6f099 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 09:42:32 +0100 Subject: [PATCH 54/98] Add `observer` status --- src/Server/KeeperReadinessHandler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index 37afd8e9898..148a209fb12 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -25,13 +25,15 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP { auto is_leader = keeper_dispatcher->isLeader(); auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader(); + auto is_observer = keeper_dispatcher->isObserver() && keeper_dispatcher->hasLeader(); - auto status = is_leader || is_follower; + auto status = is_leader || is_follower || is_observer; Poco::JSON::Object json, details; details.set("leader", is_leader); details.set("follower", is_follower); + details.set("observer", is_observer); json.set("details", details); json.set("status", status ? "ok": "fail"); From ea123ed5c143ee221fb372d462ede73a1492c317 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 11:35:01 +0100 Subject: [PATCH 55/98] Change response structure --- src/Server/KeeperReadinessHandler.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index 148a209fb12..ed972055aee 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -27,15 +27,16 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP auto is_follower = keeper_dispatcher->isFollower() && keeper_dispatcher->hasLeader(); auto is_observer = keeper_dispatcher->isObserver() && keeper_dispatcher->hasLeader(); + auto data = keeper_dispatcher->getKeeper4LWInfo(); + auto status = is_leader || is_follower || is_observer; Poco::JSON::Object json, details; - details.set("leader", is_leader); - details.set("follower", is_follower); - details.set("observer", is_observer); + details.set("role", data.getRole()); + details.set("hasLeader", keeper_dispatcher->hasLeader()); json.set("details", details); - json.set("status", status ? "ok": "fail"); + json.set("status", status ? 
"ok" : "fail"); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); From 8fe2cd1a7effaca3676b44158004fe5747a8bbc2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 11:35:17 +0100 Subject: [PATCH 56/98] Update tests --- .../test_keeper_http_control/test.py | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py index 8bffaa6763c..ed86e06c626 100644 --- a/tests/integration/test_keeper_http_control/test.py +++ b/tests/integration/test_keeper_http_control/test.py @@ -1,12 +1,13 @@ #!/usr/bin/env python3 import os +import time import pytest import requests -import helpers.keeper_utils as keeper_utils -from kazoo.client import KazooClient from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +import helpers.keeper_utils as keeper_utils cluster = ClickHouseCluster(__file__) CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") @@ -30,16 +31,7 @@ def started_cluster(): finally: cluster.shutdown() - -def get_fake_zk(node, timeout=30.0): - _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout - ) - _fake_zk_instance.start() - return _fake_zk_instance - - -def test_http_readiness(started_cluster): +def test_http_readiness_basic_responses(started_cluster): leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) response = requests.get( "http://{host}:{port}/ready".format(host=leader.ip_address, port=9182) @@ -48,8 +40,7 @@ def test_http_readiness(started_cluster): readiness_data = response.json() assert readiness_data["status"] == "ok" - assert readiness_data["details"]["leader"] == True - assert readiness_data["details"]["follower"] == False + assert readiness_data["details"]["role"] == "leader" follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3]) response = requests.get( @@ -59,5 +50,26 @@ def test_http_readiness(started_cluster): readiness_data = response.json() assert readiness_data["status"] == "ok" - assert readiness_data["details"]["leader"] == False - assert readiness_data["details"]["follower"] == True + assert readiness_data["details"]["role"] == "follower" + assert readiness_data["details"]["hasLeader"] == True + +def test_http_readiness_partitioned_cluster(started_cluster): + with PartitionManager() as pm: + leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) + follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3]) + + pm.partition_instances( + leader, follower + ) + time.sleep(3) + + response = requests.get( + "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) + ) + print(response.json()) + assert response.status_code == 503 + + readiness_data = response.json() + assert readiness_data["status"] == "fail" + assert readiness_data["details"]["role"] == "follower" + assert readiness_data["details"]["hasLeader"] == False From b49452fb45e7b68575411e1bd9479d2c7e9531cb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Dec 2023 10:44:55 +0000 Subject: [PATCH 57/98] Automatic style fix --- tests/integration/test_keeper_http_control/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py index ed86e06c626..49d2f70e6b0 100644 --- 
a/tests/integration/test_keeper_http_control/test.py +++ b/tests/integration/test_keeper_http_control/test.py @@ -31,6 +31,7 @@ def started_cluster(): finally: cluster.shutdown() + def test_http_readiness_basic_responses(started_cluster): leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) response = requests.get( @@ -53,14 +54,13 @@ def test_http_readiness_basic_responses(started_cluster): assert readiness_data["details"]["role"] == "follower" assert readiness_data["details"]["hasLeader"] == True + def test_http_readiness_partitioned_cluster(started_cluster): with PartitionManager() as pm: leader = keeper_utils.get_leader(cluster, [node1, node2, node3]) follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3]) - pm.partition_instances( - leader, follower - ) + pm.partition_instances(leader, follower) time.sleep(3) response = requests.get( From 511cfb393dcc7765c30a0ff50d909ca88bbfa35a Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 12 Dec 2023 12:27:49 +0100 Subject: [PATCH 58/98] Remove `time.sleep` from test --- tests/integration/test_keeper_http_control/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_http_control/test.py b/tests/integration/test_keeper_http_control/test.py index 49d2f70e6b0..65dc5bea909 100644 --- a/tests/integration/test_keeper_http_control/test.py +++ b/tests/integration/test_keeper_http_control/test.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import os -import time import pytest import requests @@ -61,7 +60,7 @@ def test_http_readiness_partitioned_cluster(started_cluster): follower = keeper_utils.get_any_follower(cluster, [node1, node2, node3]) pm.partition_instances(leader, follower) - time.sleep(3) + keeper_utils.wait_until_quorum_lost(cluster, follower) response = requests.get( "http://{host}:{port}/ready".format(host=follower.ip_address, port=9182) From 7ff30211128d08a82cd830d4d1ed16321d58fa47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 14:12:10 +0100 Subject: [PATCH 59/98] Fix Docker --- tests/ci/ci_config.py | 2 +- tests/ci/clickbench.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index e9f75d66b2e..de2ba3dc1ce 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -512,7 +512,7 @@ CHECK_DESCRIPTIONS = [ CheckDescription( "ClickBench", "Runs [ClickBench](https://github.com/ClickHouse/ClickBench/) with instant-attach table", - lambda x: x.startswith("Upgrade check ("), + lambda x: x.startswith("ClickBench"), ), CheckDescription( "Falback for unknown", diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index 096309eaf92..2ea5e39ce8e 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -53,7 +53,7 @@ def get_run_command( env_str = " ".join(envs) return ( - f"docker run --volume={builds_path}:/package_folder " + f"docker run --shm-size=16g --volume={builds_path}:/package_folder " f"{ci_logs_args}" f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " From 7142eacad3c8d2e793f21a36bb58d94d6d9b5656 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Dec 2023 17:54:26 +0300 Subject: [PATCH 60/98] SerializationString reduce memory usage --- src/DataTypes/Serializations/SerializationString.cpp | 2 +- tests/queries/0_stateless/01926_order_by_desc_limit.sql | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationString.cpp 
b/src/DataTypes/Serializations/SerializationString.cpp index a87c5e7d880..788ff429088 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -175,7 +175,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt offsets.push_back(offset); if (unlikely(offset > data.size())) - data.resize(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2))); + data.resize_exact(roundUpToPowerOfTwoOrZero(std::max(offset, data.size() * 2))); if (size) { diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 6854e6c1e84..a0047a2925a 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -11,9 +11,11 @@ SETTINGS index_granularity = 1024, index_granularity_bytes = '10Mi'; INSERT INTO order_by_desc SELECT number, repeat('a', 1024) FROM numbers(1024 * 300); OPTIMIZE TABLE order_by_desc FINAL; -SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null; +SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null +SETTINGS max_memory_usage = '400M'; -SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null; +SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null +SETTINGS max_memory_usage = '400M'; SYSTEM FLUSH LOGS; From 54676707412d586b1f97a773a22a540b7eb40d85 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:13:34 -0400 Subject: [PATCH 61/98] Mentions that APPEND or TRUNCATE should be used with INTO-OUTFILE. --- docs/en/sql-reference/statements/select/into-outfile.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index 352af16042a..985f5e25b05 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -26,6 +26,7 @@ SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION typ - The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it. - If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output. - If `APPEND` is mentioned in the query then the output is appended to an existing file. If compression is used, append cannot be used. +- When writing to a file that already exists, `APPEND` or `TRUNCATE` must be used. 
**Example** From af4f1abadd1563b00d5ff572142b59039eee76c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 16:41:38 +0100 Subject: [PATCH 62/98] Fix error --- docker/test/clickbench/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index a344e0ec27c..471e1fd6714 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -21,7 +21,7 @@ profiles: allow_introspection_functions: 1 " > /etc/clickhouse-server/allow_introspection_functions.yaml -config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml +config_logs_export_cluster /etc/clickhouse-server/users.d/system_logs_export.yaml clickhouse start From 12561c0c9b7abaee9c7bf0d469de909b70af9c84 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 16:42:31 +0100 Subject: [PATCH 63/98] Maybe better --- docker/test/clickbench/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 471e1fd6714..5d2312c22c5 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -64,6 +64,7 @@ clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'h clickhouse-client -q "system flush logs" ||: stop_logs_replication +clickhouse stop mv /var/log/clickhouse-server/* /test_output/ From 7196103be5bf2d937ca0422f01e21dfad94978ba Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 12 Dec 2023 17:41:16 +0100 Subject: [PATCH 64/98] Always recreate ephemeral "alive" node on reconnection. --- src/Backups/BackupCoordinationRemote.cpp | 8 +++++--- src/Backups/RestoreCoordinationRemote.cpp | 10 +++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 7319b1aba58..b659887e0da 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -184,10 +184,12 @@ BackupCoordinationRemote::BackupCoordinationRemote( if (my_is_internal) { String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host; + + /// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically. + zk->tryRemove(alive_node_path); + zk->createAncestors(alive_node_path); - auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); - if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) - throw zkutil::KeeperException::fromPath(code, alive_node_path); + zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral); } }) { diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 60a83c580f0..190634de4a9 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -43,12 +43,12 @@ RestoreCoordinationRemote::RestoreCoordinationRemote( if (my_is_internal) { String alive_node_path = my_zookeeper_path + "/stage/alive|" + my_current_host; - auto code = zk->tryCreate(alive_node_path, "", zkutil::CreateMode::Ephemeral); - if (code == Coordination::Error::ZNODEEXISTS) - zk->handleEphemeralNodeExistenceNoFailureInjection(alive_node_path, ""); - else if (code != Coordination::Error::ZOK) - throw zkutil::KeeperException::fromPath(code, alive_node_path); + /// Delete the ephemeral node from the previous connection so we don't have to wait for keeper to do it automatically. 
+ zk->tryRemove(alive_node_path); + + zk->createAncestors(alive_node_path); + zk->create(alive_node_path, "", zkutil::CreateMode::Ephemeral); } }) { From 64d7abde099bfc516e0fb630b581b19c3a548279 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Dec 2023 16:51:23 +0000 Subject: [PATCH 65/98] fix result of external aggregation in case of partially materialized projection --- .../Transforms/AggregatingTransform.cpp | 22 ++++--- ...projections_external_aggregation.reference | 41 ++++++++++++ ...02941_projections_external_aggregation.sql | 66 +++++++++++++++++++ 3 files changed, 121 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02941_projections_external_aggregation.reference create mode 100644 tests/queries/0_stateless/02941_projections_external_aggregation.sql diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index bf475c57d36..1f52ed97491 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -726,8 +726,11 @@ void AggregatingTransform::initGenerate() auto prepared_data = params->aggregator.prepareVariantsToMerge(many_data->variants); Pipes pipes; for (auto & variant : prepared_data) + { /// Converts hash tables to blocks with data (finalized or not). pipes.emplace_back(std::make_shared(params, variant)); + } + Pipe pipe = Pipe::unitePipes(std::move(pipes)); if (!pipe.empty()) { @@ -781,21 +784,23 @@ void AggregatingTransform::initGenerate() } } - const auto & tmp_data = params->aggregator.getTemporaryData(); + size_t num_streams = 0; + size_t compressed_size = 0; + size_t uncompressed_size = 0; - Pipe pipe; + Pipes pipes; + /// Merge external data from all aggregators used in query. 
+ for (const auto & aggregator : *params->aggregator_list_ptr) { - Pipes pipes; - + const auto & tmp_data = aggregator.getTemporaryData(); for (auto * tmp_stream : tmp_data.getStreams()) pipes.emplace_back(Pipe(std::make_unique(tmp_stream))); - pipe = Pipe::unitePipes(std::move(pipes)); + num_streams += tmp_data.getStreams().size(); + compressed_size += tmp_data.getStat().compressed_size; + uncompressed_size += tmp_data.getStat().uncompressed_size; } - size_t num_streams = tmp_data.getStreams().size(); - size_t compressed_size = tmp_data.getStat().compressed_size; - size_t uncompressed_size = tmp_data.getStat().uncompressed_size; LOG_DEBUG( log, "Will merge {} temporary files of size {} compressed, {} uncompressed.", @@ -803,6 +808,7 @@ void AggregatingTransform::initGenerate() ReadableSize(compressed_size), ReadableSize(uncompressed_size)); + auto pipe = Pipe::unitePipes(std::move(pipes)); addMergingAggregatedMemoryEfficientTransform(pipe, params, temporary_data_merge_threads); processors = Pipe::detachProcessors(std::move(pipe)); diff --git a/tests/queries/0_stateless/02941_projections_external_aggregation.reference b/tests/queries/0_stateless/02941_projections_external_aggregation.reference new file mode 100644 index 00000000000..4b1a62520cd --- /dev/null +++ b/tests/queries/0_stateless/02941_projections_external_aggregation.reference @@ -0,0 +1,41 @@ +*** correct aggregation *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** correct aggregation with projection *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** after materialization *** +*** correct aggregation *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** correct aggregation with projection *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 +*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 *** +1 0 0 1249950000 +1 0 2 1250000000 +1 1 1 1249975000 +1 1 3 1250025000 diff --git a/tests/queries/0_stateless/02941_projections_external_aggregation.sql b/tests/queries/0_stateless/02941_projections_external_aggregation.sql new file mode 100644 index 00000000000..5053773f142 --- /dev/null +++ b/tests/queries/0_stateless/02941_projections_external_aggregation.sql @@ -0,0 +1,66 @@ +DROP TABLE IF EXISTS t_proj_external; + +CREATE TABLE t_proj_external +( + k1 UInt32, + k2 UInt32, + k3 UInt32, + value UInt32 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t_proj_external SELECT 1, number%2, number%4, number FROM numbers(50000); + +SYSTEM STOP MERGES t_proj_external; + +ALTER TABLE t_proj_external ADD PROJECTION aaaa ( + SELECT + k1, + k2, + k3, + sum(value) + GROUP BY k1, k2, k3 +); + +INSERT INTO t_proj_external SELECT 1, number%2, number%4, number FROM numbers(100000) LIMIT 50000, 100000; + +SELECT '*** correct aggregation ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 
ORDER BY k1, k2, k3 SETTINGS optimize_use_projections = 0; + +SELECT '*** correct aggregation with projection ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3; + +SELECT '*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1; + +SELECT '*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1; + +SYSTEM START MERGES t_proj_external; + +ALTER TABLE t_proj_external MATERIALIZE PROJECTION aaaa SETTINGS mutations_sync = 2; + +SELECT '*** after materialization ***'; + +SELECT '*** correct aggregation ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_use_projections = 0; + +SELECT '*** correct aggregation with projection ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3; + +SELECT '*** optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 0, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1; + +SELECT '*** optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 ***'; + +SELECT k1, k2, k3, sum(value) v FROM t_proj_external GROUP BY k1, k2, k3 ORDER BY k1, k2, k3 SETTINGS optimize_aggregation_in_order = 1, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1; + +DROP TABLE IF EXISTS t_proj_external; From 028763def5313debef322ffabaedbb4c3a9cdcd6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 12 Dec 2023 18:53:52 +0100 Subject: [PATCH 66/98] Simplify logic in BackupCoordinationStageSync::readCurrentState() and return earlier from the cycly on a connection problem. 
--- src/Backups/BackupCoordinationStageSync.cpp | 56 ++++++++++++--------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/src/Backups/BackupCoordinationStageSync.cpp b/src/Backups/BackupCoordinationStageSync.cpp index e4dac7dbbe9..cedcecfd35c 100644 --- a/src/Backups/BackupCoordinationStageSync.cpp +++ b/src/Backups/BackupCoordinationStageSync.cpp @@ -100,19 +100,19 @@ Strings BackupCoordinationStageSync::waitFor(const Strings & all_hosts, const St namespace { - struct UnreadyHostState + struct UnreadyHost { + String host; bool started = false; - bool alive = false; }; } struct BackupCoordinationStageSync::State { - Strings results; - std::map unready_hosts; + std::optional results; std::optional> error; std::optional disconnected_host; + std::optional unready_host; }; BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState( @@ -137,39 +137,45 @@ BackupCoordinationStageSync::State BackupCoordinationStageSync::readCurrentState return state; } + std::optional unready_host; + for (const auto & host : all_hosts) { if (!zk_nodes_set.contains("current|" + host + "|" + stage_to_wait)) { const String started_node_name = "started|" + host; const String alive_node_name = "alive|" + host; - const String alive_node_path = zookeeper_path + "/" + alive_node_name; - UnreadyHostState unready_host_state; - unready_host_state.started = zk_nodes_set.contains(started_node_name); - unready_host_state.alive = zk_nodes_set.contains(alive_node_name); - state.unready_hosts.emplace(host, unready_host_state); + bool started = zk_nodes_set.contains(started_node_name); + bool alive = zk_nodes_set.contains(alive_node_name); - if (!unready_host_state.alive && !state.disconnected_host) + if (!alive) { /// If the "alive" node doesn't exist then we don't have connection to the corresponding host. /// This node is ephemeral so probably it will be recreated soon. We use zookeeper retries to wait. /// In worst case when we won't manage to see the alive node for a long time we will just abort the backup. - state.disconnected_host = host; String message; - if (unready_host_state.started) + if (started) message = fmt::format("Lost connection to host {}", host); else message = fmt::format("No connection to host {} yet", host); if (!retries_ctl.isLastRetry()) message += ", will retry"; retries_ctl.setUserError(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, message); + state.disconnected_host = host; + return state; } + + if (!unready_host) + unready_host.emplace(UnreadyHost{.host = host, .started = started}); } } - if (state.disconnected_host || !state.unready_hosts.empty()) + if (unready_host) + { + state.unready_host = std::move(unready_host); return state; + } Strings results; for (const auto & host : all_hosts) @@ -215,12 +221,16 @@ Strings BackupCoordinationStageSync::waitImpl( } /// Analyze the current state of zk nodes. - if (state.error || state.disconnected_host || state.unready_hosts.empty()) - break; /// Error happened or everything is ready. + chassert(state.results || state.error || state.disconnected_host || state.unready_host); - /// Log that we will wait - const auto & unready_host = state.unready_hosts.begin()->first; - LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {})", unready_host); + if (state.results || state.error || state.disconnected_host) + break; /// Everything is ready or error happened. + + /// Log what we will wait. 
+ const auto & unready_host = *state.unready_host; + LOG_INFO(log, "Waiting on ZooKeeper watch for any node to be changed (currently waiting for host {}{})", + unready_host.host, + (!unready_host.started ? " which didn't start the operation yet" : "")); /// Wait until `watch_callback` is called by ZooKeeper meaning that zk nodes have changed. { @@ -247,19 +257,19 @@ Strings BackupCoordinationStageSync::waitImpl( throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "No connection to host {}", *state.disconnected_host); /// Something's unready, timeout is probably not enough. - if (!state.unready_hosts.empty()) + if (state.unready_host) { - const auto & [unready_host, unready_host_state] = *state.unready_hosts.begin(); + const auto & unready_host = *state.unready_host; throw Exception( ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Waited for host {} too long (> {}){}", - unready_host, + unready_host.host, to_string(*timeout), - unready_host_state.started ? "" : ": Operation didn't start"); + unready_host.started ? "" : ": Operation didn't start"); } LOG_TRACE(log, "Everything is Ok. All hosts achieved stage {}", stage_to_wait); - return state.results; + return std::move(*state.results); } } From 49aad9c88e9ce4aea771b28a1fa8a4816cb481a4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 19:33:08 +0100 Subject: [PATCH 67/98] Maybe better --- docker/test/clickbench/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 5d2312c22c5..255ff46b0bc 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -19,9 +19,9 @@ echo " profiles: default: allow_introspection_functions: 1 -" > /etc/clickhouse-server/allow_introspection_functions.yaml +" > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml -config_logs_export_cluster /etc/clickhouse-server/users.d/system_logs_export.yaml +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml clickhouse start From 4c1860b9b4e499c736f8e85cef98afe27e35db65 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 21:21:58 +0100 Subject: [PATCH 68/98] Fix a mistake --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 6e3721956c0..ea82e071112 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -21,7 +21,7 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} # trace_log needs more columns for symbolization EXTRA_COLUMNS_TRACE_LOG="${EXTRA_COLUMNS} symbols Array(LowCardinality(String)), lines Array(LowCardinality(String)), " -EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> toLowCardinality(demangle(addressToSymbol(x))), trace) AS symbols, arrayMap(x -> toLowCardinality(addressToLine(x)), trace) AS lines" +EXTRA_COLUMNS_EXPRESSION_TRACE_LOG="${EXTRA_COLUMNS_EXPRESSION}, arrayMap(x -> demangle(addressToSymbol(x)), trace)::Array(LowCardinality(String)) AS symbols, arrayMap(x -> addressToLine(x), trace)::Array(LowCardinality(String)) AS lines" function __set_connection_args From 2099130bd2d66b8f2d9b87e27c833fdddaebc723 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 21:28:28 +0100 Subject: [PATCH 69/98] Enable text_log --- docker/test/clickbench/run.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 255ff46b0bc..921d2023fd7 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -21,6 +21,11 @@ profiles: allow_introspection_functions: 1 " > /etc/clickhouse-server/users.d/allow_introspection_functions.yaml +# Enable text_log +echo " +text_log: +" > /etc/clickhouse-server/config.d/text_log.yaml + config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml clickhouse start From d51aaddf12119e45525a12112557c0595422a2b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 22:15:19 +0100 Subject: [PATCH 70/98] Use the local region --- docker/test/clickbench/create.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql index 620bdf09331..a57faf35837 100644 --- a/docker/test/clickbench/create.sql +++ b/docker/test/clickbench/create.sql @@ -109,4 +109,4 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955' ) ENGINE = MergeTree SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G', - disk = disk(type = web, endpoint = 'https://clickhouse-public-datasets.s3.amazonaws.com/web/')); + disk = disk(type = web, endpoint = 'https://clickhouse-datasets.s3.amazonaws.com/web/')); From 7f4a028196e53dca878ecda6c4449730891572b3 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 12 Dec 2023 21:26:58 +0000 Subject: [PATCH 71/98] Test and fix --- .../ParallelReplicasReadingCoordinator.cpp | 13 +- .../__init__.py | 0 .../configs/remote_servers.xml | 22 ++++ .../test.py | 122 ++++++++++++++++++ 4 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_parallel_replicas_working_set/__init__.py create mode 100644 tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml create mode 100644 tests/integration/test_parallel_replicas_working_set/test.py diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index d81f5dd41ce..c6edb1049f4 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -164,11 +164,11 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc for (auto && part_ranges: announcement.description) { Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}}; + const MergeTreePartInfo & announced_part = part.description.info; auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part); if (it != all_parts_to_read.cend()) { - const MergeTreePartInfo & announced_part = part.description.info; const MergeTreePartInfo & found_part = it->description.info; if (found_part == announced_part) { @@ -183,13 +183,20 @@ void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announc bool is_disjoint = found_part.isDisjoint(announced_part); if (it != all_parts_to_read.cbegin() && is_disjoint) { - const MergeTreePartInfo & lesser_part_info = (--it)->description.info; - is_disjoint &= lesser_part_info.isDisjoint(announced_part); + const MergeTreePartInfo & lesser_part = (--it)->description.info; + is_disjoint &= lesser_part.isDisjoint(announced_part); } if (!is_disjoint) continue; } } + else if (!all_parts_to_read.empty()) + { + /// the announced part is greatest - check if it's disjoint with lesser part + const MergeTreePartInfo & 
lesser_part = all_parts_to_read.crbegin()->description.info; + if (!lesser_part.isDisjoint(announced_part)) + continue; + } auto [insert_it, _] = all_parts_to_read.emplace(std::move(part)); parts_diff.push_back(insert_it); diff --git a/tests/integration/test_parallel_replicas_working_set/__init__.py b/tests/integration/test_parallel_replicas_working_set/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml new file mode 100644 index 00000000000..02a315479f8 --- /dev/null +++ b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml @@ -0,0 +1,22 @@ + + + + + true + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py new file mode 100644 index 00000000000..7b93c2fcf4b --- /dev/null +++ b/tests/integration/test_parallel_replicas_working_set/test.py @@ -0,0 +1,122 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +nodes = [ + cluster.add_instance( + f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + ) + for i in (1, 2, 3) +] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name, node_with_covering_part): + + # create replicated tables + for node in nodes: + node.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + nodes[0].query( + f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') + ORDER BY (key)""" + ) + nodes[1].query( + f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2') + ORDER BY (key)""" + ) + nodes[2].query( + f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') + ORDER BY (key)""" + ) + # stop merges + for i in (0, 1, 2): + if i != node_with_covering_part: + nodes[i].query(f"system stop fetches {table_name}") + + # populate data, equal number of rows for each replica + nodes[0].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)", + ) + nodes[0].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)" + ) + nodes[1].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)" + ) + nodes[1].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)" + ) + nodes[2].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)" + ) + nodes[2].query( + f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)" + ) + nodes[node_with_covering_part].query(f"system sync replica {table_name}") + nodes[node_with_covering_part].query(f"optimize table {table_name}") + + # check we have expected set of parts + expected_active_parts = "" + if node_with_covering_part == 0: + expected_active_parts = "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n" + + if node_with_covering_part == 1: + expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n" + + if node_with_covering_part == 2: + expected_active_parts = 
"all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n" + + assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts) + + + +@pytest.mark.parametrize("node_with_covering_part", [0, 1, 2]) +def test_covering_part_in_announcement(start_cluster, node_with_covering_part): + """create and populate table in special way (see create_table()), + node_with_covering_part contains all parts merged into one, + other nodes contain only parts which are result of insert via the node + """ + + cluster = "test_single_shard_multiple_replicas" + table_name = "test_table" + create_tables(cluster, table_name, node_with_covering_part) + + expected_full_result = "60\t0\t59\t1770\n" + expected_results = {expected_full_result} + if node_with_covering_part == 0: + expected_results.add("40\t20\t59\t1580\n") + if node_with_covering_part == 1: + expected_results.add("40\t0\t59\t1180\n") + if node_with_covering_part == 2: + expected_results.add("40\t0\t39\t780\n") + + # parallel replicas + result = nodes[0].query( + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}", + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "prefer_localhost_replica": 0, + "max_parallel_replicas": 3, + "use_hedged_requests": 0, + "cluster_for_parallel_replicas": cluster + }, + ) + assert(result in expected_results) + + # w/o parallel replicas + assert ( + nodes[node_with_covering_part].query( + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}" + ) + == expected_full_result + ) From 3333a7f2194a7699cdf30f302dc1b0426f9c026d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 12 Dec 2023 21:39:08 +0000 Subject: [PATCH 72/98] Test cleanup --- .../integration/test_parallel_replicas_working_set/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py index 7b93c2fcf4b..747ad7ec89f 100644 --- a/tests/integration/test_parallel_replicas_working_set/test.py +++ b/tests/integration/test_parallel_replicas_working_set/test.py @@ -79,7 +79,6 @@ def create_tables(cluster, table_name, node_with_covering_part): assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts) - @pytest.mark.parametrize("node_with_covering_part", [0, 1, 2]) def test_covering_part_in_announcement(start_cluster, node_with_covering_part): """create and populate table in special way (see create_table()), @@ -116,7 +115,10 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part): # w/o parallel replicas assert ( nodes[node_with_covering_part].query( - f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}" + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}", + settings={ + "allow_experimental_parallel_reading_from_replicas": 0, + }, ) == expected_full_result ) From ea86b33d3bb498e989dbdee1b43b892ee077cf4d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Dec 2023 21:59:08 +0000 Subject: [PATCH 73/98] Automatic style fix --- .../test.py | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py index 747ad7ec89f..04768694cc4 100644 --- 
a/tests/integration/test_parallel_replicas_working_set/test.py +++ b/tests/integration/test_parallel_replicas_working_set/test.py @@ -21,7 +21,6 @@ def start_cluster(): def create_tables(cluster, table_name, node_with_covering_part): - # create replicated tables for node in nodes: node.query(f"DROP TABLE IF EXISTS {table_name} SYNC") @@ -68,22 +67,33 @@ def create_tables(cluster, table_name, node_with_covering_part): # check we have expected set of parts expected_active_parts = "" if node_with_covering_part == 0: - expected_active_parts = "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n" + expected_active_parts = ( + "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n" + ) if node_with_covering_part == 1: - expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n" + expected_active_parts = ( + "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n" + ) if node_with_covering_part == 2: - expected_active_parts = "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n" + expected_active_parts = ( + "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n" + ) - assert (nodes[0].query(f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name") == expected_active_parts) + assert ( + nodes[0].query( + f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name" + ) + == expected_active_parts + ) @pytest.mark.parametrize("node_with_covering_part", [0, 1, 2]) def test_covering_part_in_announcement(start_cluster, node_with_covering_part): """create and populate table in special way (see create_table()), - node_with_covering_part contains all parts merged into one, - other nodes contain only parts which are result of insert via the node + node_with_covering_part contains all parts merged into one, + other nodes contain only parts which are result of insert via the node """ cluster = "test_single_shard_multiple_replicas" @@ -93,24 +103,24 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part): expected_full_result = "60\t0\t59\t1770\n" expected_results = {expected_full_result} if node_with_covering_part == 0: - expected_results.add("40\t20\t59\t1580\n") + expected_results.add("40\t20\t59\t1580\n") if node_with_covering_part == 1: - expected_results.add("40\t0\t59\t1180\n") + expected_results.add("40\t0\t59\t1180\n") if node_with_covering_part == 2: - expected_results.add("40\t0\t39\t780\n") + expected_results.add("40\t0\t39\t780\n") # parallel replicas result = nodes[0].query( - f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}", - settings={ - "allow_experimental_parallel_reading_from_replicas": 2, - "prefer_localhost_replica": 0, - "max_parallel_replicas": 3, - "use_hedged_requests": 0, - "cluster_for_parallel_replicas": cluster - }, - ) - assert(result in expected_results) + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}", + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "prefer_localhost_replica": 0, + "max_parallel_replicas": 3, + "use_hedged_requests": 0, + "cluster_for_parallel_replicas": cluster, + }, + ) + assert result in expected_results # w/o parallel replicas assert ( From 2043791ed76d040c8f05f5ad856bb599512da15c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Dec 2023 23:37:55 +0100 Subject: [PATCH 74/98] Fix typo --- docker/test/stateful/s3downloader | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index 96f2aa96dd5..77601fb5af6 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -30,7 +30,7 @@ def build_url(base_url, dataset): return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset]) -def dowload_with_progress(url, path): +def download_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: @@ -110,7 +110,7 @@ if __name__ == "__main__": temp_archive_path = _get_temp_file_name() try: download_url_for_dataset = build_url(args.url_prefix, dataset) - dowload_with_progress(download_url_for_dataset, temp_archive_path) + download_with_progress(download_url_for_dataset, temp_archive_path) unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path) except Exception as ex: logging.info("Some exception occured %s", str(ex)) From f11b90e7bbec13ba27989442e359f818d8e85088 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Dec 2023 00:10:58 +0100 Subject: [PATCH 75/98] Allow buckets without List access --- src/Storages/StorageMergeTree.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e9a0dd5fbf3..16f4122d605 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -829,8 +829,13 @@ void StorageMergeTree::loadDeduplicationLog() auto disk = getDisks()[0]; std::string path = fs::path(relative_data_path) / "deduplication_logs"; - deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version, disk); - deduplication_log->load(); + + /// If either there is already a deduplication log, or we will be able to use it. + if (disk->exists(path) || !disk->isReadOnly()) + { + deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version, disk); + deduplication_log->load(); + } } void StorageMergeTree::loadMutations() From 9f5299e118fc536f0ec9deb224c6ed6028362743 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Dec 2023 00:11:10 +0100 Subject: [PATCH 76/98] Use a new bucket --- docker/test/clickbench/create.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/clickbench/create.sql b/docker/test/clickbench/create.sql index a57faf35837..9f18a47474b 100644 --- a/docker/test/clickbench/create.sql +++ b/docker/test/clickbench/create.sql @@ -109,4 +109,4 @@ ATTACH TABLE hits UUID 'c449dfbf-ba06-4d13-abec-8396559eb955' ) ENGINE = MergeTree SETTINGS disk = disk(type = cache, path = '/dev/shm/clickhouse/', max_size = '16G', - disk = disk(type = web, endpoint = 'https://clickhouse-datasets.s3.amazonaws.com/web/')); + disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/')); From bb22ce6ec87063086ef5f6525d7d40dd72cfe88b Mon Sep 17 00:00:00 2001 From: Mikhail Koviazin Date: Wed, 13 Dec 2023 07:17:56 +0000 Subject: [PATCH 77/98] fix clickhouse-client invocation in 02327_capnproto_protobuf_empty_messages The test relies on `clickhouse-client` to be in `$PATH`, which is a wrong assumption. This commit makes it use `$CLICKHOUSE_CLIENT_BINARY` instead. 
--- .../0_stateless/02327_capnproto_protobuf_empty_messages.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh index 69e65112305..dfc0dedeaf1 100755 --- a/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh +++ b/tests/queries/0_stateless/02327_capnproto_protobuf_empty_messages.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') touch $USER_FILES_PATH/data.capnp -SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=$($CLICKHOUSE_CLIENT_BINARY --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02327 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR From 5abeadf20fbe3f3697d60504ae6ae53b9f653900 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 13 Dec 2023 08:53:04 +0000 Subject: [PATCH 78/98] Fix: stop merges, otherwise test can be flaky --- tests/integration/test_parallel_replicas_working_set/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py index 04768694cc4..6292d33ed3e 100644 --- a/tests/integration/test_parallel_replicas_working_set/test.py +++ b/tests/integration/test_parallel_replicas_working_set/test.py @@ -37,10 +37,12 @@ def create_tables(cluster, table_name, node_with_covering_part): f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)""" ) - # stop merges + # stop merges to keep original parts + # stop fetches to keep only parts created on the nodes for i in (0, 1, 2): if i != node_with_covering_part: nodes[i].query(f"system stop fetches {table_name}") + nodes[i].query(f"system stop merges {table_name}") # populate data, equal number of rows for each replica nodes[0].query( From 7762beaf6cd64f2553e81db5f7b1e5ba4ea4d8bd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 13 Dec 2023 11:23:53 +0000 Subject: [PATCH 79/98] Fix: w/o replicas sync query result can vary --- .../test_parallel_replicas_over_distributed/test.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py index ecfc2ddea63..aecc0fcdcb8 100644 --- a/tests/integration/test_parallel_replicas_over_distributed/test.py +++ b/tests/integration/test_parallel_replicas_over_distributed/test.py @@ -129,6 +129,9 @@ def test_parallel_replicas_over_distributed( node = nodes[0] expected_result = f"6003\t-1999\t1999\t3\n" + # sync all replicas 
to get consistent result + node.query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster} {table_name}") + # parallel replicas assert ( node.query( @@ -143,11 +146,12 @@ def test_parallel_replicas_over_distributed( == expected_result ) - # sync all replicas to get consistent result by next distributed query - node.query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster} {table_name}") - # w/o parallel replicas assert ( - node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d") + node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d", + settings={ + "allow_experimental_parallel_reading_from_replicas": 0, + } + ) == expected_result ) From 7d9e9fd42eab9ae926d7bbd748627e8272c6afec Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Dec 2023 11:38:41 +0000 Subject: [PATCH 80/98] Automatic style fix --- .../test_parallel_replicas_over_distributed/test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py index aecc0fcdcb8..ebff0309a4f 100644 --- a/tests/integration/test_parallel_replicas_over_distributed/test.py +++ b/tests/integration/test_parallel_replicas_over_distributed/test.py @@ -148,10 +148,11 @@ def test_parallel_replicas_over_distributed( # w/o parallel replicas assert ( - node.query(f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d", + node.query( + f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d", settings={ "allow_experimental_parallel_reading_from_replicas": 0, - } + }, ) == expected_result ) From 8c2137e0c62721d6867cc252d9f2985e6b9d5339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 13 Dec 2023 13:09:36 +0100 Subject: [PATCH 81/98] Revert "Merge pull request #57741 from ucasfl/negtive-position" This reverts commit 3d846800e0bdd94916ed8b8faf1c1bc7868ca933, reversing changes made to b31b4c932f78c8ea4f65657f88d65b494de15db0. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 27 ++---- .../replaceForPositionalArguments.cpp | 25 +---- .../0_stateless/01162_strange_mutations.sh | 2 +- .../0_stateless/01798_having_push_down.sql | 3 +- .../02006_test_positional_arguments.reference | 94 ------------------- .../02006_test_positional_arguments.sql | 21 ----- .../02932_group_by_null_fuzzer.sql | 1 - 7 files changed, 14 insertions(+), 159 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a3b461f32ea..1e63d5ca8e4 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2153,32 +2153,19 @@ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_ node_to_replace = &sort_node->getExpression(); auto * constant_node = (*node_to_replace)->as(); - - if (!constant_node - || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64)) + if (!constant_node || constant_node->getValue().getType() != Field::Types::UInt64) continue; - UInt64 pos; - if (constant_node->getValue().getType() == Field::Types::UInt64) - { - pos = constant_node->getValue().get(); - } - else // Int64 - { - auto value = constant_node->getValue().get(); - pos = value > 0 ? 
value : projection_nodes.size() + value + 1; - } - - - if (!pos || pos > projection_nodes.size()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, + UInt64 positional_argument_number = constant_node->getValue().get(); + if (positional_argument_number == 0 || positional_argument_number > projection_nodes.size()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", - pos, + positional_argument_number, projection_nodes.size(), scope.scope_node->formatASTForErrorMessage()); - *node_to_replace = projection_nodes[--pos]; + --positional_argument_number; + *node_to_replace = projection_nodes[positional_argument_number]; } } diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index 8306da17f52..241dd7cf92c 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -27,29 +27,14 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel return false; auto which = ast_literal->value.getType(); - if (which != Field::Types::UInt64 && which != Field::Types::Int64) + if (which != Field::Types::UInt64) return false; - UInt64 pos; - - if (which == Field::Types::UInt64) - { - pos = ast_literal->value.get(); - } - else if (which == Field::Types::Int64) - { - auto value = ast_literal->value.get(); - pos = value > 0 ? value : columns.size() + value + 1; - } - else - { - return false; - } - - + auto pos = ast_literal->value.get(); if (!pos || pos > columns.size()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Positional argument out of bounds: {} (expected in range [1, {}]", pos, columns.size()); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Positional argument out of bounds: {} (expected in range [1, {}]", + pos, columns.size()); const auto & column = columns[--pos]; if (typeid_cast(column.get()) || typeid_cast(column.get())) diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index f6b31847c1e..eea9ea5f7e5 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -28,7 +28,7 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica" $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" if [[ $engine == *"ReplicatedMergeTree"* ]]; then - $CLICKHOUSE_CLIENT --enable_positional_arguments=0 -q "ALTER TABLE test + $CLICKHOUSE_CLIENT -q "ALTER TABLE test UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 'dummy')[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "statement with subquery may be nondeterministic" $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "ALTER TABLE test UPDATE test = (SELECT groupArray(id) FROM t1)[n - 99] WHERE 1" diff --git a/tests/queries/0_stateless/01798_having_push_down.sql b/tests/queries/0_stateless/01798_having_push_down.sql index c0c3447f5ab..b3a77c8f5b5 100644 --- a/tests/queries/0_stateless/01798_having_push_down.sql +++ b/tests/queries/0_stateless/01798_having_push_down.sql @@ -8,12 +8,11 @@ SELECT sum(c0 = 0), min(c0 + 1), sum(c0 + 2) FROM t_having GROUP BY c0 HAVING c0 = 0 SETTINGS enable_optimize_predicate_expression=0; -SET enable_positional_arguments=0; - SELECT c0 + -1, 
sum(intDivOrZero(intDivOrZero(NULL, NULL), '2'), intDivOrZero(10000000000., intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), 10), NULL))) FROM t_having GROUP BY c0 = 2, c0 = 10, intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), NULL), NULL), c0 HAVING c0 = 2 SETTINGS enable_optimize_predicate_expression = 0; SELECT sum(c0 + 257) FROM t_having GROUP BY c0 = -9223372036854775808, NULL, -2147483649, c0 HAVING c0 = -9223372036854775808 SETTINGS enable_optimize_predicate_expression = 0; +SET enable_positional_arguments=0; SELECT c0 + -2, c0 + -9223372036854775807, c0 = NULL FROM t_having GROUP BY c0 = 0.9998999834060669, 1023, c0 HAVING c0 = 0.9998999834060669 SETTINGS enable_optimize_predicate_expression = 0; DROP TABLE t_having; diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference index 079bd071103..40100e8d5be 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.reference +++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference @@ -3,50 +3,18 @@ select x3, x2, x1 from test order by 1; 1 100 100 10 1 10 100 10 1 -select x3, x2, x1 from test order by -3; -1 100 100 -10 1 10 -100 10 1 select x3, x2, x1 from test order by x3; 1 100 100 10 1 10 100 10 1 -select x3, x2, x1 from test order by 3; -100 10 1 -10 1 10 -1 100 100 -select x3, x2, x1 from test order by -1; -100 10 1 -10 1 10 -1 100 100 -select x3, x2, x1 from test order by x1; -100 10 1 -10 1 10 -1 100 100 select x3, x2, x1 from test order by 1 desc; 100 10 1 10 1 10 1 100 100 -select x3, x2, x1 from test order by -3 desc; -100 10 1 -10 1 10 -1 100 100 select x3, x2, x1 from test order by x3 desc; 100 10 1 10 1 10 1 100 100 -select x3, x2, x1 from test order by 3 desc; -1 100 100 -10 1 10 -100 10 1 -select x3, x2, x1 from test order by -1 desc; -1 100 100 -10 1 10 -100 10 1 -select x3, x2, x1 from test order by x1 desc; -1 100 100 -10 1 10 -100 10 1 insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x3, x2 from test group by x3, x2 order by x3; 1 100 @@ -86,20 +54,6 @@ SELECT x1 FROM test ORDER BY x3 + 1 ASC -explain syntax select x3, x2, x1 from test order by -1; -SELECT - x3, - x2, - x1 -FROM test -ORDER BY x1 ASC -explain syntax select x3 + 1, x2, x1 from test order by -1; -SELECT - x3 + 1, - x2, - x1 -FROM test -ORDER BY x1 ASC explain syntax select x3, x3 - x2, x2, x1 from test order by 2; SELECT x3, @@ -108,14 +62,6 @@ SELECT x1 FROM test ORDER BY x3 - x2 ASC -explain syntax select x3, x3 - x2, x2, x1 from test order by -2; -SELECT - x3, - x3 - x2, - x2, - x1 -FROM test -ORDER BY x2 ASC explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2; SELECT x3, @@ -123,28 +69,12 @@ SELECT x1 + x2 FROM test ORDER BY if(x3 > 10, x3, x1 + x2) ASC -explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2; -SELECT - x3, - if(x3 > 10, x3, x1 + x2), - x1 + x2 -FROM test -ORDER BY if(x3 > 10, x3, x1 + x2) ASC explain syntax select max(x1), x2 from test group by 2 order by 1, 2; SELECT max(x1), x2 FROM test GROUP BY x2 -ORDER BY - max(x1) ASC, - x2 ASC -explain syntax select max(x1), x2 from test group by -1 order by -2, -1; -SELECT - max(x1), - x2 -FROM test -GROUP BY x2 ORDER BY max(x1) ASC, x2 ASC @@ -153,34 +83,16 @@ SELECT 1 + greatest(x1, 1), x2 FROM test -GROUP BY - 1 + greatest(x1, 1), - x2 -explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1; -SELECT - 1 + greatest(x1, 1), - x2 -FROM test GROUP BY 1 + 
greatest(x1, 1), x2 select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } -select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } -select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; SELECT x1 + x3, x3 FROM test -GROUP BY - x1 + x3, - x3 -explain syntax select x1 + x3, x3 from test group by -2, -1; -SELECT - x1 + x3, - x3 -FROM test GROUP BY x1 + x3, x3 @@ -190,14 +102,8 @@ select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 1 2 10 100 10 20 1 10 100 200 100 1 -select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc; -1 2 10 100 -10 20 1 10 -100 200 100 1 select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a; 44 88 13 14 15 16 -select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a; -44 88 13 14 15 16 explain syntax select plus(1, 1) as a group by a; SELECT 1 + 1 AS a GROUP BY a diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 6f427e0298d..159ad6bd427 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -9,21 +9,11 @@ insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); -- { echo } select x3, x2, x1 from test order by 1; -select x3, x2, x1 from test order by -3; select x3, x2, x1 from test order by x3; -select x3, x2, x1 from test order by 3; -select x3, x2, x1 from test order by -1; -select x3, x2, x1 from test order by x1; - select x3, x2, x1 from test order by 1 desc; -select x3, x2, x1 from test order by -3 desc; select x3, x2, x1 from test order by x3 desc; -select x3, x2, x1 from test order by 3 desc; -select x3, x2, x1 from test order by -1 desc; -select x3, x2, x1 from test order by x1 desc; - insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x3, x2 from test group by x3, x2 order by x3; select x3, x2 from test group by 1, 2 order by x3; @@ -35,32 +25,21 @@ select x1, x2, x3 from test order by 3 limit 1 by 1; explain syntax select x3, x2, x1 from test order by 1; explain syntax select x3 + 1, x2, x1 from test order by 1; -explain syntax select x3, x2, x1 from test order by -1; -explain syntax select x3 + 1, x2, x1 from test order by -1; explain syntax select x3, x3 - x2, x2, x1 from test order by 2; -explain syntax select x3, x3 - x2, x2, x1 from test order by -2; explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by 2; -explain syntax select x3, if(x3 > 10, x3, plus(x1, x2)), x1 + x2 from test order by -2; explain syntax select max(x1), x2 from test group by 2 order by 1, 2; -explain syntax select max(x1), x2 from test group by -1 order by -2, -1; explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2; -explain syntax select 1 + greatest(x1, 1), x2 from test group by -2, -1; select max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43, 184 } -select max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } -select 1 + max(x1), x2 from test group by -2, -1; -- { serverError 43, 184 } explain syntax select x1 + x3, x3 from test group by 1, 2; -explain syntax select x1 + 
x3, x3 from test group by -2, -1; create table test2(x1 Int, x2 Int, x3 Int) engine=Memory; insert into test2 values (1, 10, 100), (10, 1, 10), (100, 100, 1); select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, 4 desc, 3 asc; -select x1, x1 * 2, max(x2), max(x3) from test2 group by 2, 1, x1 order by 1, 2, -1 desc, -2 asc; select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,4,5,6 order by a; -select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t group by 1,2,3,-3,-2,-1 order by a; explain syntax select plus(1, 1) as a group by a; select substr('aaaaaaaaaaaaaa', 8) as a group by a order by a; diff --git a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql index 603c7783ef8..0c28c120d40 100644 --- a/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql +++ b/tests/queries/0_stateless/02932_group_by_null_fuzzer.sql @@ -1,6 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/43202 -- Queries are generated by the fuzzer, so don't expect them to make sense -SET enable_positional_arguments=0; SELECT NULL, '' FROM (SELECT toNullable(''), NULL AS key GROUP BY GROUPING SETS ((NULL))) AS s1 ALL LEFT JOIN (SELECT '' AS key, NULL AS value GROUP BY GROUPING SETS (('')) WITH TOTALS UNION ALL SELECT NULL AS key, toNullable(NULL) AS value GROUP BY '', NULL, '' WITH TOTALS) AS s2 USING (key); SELECT NULL GROUP BY NULL WITH TOTALS; SELECT 1048575, NULL, b FROM (SELECT '25.5' AS a, NULL, NULL AS b GROUP BY GROUPING SETS ((0.0001)) WITH TOTALS) AS js1 ANY RIGHT JOIN (SELECT NULL AS a, NULL AS b WHERE NULL GROUP BY NULL, -9223372036854775807 WITH CUBE WITH TOTALS UNION ALL SELECT NULL AS a, NULL AS b GROUP BY 1, '21474836.46' WITH TOTALS) AS js2 USING (a, b) ORDER BY nan DESC NULLS LAST, '9223372036854775807' DESC NULLS LAST, a ASC NULLS LAST; From 090d412d7cc37104ba90355c880a357fbd34e091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 13 Dec 2023 13:14:00 +0100 Subject: [PATCH 82/98] Add tests for 46628 --- .../02943_positional_arguments_bugs.reference | 2 ++ .../02943_positional_arguments_bugs.sql | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/02943_positional_arguments_bugs.reference create mode 100644 tests/queries/0_stateless/02943_positional_arguments_bugs.sql diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.reference b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference new file mode 100644 index 00000000000..702e1261186 --- /dev/null +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.reference @@ -0,0 +1,2 @@ +45 1 +processed 99 0 diff --git a/tests/queries/0_stateless/02943_positional_arguments_bugs.sql b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql new file mode 100644 index 00000000000..b8cf73da42d --- /dev/null +++ b/tests/queries/0_stateless/02943_positional_arguments_bugs.sql @@ -0,0 +1,23 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/46628 +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `n` int +) + ENGINE = MergeTree + ORDER BY n AS +SELECT * +FROM numbers(10); + +SELECT + sum(n), + 1 AS x +FROM t +GROUP BY x; + +SELECT + 'processed' AS type, + max(number) AS max_date, + min(number) AS min_date +FROM numbers(100) +GROUP BY type; From 2a0a5f755c166604ca67901559afa50261556222 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 13 Dec 2023 12:29:57 +0000 Subject: [PATCH 83/98] 
Comment to about possible query results --- tests/integration/test_parallel_replicas_working_set/test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py index 6292d33ed3e..0ede9d9b1a5 100644 --- a/tests/integration/test_parallel_replicas_working_set/test.py +++ b/tests/integration/test_parallel_replicas_working_set/test.py @@ -102,8 +102,12 @@ def test_covering_part_in_announcement(start_cluster, node_with_covering_part): table_name = "test_table" create_tables(cluster, table_name, node_with_covering_part) + # query result can be one of the following outcomes + # (1) query result if parallel replicas working set contains all_0_5_1 expected_full_result = "60\t0\t59\t1770\n" expected_results = {expected_full_result} + + # (2) query result if parallel replicas working set DOESN'T contain all_0_5_1 if node_with_covering_part == 0: expected_results.add("40\t20\t59\t1580\n") if node_with_covering_part == 1: From 54abbf146d1d7dfcdab8da50a4292e2079d40bc8 Mon Sep 17 00:00:00 2001 From: Johnny <9611008+johnnymatthews@users.noreply.github.com> Date: Wed, 13 Dec 2023 09:04:07 -0400 Subject: [PATCH 84/98] Update into-outfile.md --- docs/en/sql-reference/statements/select/into-outfile.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index 985f5e25b05..5b7196f13e3 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -12,7 +12,7 @@ Compressed files are supported. Compression type is detected by the extension of **Syntax** ```sql -SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND] [COMPRESSION type [LEVEL level]] +SELECT INTO OUTFILE file_name [AND STDOUT] [APPEND | TRUNCATE] [COMPRESSION type [LEVEL level]] ``` `file_name` and `type` are string literals. Supported compression types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`. 
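For reference, a minimal usage sketch of the `[APPEND | TRUNCATE]` modifiers documented in the hunk above; the output path is made up and `numbers(10)` merely provides sample rows.

```sql
-- TRUNCATE wipes an existing file before writing; APPEND would add to it instead.
-- Without either modifier the query refuses to overwrite an existing file.
SELECT number
FROM numbers(10)
INTO OUTFILE 'numbers.csv.gz' TRUNCATE
COMPRESSION 'gzip'
FORMAT CSV;
```

With neither modifier a re-run fails once the target file exists, so `TRUNCATE` is the option that makes repeated exports idempotent.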
From c165be76abfb51a2dca9ee9a7baec9e46ce52d34 Mon Sep 17 00:00:00 2001 From: Igor Nikonov <954088+devcrafter@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:42:06 +0100 Subject: [PATCH 85/98] Parallel replicas: friendly settings (#57542) --- docker/test/stateful/run.sh | 2 +- .../ClusterProxy/executeQuery.cpp | 38 ++++++++++++++++++- src/Interpreters/ClusterProxy/executeQuery.h | 3 +- src/Interpreters/Context.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 22 +++++------ src/Storages/StorageReplicatedMergeTree.cpp | 17 +++------ src/Storages/StorageReplicatedMergeTree.h | 4 +- .../test.py | 1 - .../test.py | 3 -- .../test.py | 1 - .../test.py | 2 - ...arallel_reading_from_replicas_benchmark.sh | 1 - .../02731_parallel_replicas_join_subquery.sql | 1 - ...arallel_replicas_bug_chunkinfo_not_set.sql | 2 +- ...764_parallel_replicas_plain_merge_tree.sql | 2 +- ...02765_parallel_replicas_final_modifier.sql | 2 +- ...9_parallel_replicas_unavailable_shards.sql | 2 +- ...02771_parallel_replicas_analyzer.reference | 4 +- .../02771_parallel_replicas_analyzer.sql | 3 +- ...lel_replicas_trivial_count_optimization.sh | 4 -- ...84_parallel_replicas_automatic_decision.sh | 1 - ...rallel_replicas_automatic_decision_join.sh | 1 - ...02811_parallel_replicas_prewhere_count.sql | 1 - ...835_parallel_replicas_over_distributed.sql | 8 ++-- .../02841_parallel_replicas_summary.sh | 2 - .../02861_index_set_incorrect_args.sql | 2 +- ...69_parallel_replicas_read_from_several.sql | 2 +- ...parallel_replicas_cluster_all_replicas.sql | 2 +- .../02875_parallel_replicas_remote.sql | 2 +- .../02898_parallel_replicas_progress_bar.sql | 2 +- .../02901_parallel_replicas_rollup.sh | 2 - ...02935_parallel_replicas_settings.reference | 4 ++ .../02935_parallel_replicas_settings.sql | 35 +++++++++++++++++ .../1_stateful/00177_memory_bound_merging.sh | 6 +-- 34 files changed, 114 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/02935_parallel_replicas_settings.reference create mode 100644 tests/queries/0_stateless/02935_parallel_replicas_settings.sql diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index a0def50bfb5..806b57c4616 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -151,7 +151,7 @@ function run_tests() set +e if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then - clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ + clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \ --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \ -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 8a2f7e3205a..f3b7e371f38 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; + extern const int CLUSTER_DOESNT_EXIST; } namespace ClusterProxy @@ -322,11 +323,44 @@ void executeQueryWithParallelReplicas( SelectStreamFactory 
& stream_factory, const ASTPtr & query_ast, ContextPtr context, - std::shared_ptr storage_limits, - const ClusterPtr & not_optimized_cluster) + std::shared_ptr storage_limits) { const auto & settings = context->getSettingsRef(); + + /// check cluster for parallel replicas + if (settings.cluster_for_parallel_replicas.value.empty()) + { + throw Exception( + ErrorCodes::CLUSTER_DOESNT_EXIST, + "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " + "'cluster_for_parallel_replicas' setting"); + } + auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas); + auto new_context = Context::createCopy(context); + + /// check hedged connections setting + if (settings.use_hedged_requests.value) + { + if (settings.use_hedged_requests.changed) + { + LOG_WARNING( + &Poco::Logger::get("executeQueryWithParallelReplicas"), + "Setting 'use_hedged_requests' explicitly with enabled 'allow_experimental_parallel_reading_from_replicas' has no effect. " + "Hedged connections are not used for parallel reading from replicas"); + } + else + { + LOG_INFO( + &Poco::Logger::get("executeQueryWithParallelReplicas"), + "Disabling 'use_hedged_requests' in favor of 'allow_experimental_parallel_reading_from_replicas'. Hedged connections are " + "not used for parallel reading from replicas"); + } + + /// disable hedged connections -> parallel replicas uses own logic to choose replicas + new_context->setSetting("use_hedged_requests", Field{false}); + } + auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{}; UInt64 shard_num = 0; /// shard_num is 1-based, so 0 - no shard specified diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 7ffaa3ae62c..2149d8c1640 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -71,8 +71,7 @@ void executeQueryWithParallelReplicas( SelectStreamFactory & stream_factory, const ASTPtr & query_ast, ContextPtr context, - std::shared_ptr storage_limits, - const ClusterPtr & not_optimized_cluster); + std::shared_ptr storage_limits); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index f0f20e171af..79cfe9a9546 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5020,7 +5020,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const if (!settings_ref.parallel_replicas_custom_key.value.empty()) return CUSTOM_KEY; - if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests) + if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0) return READ_TASKS; return SAMPLE_KEY; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e9a0dd5fbf3..22d72902e8d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -211,17 +211,12 @@ void StorageMergeTree::read( { if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) { - auto table_id = getStorageID(); - + const auto table_id = getStorageID(); const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery( local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; - auto 
cluster = local_context->getCluster(cluster_for_parallel_replicas); - Block header; - if (local_context->getSettingsRef().allow_experimental_analyzer) header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); else @@ -240,17 +235,22 @@ void StorageMergeTree::read( select_stream_factory, modified_query_ast, local_context, - query_info.storage_limits, - cluster); + query_info.storage_limits); } else { const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree; if (auto plan = reader.read( - column_names, storage_snapshot, query_info, - local_context, max_block_size, num_streams, - processed_stage, nullptr, enable_parallel_reading)) + column_names, + storage_snapshot, + query_info, + local_context, + max_block_size, + num_streams, + processed_stage, + nullptr, + enable_parallel_reading)) query_plan = std::move(*plan); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4fb21705534..307870aaf4c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5338,7 +5338,7 @@ void StorageReplicatedMergeTree::read( return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) - return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage); readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); } @@ -5367,18 +5367,11 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, - const size_t /*max_block_size*/, - const size_t /*num_streams*/) + QueryProcessingStage::Enum processed_stage) { - auto table_id = getStorageID(); - - auto scalars = local_context->hasQueryContext() ? 
local_context->getQueryContext()->getScalars() : Scalars{}; - String cluster_for_parallel_replicas = local_context->getSettingsRef().cluster_for_parallel_replicas; - auto parallel_replicas_cluster = local_context->getCluster(cluster_for_parallel_replicas); - ASTPtr modified_query_ast; Block header; + if (local_context->getSettingsRef().allow_experimental_analyzer) { auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); @@ -5389,6 +5382,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( } else { + const auto table_id = getStorageID(); modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); header @@ -5407,8 +5401,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( select_stream_factory, modified_query_ast, local_context, - query_info.storage_limits, - parallel_replicas_cluster); + query_info.storage_limits); } void StorageReplicatedMergeTree::readLocalImpl( diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index a8ab8eb7013..159828effcf 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -582,9 +582,7 @@ private: const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams); + QueryProcessingStage::Enum processed_stage); template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py index 7e12da956ea..8af7bb12595 100644 --- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py +++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py @@ -119,7 +119,6 @@ def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica) "allow_experimental_parallel_reading_from_replicas": 2, "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": 3, - "use_hedged_requests": 0, }, ) == expected_result diff --git a/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py b/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py index 315a9781c8b..af114ade2d7 100644 --- a/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py +++ b/tests/integration/test_parallel_replicas_distributed_skip_shards/test.py @@ -84,7 +84,6 @@ def test_skip_unavailable_shards(start_cluster, prefer_localhost_replica): settings={ "allow_experimental_parallel_reading_from_replicas": 2, "max_parallel_replicas": 3, - "use_hedged_requests": 0, "prefer_localhost_replica": prefer_localhost_replica, "skip_unavailable_shards": 1, "connections_with_failover_max_tries": 0, # just don't wait for unavailable replicas @@ -119,7 +118,6 @@ def test_error_on_unavailable_shards(start_cluster, prefer_localhost_replica): settings={ "allow_experimental_parallel_reading_from_replicas": 2, "max_parallel_replicas": 3, - "use_hedged_requests": 0, "prefer_localhost_replica": prefer_localhost_replica, "skip_unavailable_shards": 0, }, @@ -155,7 +153,6 @@ def test_no_unavailable_shards(start_cluster, skip_unavailable_shards): settings={ "allow_experimental_parallel_reading_from_replicas": 2, "max_parallel_replicas": 3, - "use_hedged_requests": 
0, "prefer_localhost_replica": 0, "skip_unavailable_shards": skip_unavailable_shards, }, diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py index ecfc2ddea63..00b95965b65 100644 --- a/tests/integration/test_parallel_replicas_over_distributed/test.py +++ b/tests/integration/test_parallel_replicas_over_distributed/test.py @@ -137,7 +137,6 @@ def test_parallel_replicas_over_distributed( "allow_experimental_parallel_reading_from_replicas": 2, "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": max_parallel_replicas, - "use_hedged_requests": 0, }, ) == expected_result diff --git a/tests/integration/test_parallel_replicas_skip_shards/test.py b/tests/integration/test_parallel_replicas_skip_shards/test.py index 3df80ba061e..a18c82a53a9 100644 --- a/tests/integration/test_parallel_replicas_skip_shards/test.py +++ b/tests/integration/test_parallel_replicas_skip_shards/test.py @@ -38,7 +38,6 @@ def test_skip_unavailable_shards(start_cluster): settings={ "allow_experimental_parallel_reading_from_replicas": 2, "max_parallel_replicas": 3, - "use_hedged_requests": 0, "skip_unavailable_shards": 1, # "async_socket_for_remote" : 0, # "async_query_sending_for_remote" : 0, @@ -65,7 +64,6 @@ def test_error_on_unavailable_shards(start_cluster): settings={ "allow_experimental_parallel_reading_from_replicas": 2, "max_parallel_replicas": 3, - "use_hedged_requests": 0, "skip_unavailable_shards": 0, }, ) diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index 941f024825a..bc90f4b2c11 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -17,7 +17,6 @@ opts=( --allow_experimental_parallel_reading_from_replicas 1 --parallel_replicas_for_non_replicated_merge_tree 1 --max_parallel_replicas 3 - --use_hedged_requests 0 --cluster_for_parallel_replicas parallel_replicas --iterations 1 diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index 29c20980c14..fa40c96048c 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -23,7 +23,6 @@ SET allow_experimental_analyzer = 0; SET max_parallel_replicas = 3; SET prefer_localhost_replica = 1; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; -SET use_hedged_requests = 0; SET joined_subquery_requires_alias = 0; SELECT '=============== INNER QUERY (NO PARALLEL) ==============='; diff --git a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql index 2ea2cecc7b5..5ec0a1fcc31 100644 --- a/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql +++ b/tests/queries/0_stateless/02751_parallel_replicas_bug_chunkinfo_not_set.sql @@ -18,7 +18,7 @@ INSERT INTO join_inner_table__fuzz_1 SELECT FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) LIMIT 100; -SET max_parallel_replicas = 3, prefer_localhost_replica = 1, use_hedged_requests = 0, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', 
allow_experimental_parallel_reading_from_replicas = 1; +SET max_parallel_replicas = 3, prefer_localhost_replica = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 1; -- SELECT query will write a Warning to the logs SET send_logs_level='error'; diff --git a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql index aaf68dfd300..9caa6f76e89 100644 --- a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql +++ b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql @@ -1,7 +1,7 @@ CREATE TABLE IF NOT EXISTS parallel_replicas_plain (x String) ENGINE=MergeTree() ORDER BY x; INSERT INTO parallel_replicas_plain SELECT toString(number) FROM numbers(10); -SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, cluster_for_parallel_replicas='parallel_replicas'; SET send_logs_level='error'; SET parallel_replicas_for_non_replicated_merge_tree = 0; diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql index f447051e1e5..6c121802b06 100644 --- a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql +++ b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql @@ -2,7 +2,7 @@ CREATE TABLE IF NOT EXISTS parallel_replicas_final (x String) ENGINE=ReplacingMe INSERT INTO parallel_replicas_final SELECT toString(number) FROM numbers(10); -SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, cluster_for_parallel_replicas='parallel_replicas'; SET parallel_replicas_for_non_replicated_merge_tree = 1; SELECT * FROM parallel_replicas_final FINAL FORMAT Null; diff --git a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql index 020a429c109..38d592201e3 100644 --- a/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql +++ b/tests/queries/0_stateless/02769_parallel_replicas_unavailable_shards.sql @@ -4,7 +4,7 @@ INSERT INTO test_parallel_replicas_unavailable_shards SELECT * FROM numbers(10); SYSTEM FLUSH LOGS; -SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; +SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=11, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1; SET send_logs_level='error'; SELECT count() FROM test_parallel_replicas_unavailable_shards WHERE NOT ignore(*); diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference index f688db940d9..35573110550 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.reference @@ -8,5 +8,5 @@ 5935810273536892891 7885388429666205427 
8124171311239967992 -1 1 -- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n SETTINGS\n allow_experimental_analyzer = 1,\n max_parallel_replicas = 2,\n cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n allow_experimental_parallel_reading_from_replicas = 1,\n use_hedged_requests = 0; -0 2 SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1, use_hedged_requests = 0 +1 1 -- Simple query with analyzer and pure parallel replicas\nSELECT number\nFROM join_inner_table__fuzz_146_replicated\n SETTINGS\n allow_experimental_analyzer = 1,\n max_parallel_replicas = 2,\n cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\',\n allow_experimental_parallel_reading_from_replicas = 1; +0 2 SELECT `join_inner_table__fuzz_146_replicated`.`number` AS `number` FROM `default`.`join_inner_table__fuzz_146_replicated` SETTINGS allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = \'test_cluster_one_shard_three_replicas_localhost\', allow_experimental_parallel_reading_from_replicas = 1 diff --git a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql index 35089c0cedb..88a0d2163d6 100644 --- a/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql +++ b/tests/queries/0_stateless/02771_parallel_replicas_analyzer.sql @@ -24,8 +24,7 @@ FROM join_inner_table__fuzz_146_replicated allow_experimental_analyzer = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', - allow_experimental_parallel_reading_from_replicas = 1, - use_hedged_requests = 0; + allow_experimental_parallel_reading_from_replicas = 1; SYSTEM FLUSH LOGS; -- There should be 2 different queries diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh index 9cfd3a392c8..6c697095b57 100755 --- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh +++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh @@ -30,7 +30,6 @@ function run_query_with_pure_parallel_replicas () { --query_id "${1}_pure" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \ --allow_experimental_parallel_reading_from_replicas 1 \ --allow_experimental_analyzer 0 @@ -40,7 +39,6 @@ function run_query_with_pure_parallel_replicas () { --query_id "${1}_pure_analyzer" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \ --allow_experimental_parallel_reading_from_replicas 1 \ --allow_experimental_analyzer 1 @@ -56,7 +54,6 @@ function run_query_with_custom_key_parallel_replicas () { --query "$2" \ --query_id "${1}_custom_key" \ --max_parallel_replicas 3 \ - --use_hedged_requests 0 \ --parallel_replicas_custom_key_filter_type 'default' \ --parallel_replicas_custom_key "$2" \ --allow_experimental_analyzer 0 @@ 
-65,7 +62,6 @@ function run_query_with_custom_key_parallel_replicas () { --query "$2" \ --query_id "${1}_custom_key_analyzer" \ --max_parallel_replicas 3 \ - --use_hedged_requests 0 \ --parallel_replicas_custom_key_filter_type 'default' \ --parallel_replicas_custom_key "$2" \ --allow_experimental_analyzer 1 diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh index 741b51284fe..8a3b34e5cfa 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh @@ -49,7 +49,6 @@ function run_query_with_pure_parallel_replicas () { --query_id "${1}_pure" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas "parallel_replicas" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index ed68a304b85..baeeb820da5 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -64,7 +64,6 @@ function run_query_with_pure_parallel_replicas () { --query_id "${1}_pure" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas "parallel_replicas" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ diff --git a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql index 374d73d7d03..14edeecf57e 100644 --- a/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql +++ b/tests/queries/0_stateless/02811_parallel_replicas_prewhere_count.sql @@ -13,7 +13,6 @@ SET skip_unavailable_shards=1, allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, -use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1, parallel_replicas_min_number_of_rows_per_replica=1000; diff --git a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql index 60aa5748575..1e6f9304c0c 100644 --- a/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql +++ b/tests/queries/0_stateless/02835_parallel_replicas_over_distributed.sql @@ -14,13 +14,13 @@ insert into test select *, today() from numbers(100); SELECT count(), min(id), max(id), avg(id) FROM test_d -SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1; insert into test select *, today() from numbers(100); SELECT count(), min(id), max(id), avg(id) FROM test_d -SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; +SETTINGS 
allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1; -- 2 shards @@ -38,10 +38,10 @@ insert into test2 select *, today() from numbers(100); SELECT count(), min(id), max(id), avg(id) FROM test2_d -SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1; insert into test2 select *, today() from numbers(100); SELECT count(), min(id), max(id), avg(id) FROM test2_d -SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1, use_hedged_requests=0; +SETTINGS allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree=1; diff --git a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh index 792c45b06d6..c82d2c8b0c0 100755 --- a/tests/queries/0_stateless/02841_parallel_replicas_summary.sh +++ b/tests/queries/0_stateless/02841_parallel_replicas_summary.sh @@ -36,7 +36,6 @@ echo " cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, - use_hedged_requests = 0, interactive_delay=0 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_0" --data-binary @- -vvv 2>&1 \ @@ -51,7 +50,6 @@ echo " cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, - use_hedged_requests = 0, interactive_delay=99999999999 "\ | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&query_id=${query_id_base}_interactive_high" --data-binary @- -vvv 2>&1 \ diff --git a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql index fa51f5c9abc..17b505cd051 100644 --- a/tests/queries/0_stateless/02861_index_set_incorrect_args.sql +++ b/tests/queries/0_stateless/02861_index_set_incorrect_args.sql @@ -2,5 +2,5 @@ DROP TABLE IF EXISTS set_index__fuzz_41; CREATE TABLE set_index__fuzz_41 (`a` Date, `b` Nullable(DateTime64(3)), INDEX b_set b TYPE set(0) GRANULARITY 1) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO set_index__fuzz_41 (a) VALUES (today()); -SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2, use_hedged_requests=0; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } +SELECT b FROM set_index__fuzz_41 WHERE and(b = 256) SETTINGS force_data_skipping_indices = 'b_set', optimize_move_to_prewhere = 0, max_parallel_replicas=2, parallel_replicas_for_non_replicated_merge_tree=1, allow_experimental_parallel_reading_from_replicas=2; -- { serverError TOO_FEW_ARGUMENTS_FOR_FUNCTION } DROP TABLE set_index__fuzz_41; diff --git 
a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql index 9559b46fa08..e040fae1fa6 100644 --- a/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql +++ b/tests/queries/0_stateless/02869_parallel_replicas_read_from_several.sql @@ -24,5 +24,5 @@ system sync replica t3; SELECT count(), min(k), max(k), avg(k) FROM t1 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, use_hedged_requests=0, +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 3, prefer_localhost_replica = 0, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = 0.001; diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql index 1201a156246..f59d38ceb04 100644 --- a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql +++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS tt; CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); INSERT INTO tt SELECT * FROM numbers(10); -SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1; SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_localhost', currentDatabase(), tt) settings log_comment='02875_190aed82-2423-413b-ad4c-24dcca50f65b'; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql index f47fc559df9..5fbaf34b621 100644 --- a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql +++ b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS tt; CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); INSERT INTO tt SELECT * FROM numbers(10); -SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1; SELECT count() FROM remote('127.0.0.{1..6}', currentDatabase(), tt) settings log_comment='02875_89f3c39b-1919-48cb-b66e-ef9904e73146'; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql index 70a1cedf663..6b2f146efd0 100644 --- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql +++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql @@ -14,7 +14,7 @@ system sync replica t1; system sync replica t2; system sync replica t3; -SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1, 
cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; -- default coordinator SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b'; diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh index f23b80348c1..9c922ec4723 100755 --- a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh @@ -29,7 +29,6 @@ $CLICKHOUSE_CLIENT \ --query_id "${query_id}" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas "parallel_replicas" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ @@ -63,7 +62,6 @@ $CLICKHOUSE_CLIENT \ --query_id "${query_id}" \ --max_parallel_replicas 3 \ --prefer_localhost_replica 1 \ - --use_hedged_requests 0 \ --cluster_for_parallel_replicas "parallel_replicas" \ --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ diff --git a/tests/queries/0_stateless/02935_parallel_replicas_settings.reference b/tests/queries/0_stateless/02935_parallel_replicas_settings.reference new file mode 100644 index 00000000000..846d77bfa57 --- /dev/null +++ b/tests/queries/0_stateless/02935_parallel_replicas_settings.reference @@ -0,0 +1,4 @@ +10 +1 +10 +1 diff --git a/tests/queries/0_stateless/02935_parallel_replicas_settings.sql b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql new file mode 100644 index 00000000000..be6f1c2958c --- /dev/null +++ b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS test_parallel_replicas_settings; +CREATE TABLE test_parallel_replicas_settings (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); +INSERT INTO test_parallel_replicas_settings SELECT * FROM numbers(10); + +SET allow_experimental_parallel_reading_from_replicas=2, max_parallel_replicas=3, parallel_replicas_for_non_replicated_merge_tree=1; + +SET cluster_for_parallel_replicas=''; +SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*); -- { serverError CLUSTER_DOESNT_EXIST } + +SET cluster_for_parallel_replicas='parallel_replicas'; +SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*) settings log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f'; + +SYSTEM FLUSH LOGS; + +SELECT count() > 0 FROM system.text_log +WHERE yesterday() <= event_date + AND query_id in (select query_id from system.query_log where current_database=currentDatabase() AND log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f') + AND level = 'Information' + AND message ILIKE '%Disabling ''use_hedged_requests'' in favor of ''allow_experimental_parallel_reading_from_replicas''%' +SETTINGS allow_experimental_parallel_reading_from_replicas=0; + +SET use_hedged_requests=1; +SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*) settings log_comment='1_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f'; + +SYSTEM FLUSH LOGS; + +SET allow_experimental_parallel_reading_from_replicas=0; +SELECT count() > 0 FROM system.text_log +WHERE yesterday() <= event_date + AND query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '1_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f') + AND level = 'Warning' + AND message ILIKE '%Setting ''use_hedged_requests'' explicitly with enabled 
''allow_experimental_parallel_reading_from_replicas'' has no effect%' +SETTINGS allow_experimental_parallel_reading_from_replicas=0; + +DROP TABLE test_parallel_replicas_settings; diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index ce889b338d6..d5cd1a05cd8 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -31,7 +31,7 @@ test1() { GROUP BY CounterID, URL, EventDate ORDER BY URL, EventDate LIMIT 5 OFFSET 10 - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0" + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3" check_replicas_read_in_order $query_id } @@ -48,7 +48,7 @@ test2() { GROUP BY URL, EventDate ORDER BY URL, EventDate LIMIT 5 OFFSET 10 - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0, query_plan_aggregation_in_order = 1" + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, query_plan_aggregation_in_order = 1" check_replicas_read_in_order $query_id } @@ -64,7 +64,7 @@ test3() { FROM test.hits WHERE CounterID = 1704509 AND UserID = 4322253409885123546 GROUP BY URL, EventDate - SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, use_hedged_requests = 0 + SETTINGS optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3 ) WHERE explain LIKE '%Aggr%Transform%' OR explain LIKE '%InOrder%'" } From fa7190805a64aa447b9bb568db26f614f15adfb5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Dec 2023 17:59:55 +0100 Subject: [PATCH 86/98] A timeout --- docker/test/clickbench/run.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index 921d2023fd7..ba9580f55ae 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -1,5 +1,8 @@ #!/bin/bash +SCRIPT_PID=$! 
+(sleep 1200 && kill -9 $SCRIPT_PID) & + # shellcheck disable=SC1091 source /setup_export_logs.sh From 1d70c9464e6e23dde1e5a9370bb9ae1d57bec9c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Dec 2023 18:01:18 +0100 Subject: [PATCH 87/98] Add it to master checks --- .github/workflows/master.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 05654926fd7..771de46be13 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -555,6 +555,27 @@ jobs: cd "$REPO_COPY/tests/ci" python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" ############################################################################################## +########################### ClickBench ####################################################### +############################################################################################## + ClickBenchAMD64: + needs: [BuilderDebRelease] + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickBench (amd64) + runner_type: func-tester + run_command: | + cd "$REPO_COPY/tests/ci" + python3 clickbench.py "$CHECK_NAME" + ClickBenchAarch64: + needs: [BuilderDebAarch64] + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickBench (aarch64) + runner_type: func-tester-aarch64 + run_command: | + cd "$REPO_COPY/tests/ci" + python3 clickbench.py "$CHECK_NAME" +############################################################################################## ######################################### STRESS TESTS ####################################### ############################################################################################## StressTestAsan: From 055c2314381edba5e9f946f7450f886d09024ba0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 13 Dec 2023 17:59:53 +0100 Subject: [PATCH 88/98] Disable system.kafka_consumers by default (due to possible live memory leak) It is not safe to use statistics because of how KafkaEngine works - it pre-creates consumers, and this leads to the situation when this statistics entries generated (RD_KAFKA_OP_STATS), but never consumed. Which creates a live memory leak for a server with Kafka tables, but without materialized view attached to it (and no SELECT). Another problem is that this makes shutdown very slow, because of how pending queue entries are handled in librdkafka, it uses TAILQ_INSERT_SORTED, which is sorted insert into linked list, which works incredibly slow (likely you will never wait till it ends and kill the server) For instance in my production setup the server was running for ~67 days with such table, and it got 1'942'233 `TAILQ_INSERT_SORTED` entries (which perfectly matches by the way - `67*86400/3` = 1'929'600), and it moved only 289'806 entries for a few hours, though I'm not sure how much time the process was in the running state, since most of the time it was with debugger attached. So for now let's disable it, to make this patch easy for backporting, and I will think about long term fix - do not pre-create consumers in Kafka engine. 
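To illustrate the access pattern described above, here is a toy sorted insert into a singly linked list (deliberately not librdkafka code; all names are made up): every insert walks the list from the head, so n queued entries cost on the order of n^2 pointer hops, which is why a backlog of ~1.9 million statistics ops drains so slowly.

```cpp
#include <cstdio>

/// Toy illustration of the TAILQ_INSERT_SORTED cost pattern, not actual librdkafka code.
struct Entry
{
    long long prio = 0;
    Entry * next = nullptr;
};

/// Sorted insert into a singly linked list: O(n) walk per insert, O(n^2) for n inserts.
void insertSorted(Entry *& head, Entry * e)
{
    Entry ** cur = &head;
    while (*cur && (*cur)->prio <= e->prio)
        cur = &(*cur)->next;
    e->next = *cur;
    *cur = e;
}

int main()
{
    const long long n = 50000; /// the real backlog above was ~1.9 million entries
    Entry * pool = new Entry[n];
    Entry * head = nullptr;
    for (long long i = 0; i < n; ++i)
    {
        pool[i].prio = i; /// monotonically growing keys: each insert walks the whole list
        insertSorted(head, &pool[i]);
    }
    std::printf("inserted %lld entries\n", n);
    delete [] pool;
    return 0;
}
```

Not generating the statistics in the first place sidesteps this queue entirely, which is what the change below does.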
Signed-off-by: Azat Khuzhin --- src/Storages/Kafka/StorageKafka.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index c17defca673..34138b2237f 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -661,10 +661,19 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config, if (kafka_consumer_weak_ptr_ptr) { + /// NOTE: statistics should be consumed, otherwise it creates too much + /// entries in the queue, that leads to memory leak and slow shutdown. + /// + /// This is the case when you have kafka table but no SELECT from it or + /// materialized view attached. + /// + /// So for now it is disabled by default, until properly fixed. +#if 0 if (!config.has(config_prefix + "." + "statistics_interval_ms")) { kafka_config.set("statistics.interval.ms", "3000"); // every 3 seconds by default. set to 0 to disable. } +#endif if (kafka_config.get("statistics.interval.ms") != "0") { From 30e6797c7e63488056405dc69c8f46ec22d737fd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Dec 2023 17:19:14 +0000 Subject: [PATCH 89/98] Update version_date.tsv and changelogs after v23.11.2.11-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.11.2.11-stable.md | 22 ++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.11.2.11-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 8fc639af1a7..a238a9851d9 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.11.1.2711" +ARG VERSION="23.11.2.11" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 76b03218eab..31dbc38708f 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.11.1.2711" +ARG VERSION="23.11.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index c6dfcf9f679..6bbec625300 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.11.1.2711" +ARG VERSION="23.11.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.11.2.11-stable.md b/docs/changelogs/v23.11.2.11-stable.md new file mode 100644 index 00000000000..490cc9a4590 --- /dev/null +++ b/docs/changelogs/v23.11.2.11-stable.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.11.2.11-stable (6e5411358c8) FIXME as compared to v23.11.1.2711-stable (05bc8ef1e02) + +#### Improvement +* Backported in [#57661](https://github.com/ClickHouse/ClickHouse/issues/57661): Handle sigabrt case when getting PostgreSQl table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix SIGSEGV for aggregation of sparse columns with any() RESPECT NULL [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631) [#57766](https://github.com/ClickHouse/ClickHouse/pull/57766) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index cb4102b3072..f319f57e0b9 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.11.2.11-stable 2023-12-13 v23.11.1.2711-stable 2023-12-06 v23.10.5.20-stable 2023-11-25 v23.10.4.25-stable 2023-11-17 From 560e66f3ca5f76475b18ef4d6a3efe2b9a21a9af Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 13 Dec 2023 19:51:49 +0100 Subject: [PATCH 90/98] More respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` (#57763) --- .../ParallelReplicasReadingCoordinator.cpp | 62 +++++++++---------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 9137dc89705..757d1461769 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -300,20 +300,20 @@ void DefaultCoordinator::selectPartsAndRanges(const PartRefs & container, size_t while (!part->description.ranges.empty() && current_mark_size < min_number_of_marks) { auto & range = part->description.ranges.front(); + const size_t needed = min_number_of_marks - current_mark_size; - if (range.getNumberOfMarks() > min_number_of_marks) + if (range.getNumberOfMarks() > needed) { - auto new_range = range; - range.begin += min_number_of_marks; - new_range.end = new_range.begin + min_number_of_marks; + auto range_we_take = MarkRange{range.begin, range.begin + needed}; + response.description.back().ranges.emplace_back(range_we_take); + current_mark_size += range_we_take.getNumberOfMarks(); - response.description.back().ranges.emplace_back(new_range); - current_mark_size += new_range.getNumberOfMarks(); - continue; + range.begin += needed; + break; } - current_mark_size += part->description.ranges.front().getNumberOfMarks(); - response.description.back().ranges.emplace_back(part->description.ranges.front()); + response.description.back().ranges.emplace_back(range); + current_mark_size += range.getNumberOfMarks(); part->description.ranges.pop_front(); } } @@ -473,23 +473,21 @@ ParallelReadResponse InOrderCoordinator::handleRequest(ParallelReadRequest { while (!global_part_it->description.ranges.empty() && current_mark_size < request.min_number_of_marks) { - auto range = global_part_it->description.ranges.back(); + auto & range = global_part_it->description.ranges.back(); + const size_t needed = request.min_number_of_marks - current_mark_size; - if (range.getNumberOfMarks() > request.min_number_of_marks) + if (range.getNumberOfMarks() > needed) { - auto new_range = range; - range.end -= request.min_number_of_marks; - new_range.begin = new_range.end - request.min_number_of_marks; + auto range_we_take = MarkRange{range.end - needed, range.end}; + part.ranges.emplace_front(range_we_take); + current_mark_size += range_we_take.getNumberOfMarks(); - global_part_it->description.ranges.back() = range; - - part.ranges.emplace_front(new_range); - current_mark_size += new_range.getNumberOfMarks(); - continue; + range.end -= needed; + break; } - current_mark_size += global_part_it->description.ranges.back().getNumberOfMarks(); - part.ranges.emplace_front(global_part_it->description.ranges.back()); + part.ranges.emplace_front(range); + current_mark_size += range.getNumberOfMarks(); global_part_it->description.ranges.pop_back(); } } @@ -497,23 +495,21 @@ ParallelReadResponse 
InOrderCoordinator::handleRequest(ParallelReadRequest { while (!global_part_it->description.ranges.empty() && current_mark_size < request.min_number_of_marks) { - auto range = global_part_it->description.ranges.front(); + auto & range = global_part_it->description.ranges.front(); + const size_t needed = request.min_number_of_marks - current_mark_size; - if (range.getNumberOfMarks() > request.min_number_of_marks) + if (range.getNumberOfMarks() > needed) { - auto new_range = range; - range.begin += request.min_number_of_marks; - new_range.end = new_range.begin + request.min_number_of_marks; + auto range_we_take = MarkRange{range.begin, range.begin + needed}; + part.ranges.emplace_back(range_we_take); + current_mark_size += range_we_take.getNumberOfMarks(); - global_part_it->description.ranges.front() = range; - - part.ranges.emplace_back(new_range); - current_mark_size += new_range.getNumberOfMarks(); - continue; + range.begin += needed; + break; } - current_mark_size += global_part_it->description.ranges.front().getNumberOfMarks(); - part.ranges.emplace_back(global_part_it->description.ranges.front()); + part.ranges.emplace_back(range); + current_mark_size += range.getNumberOfMarks(); global_part_it->description.ranges.pop_front(); } } From b8d274d070b89bdfee578492f8210cd96859fdd8 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 13 Dec 2023 10:59:31 -0800 Subject: [PATCH 91/98] Add malformed output generation to JSON fuzzer (#57646) Randomly modify structural characters of a valid JSON ('{', '}', '[', ']', ':', '"', ',') to generate output that cannot be parsed as JSON. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/56490 --- .../sql-reference/table-functions/fuzzJSON.md | 11 ++++ src/Storages/StorageFuzzJSON.cpp | 48 ++++++++++---- src/Storages/StorageFuzzJSON.h | 1 + .../02918_fuzzjson_table_function.reference | 1 + .../02918_fuzzjson_table_function.sql | 65 +++++++++++++++++-- 5 files changed, 107 insertions(+), 19 deletions(-) diff --git a/docs/en/sql-reference/table-functions/fuzzJSON.md b/docs/en/sql-reference/table-functions/fuzzJSON.md index 74ccb0bcb8a..a64f35691f6 100644 --- a/docs/en/sql-reference/table-functions/fuzzJSON.md +++ b/docs/en/sql-reference/table-functions/fuzzJSON.md @@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] }) - `json_str` (String) - The source string representing structured data in JSON format. - `random_seed` (UInt64) - Manual random seed for producing stable results. - `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer. + - `malform_output` (boolean) - Generate a string that cannot be parsed as a JSON object. - `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string. - `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range. - `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data. 
@@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3; {"BRjE":16137826149911306846} {"XjKE":15076727133550123563} ``` + +``` sql +SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3; +``` + +``` text +U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh +{"name"keFuzzJSON, "g6vVO7TCIk":jTt^ +{"DBhz":YFuzzJSON5} +``` diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 8b2dd7ac692..6bf69efa1dd 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -248,10 +248,10 @@ Field generateRandomFixedValue(const StorageFuzzJSON::Configuration & config, pc return f; } -String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & source) +String fuzzString(UInt64 min_length, UInt64 max_length, pcg64 & rnd, const String & source, std::function charGen) { String result; - result.reserve(config.max_key_length); + result.reserve(max_length); using FA = FuzzAction; auto get_action = [&]() -> FuzzAction @@ -261,7 +261,7 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c }; size_t i = 0; - while (i < source.size() && result.size() < config.max_key_length) + while (i < source.size() && result.size() < max_length) { auto action = get_action(); switch (action) @@ -271,12 +271,12 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c } break; case FA::Edit: { - result.push_back(generateRandomKeyCharacter(rnd)); + result.push_back(charGen(rnd)); ++i; } break; case FA::Add: { - result.push_back(generateRandomKeyCharacter(rnd)); + result.push_back(charGen(rnd)); } break; default: @@ -284,12 +284,24 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c } } - while (result.size() < config.min_key_length) - result.push_back(generateRandomKeyCharacter(rnd)); + while (result.size() < min_length) + result.push_back(charGen(rnd)); return result; } +String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & key) +{ + return fuzzString(config.min_key_length, config.max_key_length, rnd, key, generateRandomKeyCharacter); +} + +// Randomly modify structural characters (e.g. '{', '}', '[', ']', ':', '"') to generate output that cannot be parsed as JSON. +String fuzzJSONStructure(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & s) +{ + return config.should_malform_output ? fuzzString(/*min_length*/ 0, /*max_length*/ s.size(), rnd, s, generateRandomStringValueCharacter) + : s; +} + std::shared_ptr generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, JSONValue::Type type) { @@ -397,7 +409,7 @@ void fuzzJSONObject( if (next_node->key) { writeDoubleQuoted(*next_node->key, out); - out << ":"; + out << fuzzJSONStructure(config, rnd, ":"); } auto & val = next_node->value; @@ -405,7 +417,11 @@ void fuzzJSONObject( if (val.fixed) { if (val.fixed->getType() == Field::Types::Which::String) - writeDoubleQuoted(val.fixed->get(), out); + { + out << fuzzJSONStructure(config, rnd, "\""); + writeText(val.fixed->get(), out); + out << fuzzJSONStructure(config, rnd, "\""); + } else writeFieldText(*val.fixed, out); } @@ -414,9 +430,9 @@ void fuzzJSONObject( if (!val.array && !val.object) return; - const auto & [op, cl, node_list] = val.array ? std::make_tuple('[', ']', *val.array) : std::make_tuple('{', '}', *val.object); + const auto & [op, cl, node_list] = val.array ? 
std::make_tuple("[", "]", *val.array) : std::make_tuple("{", "}", *val.object); - out << op; + out << fuzzJSONStructure(config, rnd, op); bool first = true; for (const auto & ptr : node_list) @@ -426,7 +442,7 @@ void fuzzJSONObject( WriteBufferFromOwnString child_out; if (!first) - child_out << ", "; + child_out << fuzzJSONStructure(config, rnd, ", "); first = false; fuzzJSONObject(ptr, child_out, config, rnd, depth + 1, node_count); @@ -435,7 +451,7 @@ void fuzzJSONObject( break; out << child_out.str(); } - out << cl; + out << fuzzJSONStructure(config, rnd, cl); } } @@ -554,10 +570,11 @@ Pipe StorageFuzzJSON::read( return Pipe::unitePipes(std::move(pipes)); } -static constexpr std::array optional_configuration_keys +static constexpr std::array optional_configuration_keys = {"json_str", "random_seed", "reuse_output", + "malform_output", "probability", "max_output_length", "max_nesting_level", @@ -583,6 +600,9 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if (collection.has("reuse_output")) configuration.should_reuse_output = static_cast(collection.get("reuse_output")); + if (collection.has("malform_output")) + configuration.should_malform_output = static_cast(collection.get("malform_output")); + if (collection.has("probability")) { configuration.probability = collection.get("probability"); diff --git a/src/Storages/StorageFuzzJSON.h b/src/Storages/StorageFuzzJSON.h index f1d78fba85c..027c5a98634 100644 --- a/src/Storages/StorageFuzzJSON.h +++ b/src/Storages/StorageFuzzJSON.h @@ -27,6 +27,7 @@ public: String json_str = "{}"; UInt64 random_seed = randomSeed(); bool should_reuse_output = false; + bool should_malform_output = false; Float64 probability = 0.25; UInt64 max_output_length = 1024; diff --git a/tests/queries/0_stateless/02918_fuzzjson_table_function.reference b/tests/queries/0_stateless/02918_fuzzjson_table_function.reference index 1b5c6f46f77..8ad9e886b49 100644 --- a/tests/queries/0_stateless/02918_fuzzjson_table_function.reference +++ b/tests/queries/0_stateless/02918_fuzzjson_table_function.reference @@ -150,3 +150,4 @@ {} 730 200 +50 diff --git a/tests/queries/0_stateless/02918_fuzzjson_table_function.sql b/tests/queries/0_stateless/02918_fuzzjson_table_function.sql index 6db0c69dbac..398b3572587 100644 --- a/tests/queries/0_stateless/02918_fuzzjson_table_function.sql +++ b/tests/queries/0_stateless/02918_fuzzjson_table_function.sql @@ -92,15 +92,70 @@ SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=0) L SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=11) LIMIT 10; -- { serverError BAD_ARGUMENTS } -- -DROP TABLE IF EXISTS 02918_table_obj; -CREATE TABLE 02918_table_obj (json_obj Object('json')) Engine=Memory; +DROP TABLE IF EXISTS 02918_table_obj1; +CREATE TABLE 02918_table_obj1 (json_obj Object('json')) Engine=Memory; -INSERT INTO 02918_table_obj SELECT * FROM fuzzJSON( +INSERT INTO 02918_table_obj1 SELECT * FROM fuzzJSON( 02918_json_fuzzer, json_str='{"name": "John Doe", "age": 27, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}', random_seed=12345) LIMIT 200; -SELECT count() FROM 02918_table_obj; -DROP TABLE IF EXISTS 02918_table_obj; +SELECT count() FROM 02918_table_obj1; + +DROP TABLE IF EXISTS 02918_table_obj1; + +-- +DROP TABLE IF EXISTS 02918_table_obj2; +CREATE TABLE 02918_table_obj2 (json_obj Object('json')) Engine=Memory; + +INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON( + 02918_json_fuzzer, + json_str= + '{ + "name": { + 
"first": "Joan", + "last": "of Arc" + }, + "birth": {"date": "January 6, 1412", "place": "Domremy, France"}, + "death": {"date": "May 30, 1431", "place": "Rouen, France"}, + "occupation": "Military Leader", + "achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"], + "legacy": { + "honors": ["Canonized Saint", "National Heroine of France"], + "memorials": [ + {"name": "Joan of Arc Memorial", "location": "Domremy"}, + {"name": "Place Jeanne d\'Arc", "location": "Rouen"} + ] + } + }', + random_seed=12345, + max_output_length=1024) LIMIT 50; + +INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON( + 02918_json_fuzzer, + json_str= + '{ + "name": { + "first": "Joan", + "last": "of Arc" + }, + "birth": {"date": "January 6, 1412", "place": "Domremy, France"}, + "death": {"date": "May 30, 1431", "place": "Rouen, France"}, + "occupation": "Military Leader", + "achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"], + "legacy": { + "honors": ["Canonized Saint", "National Heroine of France"], + "memorials": [ + {"name": "Joan of Arc Memorial", "location": "Domremy"}, + {"name": "Place Jeanne d\'Arc", "location": "Rouen"} + ] + } + }', + random_seed=12345, + max_output_length=1024, malform_output=true) LIMIT 50; -- {serverError INCORRECT_DATA } + +SELECT count() FROM 02918_table_obj2; + +DROP TABLE IF EXISTS 02918_table_obj2; DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer; From 781ba523764e0d477bd2552038457565f444b835 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Dec 2023 22:12:53 +0100 Subject: [PATCH 92/98] ClickBench: slightly better --- docker/test/clickbench/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/clickbench/run.sh b/docker/test/clickbench/run.sh index ba9580f55ae..3d27a40bb74 100755 --- a/docker/test/clickbench/run.sh +++ b/docker/test/clickbench/run.sh @@ -55,7 +55,7 @@ QUERY_NUM=1 while read -r query; do echo -n "[" for i in $(seq 1 $TRIES); do - RES=$(clickhouse-client --time --format Null --query "$query" --progress 0 2>&1 ||:) + RES=$(clickhouse-client --query_id "q${QUERY_NUM}-${i}" --time --format Null --query "$query" --progress 0 2>&1 ||:) echo -n "${RES}" [[ "$i" != "$TRIES" ]] && echo -n ", " From c29007beeb876819f8673f237056c1334cceb7fd Mon Sep 17 00:00:00 2001 From: andrewzolotukhin Date: Thu, 14 Dec 2023 04:54:52 +0200 Subject: [PATCH 93/98] Fixed typo in string-functions.md --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index fffbea31d0d..e9db47a5c4c 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1251,7 +1251,7 @@ This function also replaces numeric character references with Unicode characters **Syntax** ``` sql -decodeHTMComponent(x) +decodeHTMLComponent(x) ``` **Arguments** From 15dc0ed610998b847cb0752f5721c55d538fb629 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Dec 2023 04:34:32 +0100 Subject: [PATCH 94/98] Remove `arrayFold` --- .../functions/array-functions.md | 54 ---- src/Functions/array/arrayFold.cpp | 236 ------------------ tests/performance/array_fold.xml | 5 - .../0_stateless/02718_array_fold.reference | 25 -- .../queries/0_stateless/02718_array_fold.sql | 24 -- 5 files changed, 344 deletions(-) delete mode 100644 src/Functions/array/arrayFold.cpp delete mode 100644 
tests/performance/array_fold.xml delete mode 100644 tests/queries/0_stateless/02718_array_fold.reference delete mode 100644 tests/queries/0_stateless/02718_array_fold.sql diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 00efa63c960..a058e1db6b4 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1081,10 +1081,6 @@ Result: └─────────────────────────────────────────────────────────────┘ ``` -**See also** - -- [arrayFold](#arrayfold) - ## arrayReduceInRanges Applies an aggregate function to array elements in given ranges and returns an array containing the result corresponding to each range. The function will return the same result as multiple `arrayReduce(agg_func, arraySlice(arr1, index, length), ...)`. @@ -1127,56 +1123,6 @@ Result: └─────────────────────────────┘ ``` -## arrayFold - -Applies a lambda function to one or more equally-sized arrays and collects the result in an accumulator. - -**Syntax** - -``` sql -arrayFold(lambda_function, arr1, arr2, ..., accumulator) -``` - -**Example** - -Query: - -``` sql -SELECT arrayFold( acc,x -> acc + x*2, [1, 2, 3, 4], toInt64(3)) AS res; -``` - -Result: - -``` text -┌─res─┐ -│ 23 │ -└─────┘ -``` - -**Example with the Fibonacci sequence** - -```sql -SELECT arrayFold( acc,x -> (acc.2, acc.2 + acc.1), range(number), (1::Int64, 0::Int64)).1 AS fibonacci -FROM numbers(1,10); - -┌─fibonacci─┐ -│ 0 │ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -│ 5 │ -│ 8 │ -│ 13 │ -│ 21 │ -│ 34 │ -└───────────┘ -``` - -**See also** - -- [arrayReduce](#arrayreduce) - ## arrayReverse(arr) Returns an array of the same size as the original array containing the elements in reverse order. diff --git a/src/Functions/array/arrayFold.cpp b/src/Functions/array/arrayFold.cpp deleted file mode 100644 index b5b650e7289..00000000000 --- a/src/Functions/array/arrayFold.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include "FunctionArrayMapped.h" -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int TYPE_MISMATCH; -} - -/** - * arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, accum_initial) - apply the expression to each element of the array (or set of arrays). 
- */ -class ArrayFold : public IFunction -{ -public: - static constexpr auto name = "arrayFold"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - void getLambdaArgumentTypes(DataTypes & arguments) const override - { - if (arguments.size() < 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName()); - - DataTypes accumulator_and_array_types(arguments.size() - 1); - accumulator_and_array_types[0] = arguments.back(); - for (size_t i = 1; i < accumulator_and_array_types.size(); ++i) - { - const auto * array_type = checkAndGetDataType(&*arguments[i]); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be of type Array, found {} instead", i + 1, getName(), arguments[i]->getName()); - accumulator_and_array_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); - } - - const auto * lambda_function_type = checkAndGetDataType(arguments[0].get()); - if (!lambda_function_type || lambda_function_type->getArgumentTypes().size() != accumulator_and_array_types.size()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must be a lambda function with {} arguments, found {} instead.", - getName(), accumulator_and_array_types.size(), arguments[0]->getName()); - - arguments[0] = std::make_shared(accumulator_and_array_types); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() < 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires as arguments a lambda function, at least one array and an accumulator", getName()); - - const auto * lambda_function_type = checkAndGetDataType(arguments[0].type.get()); - if (!lambda_function_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); - - auto accumulator_type = arguments.back().type; - auto lambda_type = lambda_function_type->getReturnType(); - if (!accumulator_type->equals(*lambda_type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Return type of lambda function must be the same as the accumulator type, inferred return type of lambda: {}, inferred type of accumulator: {}", - lambda_type->getName(), accumulator_type->getName()); - - return accumulator_type; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto & lambda_function_with_type_and_name = arguments[0]; - - if (!lambda_function_with_type_and_name.column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); - - const auto * lambda_function = typeid_cast(lambda_function_with_type_and_name.column.get()); - if (!lambda_function) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function", getName()); - - ColumnPtr offsets_column; - ColumnPtr column_first_array_ptr; - const ColumnArray * column_first_array = nullptr; - ColumnsWithTypeAndName arrays; - arrays.reserve(arguments.size() - 1); - - /// Validate input types and get input array columns in 
convenient form - for (size_t i = 1; i < arguments.size() - 1; ++i) - { - const auto & array_with_type_and_name = arguments[i]; - ColumnPtr column_array_ptr = array_with_type_and_name.column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); - if (!column_array) - { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); - if (!column_const_array) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected array column, found {}", column_array_ptr->getName()); - column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); - column_array = checkAndGetColumn(column_array_ptr.get()); - } - - const DataTypePtr & array_type_ptr = array_with_type_and_name.type; - const auto * array_type = checkAndGetDataType(array_type_ptr.get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected array type, found {}", array_type_ptr->getName()); - - if (!offsets_column) - offsets_column = column_array->getOffsetsPtr(); - else - { - /// The first condition is optimization: do not compare data if the pointers are equal. - if (column_array->getOffsetsPtr() != offsets_column - && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) - throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} must have equal size", getName()); - } - if (i == 1) - { - column_first_array_ptr = column_array_ptr; - column_first_array = column_array; - } - arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), - array_with_type_and_name.name)); - } - - ssize_t rows_count = input_rows_count; - ssize_t data_row_count = arrays[0].column->size(); - size_t array_count = arrays.size(); - - if (rows_count == 0) - return arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - - ColumnPtr current_column = arguments.back().column->convertToFullColumnIfConst(); - MutableColumnPtr result_data = arguments.back().column->convertToFullColumnIfConst()->cloneEmpty(); - - size_t max_array_size = 0; - const auto & offsets = column_first_array->getOffsets(); - - IColumn::Selector selector(data_row_count); - size_t cur_ind = 0; - ssize_t cur_arr = 0; - - /// skip to the first non empty array - if (data_row_count) - while (offsets[cur_arr] == 0) - ++cur_arr; - - /// selector[i] is an index that i_th data element has in an array it corresponds to - for (ssize_t i = 0; i < data_row_count; ++i) - { - selector[i] = cur_ind; - cur_ind++; - if (cur_ind > max_array_size) - max_array_size = cur_ind; - while (cur_arr < rows_count && cur_ind >= offsets[cur_arr] - offsets[cur_arr - 1]) - { - ++cur_arr; - cur_ind = 0; - } - } - - std::vector data_arrays; - data_arrays.resize(array_count); - - /// Split each data column to columns containing elements of only Nth index in array - if (max_array_size > 0) - for (size_t i = 0; i < array_count; ++i) - data_arrays[i] = arrays[i].column->scatter(max_array_size, selector); - - size_t prev_size = rows_count; - - IColumn::Permutation inverse_permutation(rows_count); - size_t inverse_permutation_count = 0; - - /// current_column after each iteration contains value of accumulator after applying values under indexes of arrays. - /// At each iteration only rows of current_column with arrays that still has unapplied elements are kept. 
- /// Discarded rows which contain finished calculations are added to result_data column and as we insert them we save their original row_number in inverse_permutation vector - for (size_t ind = 0; ind < max_array_size; ++ind) - { - IColumn::Selector prev_selector(prev_size); - size_t prev_ind = 0; - for (ssize_t irow = 0; irow < rows_count; ++irow) - { - if (offsets[irow] - offsets[irow - 1] > ind) - prev_selector[prev_ind++] = 1; - else if (offsets[irow] - offsets[irow - 1] == ind) - { - inverse_permutation[inverse_permutation_count++] = irow; - prev_selector[prev_ind++] = 0; - } - } - auto prev = current_column->scatter(2, prev_selector); - - result_data->insertRangeFrom(*(prev[0]), 0, prev[0]->size()); - - auto res_lambda = lambda_function->cloneResized(prev[1]->size()); - auto * res_lambda_ptr = typeid_cast(res_lambda.get()); - - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(prev[1]), arguments.back().type, arguments.back().name)})); - for (size_t i = 0; i < array_count; i++) - res_lambda_ptr->appendArguments(std::vector({ColumnWithTypeAndName(std::move(data_arrays[i][ind]), arrays[i].type, arrays[i].name)})); - - current_column = IColumn::mutate(res_lambda_ptr->reduce().column); - prev_size = current_column->size(); - } - - result_data->insertRangeFrom(*current_column, 0, current_column->size()); - for (ssize_t irow = 0; irow < rows_count; ++irow) - if (offsets[irow] - offsets[irow - 1] == max_array_size) - inverse_permutation[inverse_permutation_count++] = irow; - - /// We have result_data containing result for every row and inverse_permutation which contains indexes of rows in input it corresponds to. - /// Now we need to invert inverse_permuation and apply it to result_data to get rows in right order. - IColumn::Permutation perm(rows_count); - for (ssize_t i = 0; i < rows_count; i++) - perm[inverse_permutation[i]] = i; - return result_data->permute(perm, 0); - } - -private: - String getName() const override - { - return name; - } -}; - -REGISTER_FUNCTION(ArrayFold) -{ - factory.registerFunction(FunctionDocumentation{.description=R"( - Function arrayFold(x1,...,xn,accum -> expression, array1,...,arrayn, accum_initial) applies lambda function to a number of equally-sized arrays - and collects the result in an accumulator. 
- )", .examples{{"sum", "SELECT arrayFold(x,acc -> acc+x, [1,2,3,4], toInt64(1));", "11"}}, .categories{"Array"}}); -} -} diff --git a/tests/performance/array_fold.xml b/tests/performance/array_fold.xml deleted file mode 100644 index 32bd45beb1e..00000000000 --- a/tests/performance/array_fold.xml +++ /dev/null @@ -1,5 +0,0 @@ - - SELECT arrayFold((acc, x) -> acc + x, range(number % 100), toUInt64(0)) from numbers(100000) Format Null - SELECT arrayFold((acc, x) -> acc + 1, range(number % 100), toUInt64(0)) from numbers(100000) Format Null - SELECT arrayFold((acc, x) -> acc + x, range(number), toUInt64(0)) from numbers(10000) Format Null - diff --git a/tests/queries/0_stateless/02718_array_fold.reference b/tests/queries/0_stateless/02718_array_fold.reference deleted file mode 100644 index 4139232d145..00000000000 --- a/tests/queries/0_stateless/02718_array_fold.reference +++ /dev/null @@ -1,25 +0,0 @@ -Negative tests -Const arrays -23 -3 -101 -[1,2,3,4] -[4,3,2,1] -([4,3,2,1],[1,2,3,4]) -([1,3,5],[2,4,6]) -Non-const arrays -0 -1 -3 -6 -10 -[] -[0] -[1,0] -[2,1,0] -[3,2,1,0] -[] -[0] -[1,0] -[1,0,2] -[3,1,0,2] diff --git a/tests/queries/0_stateless/02718_array_fold.sql b/tests/queries/0_stateless/02718_array_fold.sql deleted file mode 100644 index 0486a5ce2e3..00000000000 --- a/tests/queries/0_stateless/02718_array_fold.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT 'Negative tests'; -SELECT arrayFold(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT arrayFold(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT arrayFold(1, toUInt64(0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT arrayFold(1, emptyArrayUInt64(), toUInt64(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( acc,x -> x, emptyArrayString(), toInt8(0)); -- { serverError TYPE_MISMATCH } -SELECT arrayFold( acc,x -> x, 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( acc,x,y -> x, [0, 1], 'not an array', toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( acc,x -> x, [0, 1], [2, 3], toUInt8(0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayFold( acc,x,y -> x, [0, 1], [2, 3, 4], toUInt8(0)); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } - -SELECT 'Const arrays'; -SELECT arrayFold( acc,x -> acc+x*2, [1, 2, 3, 4], toInt64(3)); -SELECT arrayFold( acc,x -> acc+x*2, emptyArrayInt64(), toInt64(3)); -SELECT arrayFold( acc,x,y -> acc+x*2+y*3, [1, 2, 3, 4], [5, 6, 7, 8], toInt64(3)); -SELECT arrayFold( acc,x -> arrayPushBack(acc, x), [1, 2, 3, 4], emptyArrayInt64()); -SELECT arrayFold( acc,x -> arrayPushFront(acc, x), [1, 2, 3, 4], emptyArrayInt64()); -SELECT arrayFold( acc,x -> (arrayPushFront(acc.1, x),arrayPushBack(acc.2, x)), [1, 2, 3, 4], (emptyArrayInt64(), emptyArrayInt64())); -SELECT arrayFold( acc,x -> x%2 ? (arrayPushBack(acc.1, x), acc.2): (acc.1, arrayPushBack(acc.2, x)), [1, 2, 3, 4, 5, 6], (emptyArrayInt64(), emptyArrayInt64())); - -SELECT 'Non-const arrays'; -SELECT arrayFold( acc,x -> acc+x, range(number), number) FROM system.numbers LIMIT 5; -SELECT arrayFold( acc,x -> arrayPushFront(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; -SELECT arrayFold( acc,x -> x%2 ? 
arrayPushFront(acc,x) : arrayPushBack(acc,x), range(number), emptyArrayUInt64()) FROM system.numbers LIMIT 5; From 4c8cc4e0bd7c06dc7b2ec6124a890af45b309b21 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 14 Dec 2023 05:32:12 +0100 Subject: [PATCH 95/98] Update string-functions.md --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index e9db47a5c4c..4f3c6e1e858 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1268,7 +1268,7 @@ Type: [String](../../sql-reference/data-types/string.md). ``` sql SELECT decodeHTMLComponent(''CH'); -SELECT decodeHMLComponent('I♥ClickHouse'); +SELECT decodeHTMLComponent('I♥ClickHouse'); ``` Result: From a0af0392cd36826cbadee499c284d4e70de16c2f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 14 Dec 2023 12:47:11 +0100 Subject: [PATCH 96/98] Random changes in random files (#57642) --- src/Access/Common/AccessType.h | 1 + src/Common/CurrentMetrics.cpp | 1 + src/Common/ZooKeeper/TestKeeper.cpp | 11 +++++++++++ src/Common/ZooKeeper/ZooKeeper.cpp | 11 +++++++++++ src/Common/ZooKeeper/ZooKeeper.h | 10 ++++++++-- src/Coordination/KeeperSnapshotManagerS3.cpp | 5 ++++- src/Coordination/Standalone/Context.cpp | 6 ++++++ src/Coordination/Standalone/Context.h | 7 +++++++ src/Core/SettingsEnums.h | 2 ++ src/Dictionaries/DictionaryFactory.cpp | 6 ------ src/Dictionaries/DictionaryFactory.h | 5 ----- src/Formats/MarkInCompressedFile.h | 18 ++++++++++++------ src/IO/ReadBufferFromS3.cpp | 2 ++ src/IO/S3/PocoHTTPClientFactory.cpp | 4 ++-- src/IO/S3/PocoHTTPClientFactory.h | 2 +- src/IO/S3/copyS3File.cpp | 1 + src/IO/S3Common.h | 1 + src/Interpreters/InterpreterSystemQuery.cpp | 15 +++++++++++++++ src/Interpreters/InterpreterSystemQuery.h | 1 + src/Parsers/ASTSystemQuery.cpp | 3 ++- src/Parsers/ASTSystemQuery.h | 3 +++ src/Parsers/ParserSystemQuery.cpp | 9 +++++++++ src/Server/HTTPHandler.cpp | 9 +++++++-- src/Server/ReplicasStatusHandler.cpp | 14 ++++++++++---- src/Server/ServerType.cpp | 3 +++ src/Server/ServerType.h | 1 + src/Storages/System/StorageSystemDatabases.cpp | 1 + src/Storages/System/StorageSystemMutations.cpp | 2 ++ src/Storages/System/StorageSystemPartsBase.cpp | 2 ++ .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 1 + 31 files changed, 128 insertions(+), 30 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f99ae2a8aea..45d427a7c55 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -183,6 +183,7 @@ enum class AccessType M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \ M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \ M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ + M(SYSTEM_REPLICA_READINESS, "SYSTEM REPLICA READY, SYSTEM REPLICA UNREADY", GLOBAL, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \ M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \ diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 186771cafc2..38b14e4b0b4 100644 --- a/src/Common/CurrentMetrics.cpp +++ 
b/src/Common/CurrentMetrics.cpp @@ -260,6 +260,7 @@ #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) #endif + namespace CurrentMetrics { #define M(NAME, DOCUMENTATION) extern const Metric NAME = Metric(__COUNTER__); diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index a19892736ea..a25329ad7c0 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -99,6 +99,7 @@ struct TestKeeperExistsRequest final : ExistsRequest, TestKeeperRequest struct TestKeeperGetRequest final : GetRequest, TestKeeperRequest { TestKeeperGetRequest() = default; + explicit TestKeeperGetRequest(const GetRequest & base) : GetRequest(base) {} ResponsePtr createResponse() const override; std::pair process(TestKeeper::Container & container, int64_t zxid) const override; }; @@ -118,6 +119,8 @@ struct TestKeeperSetRequest final : SetRequest, TestKeeperRequest struct TestKeeperListRequest : ListRequest, TestKeeperRequest { + TestKeeperListRequest() = default; + explicit TestKeeperListRequest(const ListRequest & base) : ListRequest(base) {} ResponsePtr createResponse() const override; std::pair process(TestKeeper::Container & container, int64_t zxid) const override; }; @@ -176,6 +179,14 @@ struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest { requests.push_back(std::make_shared(*concrete_request_check)); } + else if (const auto * concrete_request_get = dynamic_cast(generic_request.get())) + { + requests.push_back(std::make_shared(*concrete_request_get)); + } + else if (const auto * concrete_request_list = dynamic_cast(generic_request.get())) + { + requests.push_back(std::make_shared(*concrete_request_list)); + } else throw Exception::fromMessage(Error::ZBADARGUMENTS, "Illegal command as part of multi ZooKeeper request"); } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 8d18494e964..e682eaaea0d 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -497,6 +497,17 @@ bool ZooKeeper::exists(const std::string & path, Coordination::Stat * stat, cons return existsWatch(path, stat, callbackForEvent(watch)); } +bool ZooKeeper::anyExists(const std::vector & paths) +{ + auto exists_multi_response = exists(paths); + for (size_t i = 0; i < exists_multi_response.size(); ++i) + { + if (exists_multi_response[i].error == Coordination::Error::ZOK) + return true; + } + return false; +} + bool ZooKeeper::existsWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { Coordination::Error code = existsImpl(path, stat, watch_callback); diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 785842b94bd..73b730f60b3 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -286,6 +286,8 @@ public: return exists(paths.begin(), paths.end()); } + bool anyExists(const std::vector & paths); + std::string get(const std::string & path, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr); std::string getWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); @@ -422,8 +424,9 @@ public: /// Performs several operations in a transaction. /// Throws on every error. Coordination::Responses multi(const Coordination::Requests & requests); - /// Throws only if some operation has returned an "unexpected" error - /// - an error that would cause the corresponding try- method to throw. 
+ /// Throws only if some operation has returned an "unexpected" error - an error that would cause + /// the corresponding try- method to throw. + /// On exception, `responses` may or may not be populated. Coordination::Error tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses); /// Throws nothing (even session expired errors) Coordination::Error tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses); @@ -567,8 +570,11 @@ public: void setZooKeeperLog(std::shared_ptr zk_log_); UInt32 getSessionUptime() const { return static_cast(session_uptime.elapsedSeconds()); } + bool hasReachedDeadline() const { return impl->hasReachedDeadline(); } + uint64_t getSessionTimeoutMS() const { return args.session_timeout_ms; } + void setServerCompletelyStarted(); Int8 getConnectedHostIdx() const; diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 56f64d58e2f..d76e310f2a3 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -208,6 +208,9 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh return; } + /// To avoid reference to binding + const auto & snapshot_path_ref = snapshot_path; + SCOPE_EXIT( { LOG_INFO(log, "Removing lock file"); @@ -223,7 +226,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh } catch (...) { - LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_file_info.path); + LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_path_ref); tryLogCurrentException(__PRETTY_FUNCTION__); } }); diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 97a034b22a4..374610769c4 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -35,6 +35,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; } struct ContextSharedPart : boost::noncopyable @@ -376,4 +377,9 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros()); } +std::shared_ptr Context::getZooKeeper() const +{ + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); +} + } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 5aecf850d7c..a6199864422 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -21,6 +21,11 @@ #include #include "config.h" +namespace zkutil +{ + class ZooKeeper; + using ZooKeeperPtr = std::shared_ptr; +} namespace DB { @@ -153,6 +158,8 @@ public: void initializeKeeperDispatcher(bool start_async) const; void shutdownKeeperDispatcher() const; void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); + + zkutil::ZooKeeperPtr getZooKeeper() const; }; } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 3d087d42386..7977a0b3ab6 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -133,6 +133,8 @@ enum class DefaultTableEngine ReplacingMergeTree, ReplicatedMergeTree, ReplicatedReplacingMergeTree, + SharedMergeTree, + SharedReplacingMergeTree, Memory, }; diff --git a/src/Dictionaries/DictionaryFactory.cpp b/src/Dictionaries/DictionaryFactory.cpp index c3102632167..f6102d7c657 100644 --- 
a/src/Dictionaries/DictionaryFactory.cpp +++ b/src/Dictionaries/DictionaryFactory.cpp @@ -69,12 +69,6 @@ DictionaryPtr DictionaryFactory::create( layout_type); } -DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, ContextPtr global_context) const -{ - auto configuration = getDictionaryConfigurationFromAST(ast, global_context); - return DictionaryFactory::create(name, *configuration, "dictionary", global_context, true); -} - bool DictionaryFactory::isComplex(const std::string & layout_type) const { auto it = registered_layouts.find(layout_type); diff --git a/src/Dictionaries/DictionaryFactory.h b/src/Dictionaries/DictionaryFactory.h index 35097a5ed24..2834451df81 100644 --- a/src/Dictionaries/DictionaryFactory.h +++ b/src/Dictionaries/DictionaryFactory.h @@ -39,11 +39,6 @@ public: ContextPtr global_context, bool created_from_ddl) const; - /// Create dictionary from DDL-query - DictionaryPtr create(const std::string & name, - const ASTCreateQuery & ast, - ContextPtr global_context) const; - using LayoutCreateFunction = std::function(const MarkInCompressedFile &) const = default; auto asTuple() const { return std::make_tuple(offset_in_compressed_file, offset_in_decompressed_block); } @@ -39,6 +41,10 @@ struct MarkInCompressedFile } }; +#ifdef __clang__ + #pragma clang diagnostic pop +#endif + /** * In-memory representation of an array of marks. * diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index c9c9319c44c..36cac929e3f 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -515,7 +515,9 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t attempt, si // We do not know in advance how many bytes we are going to consume, to avoid blocking estimated it from below constexpr ResourceCost estimated_cost = 1; ResourceGuard rlock(read_settings.resource_link, estimated_cost); + Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req); + rlock.unlock(); if (outcome.IsSuccess()) diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index 9dd52a263b0..ef7af2d01ba 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -13,9 +13,9 @@ namespace DB::S3 { std::shared_ptr -PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const +PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const { - return std::make_shared(static_cast(clientConfiguration)); + return std::make_shared(static_cast(client_configuration)); } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( diff --git a/src/IO/S3/PocoHTTPClientFactory.h b/src/IO/S3/PocoHTTPClientFactory.h index 4e555f05502..60704332e7b 100644 --- a/src/IO/S3/PocoHTTPClientFactory.h +++ b/src/IO/S3/PocoHTTPClientFactory.h @@ -15,7 +15,7 @@ class PocoHTTPClientFactory : public Aws::Http::HttpClientFactory public: ~PocoHTTPClientFactory() override = default; [[nodiscard]] std::shared_ptr - CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override; + CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const override; [[nodiscard]] std::shared_ptr CreateHttpRequest(const Aws::String & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override; [[nodiscard]] std::shared_ptr diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 819c345938d..25de61360fe 100644 --- 
a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -655,6 +655,7 @@ namespace void performCopy() { + LOG_TEST(log, "Copy object {} to {} using native copy", src_key, dest_key); if (!supports_multipart_copy || size <= upload_settings.max_single_operation_copy_size) performSingleOperationCopy(); else diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 71d52c727c7..8c45c1c34a7 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index db10d377cc1..e3b0c2df567 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,7 @@ namespace ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int TABLE_WAS_NOT_DROPPED; extern const int ABORTED; + extern const int SUPPORT_IS_DISABLED; } @@ -442,6 +444,10 @@ BlockIO InterpreterSystemQuery::execute() result.pipeline = QueryPipeline(std::move(source)); break; } + case Type::DROP_DISK_METADATA_CACHE: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); + } case Type::DROP_SCHEMA_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_SCHEMA_CACHE); @@ -611,6 +617,10 @@ BlockIO InterpreterSystemQuery::execute() case Type::SYNC_DATABASE_REPLICA: syncReplicatedDatabase(query); break; + case Type::REPLICA_UNREADY: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); + case Type::REPLICA_READY: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); case Type::SYNC_TRANSACTION_LOG: syncTransactionLog(); break; @@ -1119,6 +1129,8 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; } + case Type::DROP_DISK_METADATA_CACHE: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); case Type::RELOAD_DICTIONARY: case Type::RELOAD_DICTIONARIES: case Type::RELOAD_EMBEDDED_DICTIONARIES: @@ -1245,6 +1257,9 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_REPLICA, query.getDatabase(), query.getTable()); break; } + case Type::REPLICA_READY: + case Type::REPLICA_UNREADY: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); case Type::RESTART_REPLICA: { required_access.emplace_back(AccessType::SYSTEM_RESTART_REPLICA, query.getDatabase(), query.getTable()); diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 826d4cc0c69..462449623d0 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -57,6 +57,7 @@ private: void restartReplica(const StorageID & replica, ContextMutablePtr system_context); void restartReplicas(ContextMutablePtr system_context); void syncReplica(ASTSystemQuery & query); + void setReplicaReadiness(bool ready); void waitLoadingParts(); void syncReplicatedDatabase(ASTSystemQuery & query); diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 77235dfb6c2..c005d49a93d 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -179,7 +179,8 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::RELOAD_DICTIONARY || type == Type::RELOAD_MODEL || type == 
Type::RELOAD_FUNCTION - || type == Type::RESTART_DISK) + || type == Type::RESTART_DISK + || type == Type::DROP_DISK_METADATA_CACHE) { if (table) { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 5f7ba5be330..8e6100fe7b4 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -32,6 +32,7 @@ public: DROP_COMPILED_EXPRESSION_CACHE, #endif DROP_FILESYSTEM_CACHE, + DROP_DISK_METADATA_CACHE, DROP_SCHEMA_CACHE, DROP_FORMAT_SCHEMA_CACHE, #if USE_AWS_S3 @@ -49,6 +50,8 @@ public: SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, SYNC_FILE_CACHE, + REPLICA_READY, + REPLICA_UNREADY, RELOAD_DICTIONARY, RELOAD_DICTIONARIES, RELOAD_MODEL, diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 2f6a1142a8f..2e1283187d3 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -12,6 +12,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} + [[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { @@ -427,6 +432,10 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } + case Type::DROP_DISK_METADATA_CACHE: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Not implemented"); + } case Type::DROP_SCHEMA_CACHE: { if (ParserKeyword{"FOR"}.ignore(pos, expected)) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index f9cd3b40f4a..ffa8c11fa44 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1061,8 +1061,13 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse response.setChunkedTransferEncoding(true); HTMLForm params(default_settings, request); - with_stacktrace = params.getParsed("stacktrace", false); - close_session = params.getParsed("close_session", false); + + if (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)) + with_stacktrace = true; + + if (params.getParsed("close_session", false) && server.config().getBool("enable_http_close_session", true)) + close_session = true; + if (close_session) session_id = params.get("session_id"); diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index ad54b24f31d..2b7a52572a8 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -28,11 +28,17 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe { HTMLForm params(getContext()->getSettingsRef(), request); - /// Even if lag is small, output detailed information about the lag. - bool verbose = params.get("verbose", "") == "1"; + const auto & config = getContext()->getConfigRef(); const MergeTreeSettings & settings = getContext()->getReplicatedMergeTreeSettings(); + /// Even if lag is small, output detailed information about the lag. 
+ bool verbose = false; + bool enable_verbose = config.getBool("enable_verbose_replicas_status", true); + + if (params.get("verbose", "") == "1" && enable_verbose) + verbose = true; + bool ok = true; WriteBufferFromOwnString message; @@ -78,13 +84,13 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } } - const auto & config = getContext()->getConfigRef(); setResponseDefaultHeaders(response, config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT)); if (!ok) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE); - verbose = true; + if (enable_verbose) + verbose = true; } if (verbose) diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp index fb052e7d6e6..b0511632e6e 100644 --- a/src/Server/ServerType.cpp +++ b/src/Server/ServerType.cpp @@ -144,6 +144,9 @@ bool ServerType::shouldStop(const std::string & port_name) const port_custom_name = port_name.substr(protocols_size, port_name.size() - protocols_size - ports_size + 1); } + else if (port_name == "cloud.port") + port_type = Type::CLOUD; + else return false; diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h index 07bb74ea009..c31fb663811 100644 --- a/src/Server/ServerType.h +++ b/src/Server/ServerType.h @@ -26,6 +26,7 @@ public: QUERIES_ALL, QUERIES_DEFAULT, QUERIES_CUSTOM, + CLOUD, END }; diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 1fa94fab7bf..6dbe780193d 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index fa521c632b8..b250765c4d2 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -28,6 +28,7 @@ NamesAndTypesList StorageSystemMutations::getNamesAndTypes() { "parts_to_do_names", std::make_shared(std::make_shared()) }, { "parts_to_do", std::make_shared() }, { "is_done", std::make_shared() }, + { "is_killed", std::make_shared() }, { "latest_failed_part", std::make_shared() }, { "latest_fail_time", std::make_shared() }, { "latest_fail_reason", std::make_shared() }, @@ -138,6 +139,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr c res_columns[col_num++]->insert(parts_to_do_names); res_columns[col_num++]->insert(parts_to_do_names.size()); res_columns[col_num++]->insert(status.is_done); + res_columns[col_num++]->insert(status.is_killed); res_columns[col_num++]->insert(status.latest_failed_part); res_columns[col_num++]->insert(UInt64(status.latest_fail_time)); res_columns[col_num++]->insert(status.latest_fail_reason); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 513af6cfc46..8d2e2900722 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -285,6 +285,8 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Name auto add_alias = [&](const String & alias_name, const String & column_name) { + if (!tmp_columns.has(column_name)) + return; ColumnDescription column(alias_name, tmp_columns.get(column_name).type); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(column_name); diff --git 
a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 6c639926aac..e2c0655b2bc 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -133,6 +133,7 @@ SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS SYSTEM REPLICATION QUEUES ['SYSTEM STOP REPLICATION QUEUES','SYSTEM START REPLICATION QUEUES','STOP REPLICATION QUEUES','START REPLICATION QUEUES'] TABLE SYSTEM SYSTEM DROP REPLICA ['DROP REPLICA'] TABLE SYSTEM SYSTEM SYNC REPLICA ['SYNC REPLICA'] TABLE SYSTEM +SYSTEM REPLICA READINESS ['SYSTEM REPLICA READY','SYSTEM REPLICA UNREADY'] GLOBAL SYSTEM SYSTEM RESTART REPLICA ['RESTART REPLICA'] TABLE SYSTEM SYSTEM RESTORE REPLICA ['RESTORE REPLICA'] TABLE SYSTEM SYSTEM WAIT LOADING PARTS ['WAIT LOADING PARTS'] TABLE SYSTEM diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 9ed905a0df8..2e9d733aeb3 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -406,6 +406,7 @@ CREATE TABLE system.mutations `parts_to_do_names` Array(String), `parts_to_do` Int64, `is_done` UInt8, + `is_killed` UInt8, `latest_failed_part` String, `latest_fail_time` DateTime, `latest_fail_reason` String From 64a58ee4456986f88abb1c794f6b896c0b80d32a Mon Sep 17 00:00:00 2001 From: Christoph Wurm Date: Thu, 14 Dec 2023 14:15:41 +0000 Subject: [PATCH 97/98] Update clickhouse-benchmark.md: Remove json output Suppport for `--json` was removed in https://github.com/ClickHouse/ClickHouse/pull/53293. --- docs/en/operations/utilities/clickhouse-benchmark.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 8620b44c368..8b7d7f85552 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -53,7 +53,6 @@ clickhouse-benchmark [keys] < queries_file; - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. - `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. 
From 93dd6b83e7de1659379f08e006354885be46e00f Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Fri, 15 Dec 2023 00:26:35 +0800 Subject: [PATCH 98/98] Do not allow creating replicated table with inconsistent merge params (#56833) * save all merge params to zookeeper Signed-off-by: Duc Canh Le * calculate hash for graphite merge params Signed-off-by: Duc Canh Le * add graphite params hash to zookeeper + fix tests Signed-off-by: Duc Canh Le * install new graphite for testing Signed-off-by: Duc Canh Le * fix backward incompatibility Signed-off-by: Duc Canh Le * minor fix test Signed-off-by: Duc Canh Le * Update src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp Co-authored-by: Alexander Tokmakov * remove peekString and add more comments - peekString doesn't always work even for ReadBufferFromString - more comment re. backward compatibility Signed-off-by: Duc Canh Le --------- Signed-off-by: Duc Canh Le Co-authored-by: Alexander Tokmakov --- src/Processors/Merges/Algorithms/Graphite.h | 27 ++++++ .../ReplicatedMergeTreeTableMetadata.cpp | 82 +++++++++++++++++++ .../ReplicatedMergeTreeTableMetadata.h | 10 +++ .../config/config.d/graphite_alternative.xml | 29 +++++++ tests/config/install.sh | 1 + ...llapsing_attach_detach_zookeeper.reference | 2 +- ...lize_create_alter_function_names.reference | 2 +- ...merge_parameters_must_consistent.reference | 0 ...cated_merge_parameters_must_consistent.sql | 80 ++++++++++++++++++ 9 files changed, 231 insertions(+), 2 deletions(-) create mode 100644 tests/config/config.d/graphite_alternative.xml create mode 100644 tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.reference create mode 100644 tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index 46b1bbbfcad..692e36d2eae 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -122,6 +123,17 @@ struct Pattern AggregateFunctionPtr function; Retentions retentions; /// Must be ordered by 'age' descending. 
enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically + void updateHash(SipHash & hash) const + { + hash.update(rule_type); + hash.update(regexp_str); + hash.update(function->getName()); + for (const auto & r : retentions) + { + hash.update(r.age); + hash.update(r.precision); + } + } }; bool operator==(const Pattern & a, const Pattern & b); @@ -142,6 +154,21 @@ struct Params Graphite::Patterns patterns; Graphite::Patterns patterns_plain; Graphite::Patterns patterns_tagged; + void updateHash(SipHash & hash) const + { + hash.update(path_column_name); + hash.update(time_column_name); + hash.update(value_column_name); + hash.update(value_column_name); + hash.update(version_column_name); + hash.update(patterns_typed); + for (const auto & p : patterns) + p.updateHash(hash); + for (const auto & p : patterns_plain) + p.updateHash(hash); + for (const auto & p : patterns_tagged) + p.updateHash(hash); + } }; using RollupRule = std::pair; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index f4e4756279f..41188891118 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -49,6 +52,17 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; + is_deleted_column = data.merging_params.is_deleted_column; + columns_to_sum = fmt::format("{}", fmt::join(data.merging_params.columns_to_sum.begin(), data.merging_params.columns_to_sum.end(), ",")); + version_column = data.merging_params.version_column; + if (data.merging_params.mode == MergeTreeData::MergingParams::Graphite) + { + SipHash graphite_hash; + data.merging_params.graphite_params.updateHash(graphite_hash); + WriteBufferFromOwnString wb; + writeText(graphite_hash.get128(), wb); + graphite_params_hash = std::move(wb.str()); + } /// This code may looks strange, but previously we had only one entity: PRIMARY KEY (or ORDER BY, it doesn't matter) /// Now we have two different entities ORDER BY and it's optional prefix -- PRIMARY KEY. @@ -90,6 +104,22 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const { + /// Important notes: new added field must always be append to the end of serialized metadata + /// for backward compatible. + + /// In addition, two consecutive fields should not share any prefix, otherwise deserialize may fails. + /// For example, if you have two field `v1` and `v2` serialized as: + /// if (!v1.empty()) out << "v1: " << v1 << "\n"; + /// if (!v2.empty()) out << "v2: " << v2 << "\n"; + /// Let say if `v1` is empty and v2 is non-empty, then `v1` is not in serialized metadata. + /// Later, to deserialize the metadata, `read` will sequentially check if each field with `checkString`. + /// When it begin to check for `v1` and `v2`, the metadata buffer look like this: + /// v2: + /// ^ + /// cursor + /// `checkString("v1: ", in)` will be called first and it moves the cursor to `2` instead of `v`, so the + /// subsequent call `checkString("v2: ", in)` will also fails. 
+ out << "metadata format version: 1\n" << "date column: " << date_column << "\n" << "sampling expression: " << sampling_expression << "\n" @@ -121,6 +151,19 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const if (!constraints.empty()) out << "constraints: " << constraints << "\n"; + + if (merge_params_version >= REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS) + { + out << "merge parameters format version: " << merge_params_version << "\n"; + if (!version_column.empty()) + out << "version column: " << version_column << "\n"; + if (!is_deleted_column.empty()) + out << "is_deleted column: " << is_deleted_column << "\n"; + if (!columns_to_sum.empty()) + out << "columns to sum: " << columns_to_sum << "\n"; + if (!graphite_params_hash.empty()) + out << "graphite hash: " << graphite_params_hash << "\n"; + } } String ReplicatedMergeTreeTableMetadata::toString() const @@ -170,6 +213,26 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("constraints: ", in)) in >> constraints >> "\n"; + + if (checkString("merge parameters format version: ", in)) + in >> merge_params_version >> "\n"; + else + merge_params_version = REPLICATED_MERGE_TREE_METADATA_LEGACY_VERSION; + + if (merge_params_version >= REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS) + { + if (checkString("version column: ", in)) + in >> version_column >> "\n"; + + if (checkString("is_deleted column: ", in)) + in >> is_deleted_column >> "\n"; + + if (checkString("columns to sum: ", in)) + in >> columns_to_sum >> "\n"; + + if (checkString("graphite hash: ", in)) + in >> graphite_params_hash >> "\n"; + } } ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s) @@ -210,6 +273,25 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sign column. " "Stored in ZooKeeper: {}, local: {}", from_zk.sign_column, sign_column); + if (merge_params_version >= REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS && from_zk.merge_params_version >= REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS) + { + if (version_column != from_zk.version_column) + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in version column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.version_column, version_column); + + if (is_deleted_column != from_zk.is_deleted_column) + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in is_deleted column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.is_deleted_column, is_deleted_column); + + if (columns_to_sum != from_zk.columns_to_sum) + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sum columns. " + "Stored in ZooKeeper: {}, local: {}", from_zk.columns_to_sum, columns_to_sum); + + if (graphite_params_hash != from_zk.graphite_params_hash) + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in graphite params. " + "Stored in ZooKeeper hash: {}, local hash: {}", from_zk.graphite_params_hash, graphite_params_hash); + } + /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. 
String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index eb2d087e988..15ed8671f9b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -17,11 +18,20 @@ class ReadBuffer; */ struct ReplicatedMergeTreeTableMetadata { + static constexpr int REPLICATED_MERGE_TREE_METADATA_LEGACY_VERSION = 1; + static constexpr int REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS = 2; + String date_column; String sampling_expression; UInt64 index_granularity; + /// Merging related params int merging_params_mode; + int merge_params_version = REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS; String sign_column; + String version_column; + String is_deleted_column; + String columns_to_sum; + String graphite_params_hash; String primary_key; MergeTreeDataFormatVersion data_format_version; String partition_key; diff --git a/tests/config/config.d/graphite_alternative.xml b/tests/config/config.d/graphite_alternative.xml new file mode 100644 index 00000000000..1a00de52af5 --- /dev/null +++ b/tests/config/config.d/graphite_alternative.xml @@ -0,0 +1,29 @@ + + + + Version + + sum + any + + 0 + 600 + + + 17280 + 6000 + + + + any + + 0 + 600 + + + 17280 + 6000 + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index bcfd8924422..bcc8a8425d2 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -26,6 +26,7 @@ ln -sf $SRC_PATH/config.d/macros.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/graphite_alternative.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree_settings.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference index 353c70aec11..41609184a74 100644 --- a/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference +++ b/tests/queries/0_stateless/01357_version_collapsing_attach_detach_zookeeper.reference @@ -1,3 +1,3 @@ -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 7\nsign column: sign\nprimary key: key1, key2\ndata format version: 1\npartition key: d\ngranularity bytes: 10485760\n +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 7\nsign column: sign\nprimary key: key1, key2\ndata format version: 1\npartition key: d\ngranularity bytes: 10485760\nmerge parameters format version: 2\nversion column: version\n 1 1 diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference index b5b93c34c00..75598e15320 100644 --- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference +++ 
b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference @@ -1,2 +1,2 @@ CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm log2(i) TYPE minmax GRANULARITY 1,\n INDEX nn log2(i) TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192 -metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n +metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm log2(i) TYPE minmax GRANULARITY 1, nn log2(i) TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\nmerge parameters format version: 2\n diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.reference b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql new file mode 100644 index 00000000000..3c1bec4fb3f --- /dev/null +++ b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql @@ -0,0 +1,80 @@ +-- Tags: zookeeper, no-replicated-database +CREATE TABLE t +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64, +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r1', legacy_ver) +ORDER BY id; + +CREATE TABLE t_r +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64 +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2') +ORDER BY id; -- { serverError METADATA_MISMATCH } + +CREATE TABLE t2 +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64, + `deleted` UInt8 +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r1', legacy_ver) +ORDER BY id; + +CREATE TABLE t2_r +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64, + `deleted` UInt8 +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver, deleted) +ORDER BY id; -- { serverError METADATA_MISMATCH } + +CREATE TABLE t3 +( + `key` UInt64, + `metrics1` UInt64, + `metrics2` UInt64 +) +ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r1', metrics1) +ORDER BY key; + +CREATE TABLE t3_r +( + `key` UInt64, + `metrics1` UInt64, + `metrics2` UInt64 +) +ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r2', metrics2) +ORDER BY key; -- { serverError METADATA_MISMATCH } + +CREATE TABLE t4 +( + `key` UInt32, + `Path` String, + `Time` DateTime('UTC'), + `Value` Float64, + `Version` UInt32, + `col` UInt64 +) +ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r1', 'graphite_rollup') +ORDER BY key; + +CREATE TABLE t4_r +( + `key` UInt32, + `Path` String, + `Time` DateTime('UTC'), + `Value` Float64, + `Version` UInt32, + `col` UInt64 +) +ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r2', 'graphite_rollup_alternative') +ORDER BY key; -- { serverError METADATA_MISMATCH }
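
A minimal usage sketch of the behaviour this patch enforces (the table names and ZooKeeper path below are hypothetical, not taken from the test above): a second replica is accepted only when its merge parameters match the ones the first replica already stored in ZooKeeper.

-- Both replicas declare the same version column (`ver`), so the serialized merge
-- parameters match and the second CREATE succeeds instead of failing with METADATA_MISMATCH.
CREATE TABLE t_ok
(
    `id` UInt64,
    `val` String,
    `ver` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t_ok/', 'r1', ver)
ORDER BY id;

CREATE TABLE t_ok_r
(
    `id` UInt64,
    `val` String,
    `ver` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t_ok/', 'r2', ver)
ORDER BY id;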
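
To see the fields that `write()` now appends, the stored table metadata can be read back through `system.zookeeper`. This is only an illustration, assuming the hypothetical table above was created in the `default` database and that the server exposes its ZooKeeper paths.

-- The `value` should end with the appended fields, e.g.
-- "merge parameters format version: 2" and "version column: ver".
SELECT name, value
FROM system.zookeeper
WHERE path = '/tables/default/t_ok' AND name = 'metadata';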