From 47b31c39d1bf4ece6a3d83d60b1d6bb097c58f71 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Tue, 28 Mar 2023 15:31:26 -0700 Subject: [PATCH 01/33] Fix partition id issue for s390x --- src/Storages/MergeTree/MergeTreePartition.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 2c5350909d5..1626018f1c1 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -84,7 +84,15 @@ namespace } void operator() (const UUID & x) const { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + auto tmp_x = x.toUnderType(); + char * start = reinterpret_cast(&tmp_x); + char * end = start + sizeof(tmp_x); + std::reverse(start, end); + operator()(tmp_x); +#else operator()(x.toUnderType()); +#endif } void operator() (const IPv4 & x) const { From 4846fc3bbc256611913dc5e5de5fd0f71d8aeb0a Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 18 Apr 2023 09:49:57 +0000 Subject: [PATCH 02/33] Fix test reference files for join using nullable column --- ...1142_join_lc_and_nullable_in_key.reference | 20 ++++++-- .../01142_join_lc_and_nullable_in_key.sql | 32 ++++++++++--- ...erge_join_lc_and_nullable_in_key.reference | 10 ++++ ...1142_merge_join_lc_and_nullable_in_key.sql | 24 +++++++--- .../01476_right_full_join_switch.reference | 10 ++++ .../01476_right_full_join_switch.sql | 19 ++++++++ .../01477_lc_in_merge_join_left_key.reference | 46 ++++++++++++++++++- ...=> 01477_lc_in_merge_join_left_key.sql.j2} | 19 ++++++-- 8 files changed, 157 insertions(+), 23 deletions(-) rename tests/queries/0_stateless/{01477_lc_in_merge_join_left_key.sql => 01477_lc_in_merge_join_left_key.sql.j2} (83%) diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference index 01efbb7c64b..6d32c20909b 100644 --- 
a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference @@ -3,15 +3,27 @@ 1 l \N Nullable(String) 2 \N Nullable(String) - +1 l Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +1 l Nullable(String) \N Nullable(String) +- +1 l LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +1 l LowCardinality(String) \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- 1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) - -1 l \N Nullable(String) -0 \N Nullable(String) -0 \N Nullable(String) -1 l \N Nullable(String) +0 \N - 0 - diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql index 38b72837174..2464b7a57cf 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql @@ -15,19 +15,37 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- lc should be supertype for l.lc and r.lc, so expect Nullable(String) +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, 
toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- old behavior is different +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; -SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc); +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT 
x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference index c6bdcb773b2..bb29ec9becd 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference @@ -4,6 +4,16 @@ 2 \N Nullable(String) - 1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql index dbc2d7c9f5d..718e8358c64 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql @@ -17,15 +17,27 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, 
toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.reference b/tests/queries/0_stateless/01476_right_full_join_switch.reference index 1f839b86013..54f9909762f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.reference +++ b/tests/queries/0_stateless/01476_right_full_join_switch.reference @@ -3,6 +3,16 @@ 1 l \N LowCardinality(String) Nullable(String) 2 \N LowCardinality(String) 
Nullable(String) - +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.sql b/tests/queries/0_stateless/01476_right_full_join_switch.sql index 5d041843ee2..dfbdec47e1f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.sql +++ b/tests/queries/0_stateless/01476_right_full_join_switch.sql @@ -10,8 +10,27 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); INSERT INTO nr VALUES (2, NULL); + SET join_use_nulls = 0; +SET allow_experimental_analyzer = 1; + +-- t.x is supertype for `x` from left and right since `x` is inside `USING`. +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SET allow_experimental_analyzer = 0; + +-- t.x is supertype for `x` from left and right since `x` is inside `USING`.
SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference index ac4d0a3d21a..9b6890c01ee 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference @@ -17,7 +17,7 @@ 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) -- +- join_use_nulls - 1 l \N LowCardinality(String) Nullable(String) 2 \N \N LowCardinality(Nullable(String)) Nullable(String) 1 l \N LowCardinality(Nullable(String)) Nullable(String) @@ -33,3 +33,47 @@ 1 l \N LowCardinality(Nullable(String)) Nullable(String) \N \N \N LowCardinality(Nullable(String)) Nullable(String) - +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- analyzer - +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +- +0 \N \N Nullable(String) Nullable(String) +1 \N l 
Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +- join_use_nulls - +1 l \N LowCardinality(String) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +1 l \N LowCardinality(Nullable(String)) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +- +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +- +\N \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 similarity index 83% rename from tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql rename to tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 index 2507613f051..6eafd41b411 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 @@ -10,6 +10,14 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); INSERT INTO nr VALUES (2, NULL); +{% for allow_experimental_analyzer in [0, 1] -%} + +SET allow_experimental_analyzer = {{ allow_experimental_analyzer }}; + +{% if allow_experimental_analyzer -%} +SELECT '- analyzer -'; +{% endif -%} + SET join_use_nulls = 0; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; @@ -36,7 +44,7 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), 
toTypeName(r.s) FROM nr AS l FULL JOIN t SET join_use_nulls = 1; -SELECT '-'; +SELECT '- join_use_nulls -'; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; @@ -56,10 +64,11 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr SELECT '-'; --- TODO --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; + +{% endfor %} DROP TABLE t; DROP TABLE nr; From 33bedc0291d9410c1c0ff0d9c1e1faee797aacf5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 14 Apr 2023 10:03:38 +0800 Subject: [PATCH 03/33] improve jsonpath support in json functions --- .../Parsers/ParserJSONPathMemberAccess.cpp | 50 ++++++++++++++---- ...arserJSONPathMemberSquareBracketAccess.cpp | 51 +++++++++++++++++++ .../ParserJSONPathMemberSquareBracketAccess.h | 17 +++++++ .../JSONPath/Parsers/ParserJSONPathQuery.cpp | 3 ++ .../01889_sql_json_functions.reference | 20 ++++++++ .../0_stateless/01889_sql_json_functions.sql | 10 ++++ 6 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp create mode 100644 src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h diff --git 
a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index f0ed178e1c2..8a415816798 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -1,9 +1,11 @@ #include #include +#include #include #include #include +#include namespace DB { @@ -16,18 +18,48 @@ namespace DB */ bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (pos->type != TokenType::Dot) + // There's a specical case, that a path member can begin with number + if (pos->type != TokenType::Dot && pos->type != TokenType::Number) return false; + if (pos->type != TokenType::Number) + ++pos; - ++pos; - - if (pos->type != TokenType::BareWord && pos->type !=TokenType::QuotedIdentifier) - return false; - - ParserIdentifier name_p; ASTPtr member_name; - if (!name_p.parse(pos, member_name, expected)) - return false; + + if (pos->type == TokenType::Number)[[unlikely]] + { + for (const auto * c = pos->begin; c != pos->end; ++c) + { + if (*c == '.' && c == pos->begin) + continue; + if (!isNumericASCII(*c)) + { + return false; + } + } + const auto * last_begin = *pos->begin == '.' ? 
pos->begin + 1 : pos->begin; + const auto * last_end = pos->end; + ++pos; + + if (pos.isValid() && pos->type == TokenType::BareWord && pos->begin == last_end) + { + member_name = std::make_shared(String(last_begin, pos->end)); + ++pos; + } + else + { + return false; + } + } + else + { + if (pos->type != TokenType::BareWord && pos->type != TokenType::QuotedIdentifier) + return false; + + ParserIdentifier name_p; + if (!name_p.parse(pos, member_name, expected)) + return false; + } auto member_access = std::make_shared(); node = member_access; diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp new file mode 100644 index 00000000000..b3059fcfd94 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp @@ -0,0 +1,51 @@ +#include "ParserJSONPathMemberSquareBracketAccess.h" +#include +#include +#include +#include +#include +#include + +namespace DB +{ +bool ParserJSONPathMemberSquareBracketAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::OpeningSquareBracket) + return false; + ++pos; + ASTPtr member_name; + if (pos->type == TokenType::BareWord || pos->type == TokenType::QuotedIdentifier) + { + ParserIdentifier name_p; + if (!name_p.parse(pos, member_name, expected)) + return false; + } + else if (pos->type == TokenType::StringLiteral) + { + try + { + ReadBufferFromMemory in(pos->begin, pos->size()); + String name; + readQuotedStringWithSQLStyle(name, in); + member_name = std::make_shared(name); + ++pos; + } + catch (const Exception &) + { + return false; + } + } + else + { + return false; + } + if (pos->type != TokenType::ClosingSquareBracket) + { + return false; + } + ++pos; + auto member_access = std::make_shared(); + node = member_access; + return tryGetIdentifierNameInto(member_name, member_access->member_name); +} +} diff --git 
a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h new file mode 100644 index 00000000000..b682ec5bb96 --- /dev/null +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.h @@ -0,0 +1,17 @@ +#pragma once +#include +// cases +// - [ident] +// - ['ident'] +// - ["ident"] +namespace DB +{ +class ParserJSONPathMemberSquareBracketAccess : public IParserBase +{ +private: + const char * getName() const override { return "ParserJSONPathMemberSquareBracketAccess"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserJSONPathMemberSquareBracketAccess() = default; +}; +} diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp index c18b2ad9b31..d8d633a1ec9 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expect { query = std::make_shared(); ParserJSONPathMemberAccess parser_jsonpath_member_access; + ParserJSONPathMemberSquareBracketAccess parser_jsonpath_member_square_bracket_access; ParserJSONPathRange parser_jsonpath_range; ParserJSONPathStar parser_jsonpath_star; ParserJSONPathRoot parser_jsonpath_root; @@ -32,6 +34,7 @@ bool ParserJSONPathQuery::parseImpl(Pos & pos, ASTPtr & query, Expected & expect ASTPtr accessor; while (parser_jsonpath_member_access.parse(pos, accessor, expected) + || parser_jsonpath_member_square_bracket_access.parse(pos, accessor, expected) || parser_jsonpath_range.parse(pos, accessor, expected) || parser_jsonpath_star.parse(pos, accessor, expected)) { diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference 
b/tests/queries/0_stateless/01889_sql_json_functions.reference index 5ac1ff501e5..23630bffb93 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -37,6 +37,16 @@ select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_v {"world":"!"} SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings function_json_value_return_type_allow_complex=true; ["world","world2"] +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +1 +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +1 SELECT '--JSON_QUERY--'; --JSON_QUERY-- SELECT JSON_QUERY('{"hello":1}', '$'); @@ -61,6 +71,16 @@ SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); [0, 1, 4, 0, -1, -4] +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +[1] +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +[1] SELECT '--JSON_EXISTS--'; --JSON_EXISTS-- SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index f174d04933c..7f417f3f228 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -20,6 +20,11 @@ select JSON_VALUE('{"a":"\\u263a"}', '$.a'); select JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true; select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true; SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings 
function_json_value_return_type_allow_complex=true; +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); SELECT '--JSON_QUERY--'; SELECT JSON_QUERY('{"hello":1}', '$'); @@ -33,6 +38,11 @@ SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); SELECT '--JSON_EXISTS--'; SELECT JSON_EXISTS('{"hello":1}', '$'); From d8d7639ba7501158f4bf0c0af22fb964914926a8 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 14 Apr 2023 14:42:59 +0800 Subject: [PATCH 04/33] fixed typos & style --- src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index 8a415816798..460f1d6a649 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -18,7 +18,7 @@ namespace DB */ bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - // There's a specical case, that a path member can begin with number + // There's a special case, that a path member can begin with number if (pos->type != TokenType::Dot && pos->type != TokenType::Number) return false; if (pos->type != TokenType::Number) From 
ac87a1c23ae3cd7a4863d93c9da97bec24a91db9 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Sun, 23 Apr 2023 13:10:41 +0800 Subject: [PATCH 05/33] update --- .../Parsers/ParserJSONPathMemberAccess.cpp | 12 ++++++++++++ .../ParserJSONPathMemberSquareBracketAccess.cpp | 17 +++++------------ .../01889_sql_json_functions.reference | 8 ++++++++ .../0_stateless/01889_sql_json_functions.sql | 8 ++++++++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp index 460f1d6a649..709ef89dd3c 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberAccess.cpp @@ -19,10 +19,22 @@ namespace DB bool ParserJSONPathMemberAccess::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { // There's a special case, that a path member can begin with number + // some invalid cases as following + // - ".123" is parsed as a number, not a dot and a number + // - ".123abc" is parsed as two parts, a number ".123" and a token "abc" + // - ".abc" is parsed as two parts. 
a dot and a token "abc" + // "$..123abc" is parsed into three parts, ".", ".123" and "abc" if (pos->type != TokenType::Dot && pos->type != TokenType::Number) return false; if (pos->type != TokenType::Number) + { ++pos; + // Check the case "$..123abc" + if (pos->type == TokenType::Number) + { + return false; + } + } ASTPtr member_name; diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp index b3059fcfd94..93e0639ccfe 100644 --- a/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp +++ b/src/Functions/JSONPath/Parsers/ParserJSONPathMemberSquareBracketAccess.cpp @@ -22,18 +22,11 @@ bool ParserJSONPathMemberSquareBracketAccess::parseImpl(Pos & pos, ASTPtr & node } else if (pos->type == TokenType::StringLiteral) { - try - { - ReadBufferFromMemory in(pos->begin, pos->size()); - String name; - readQuotedStringWithSQLStyle(name, in); - member_name = std::make_shared(name); - ++pos; - } - catch (const Exception &) - { - return false; - } + ReadBufferFromMemory in(pos->begin, pos->size()); + String name; + readQuotedStringWithSQLStyle(name, in); + member_name = std::make_shared(name); + ++pos; } else { diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference index 23630bffb93..cb8e19ea2a0 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -47,6 +47,10 @@ SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); 1 SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); 1 +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; --JSON_QUERY-- SELECT 
JSON_QUERY('{"hello":1}', '$'); @@ -81,6 +85,10 @@ SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); [1] SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); [1] +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; --JSON_EXISTS-- SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index 7f417f3f228..947b0171ec6 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -25,6 +25,10 @@ SELECT JSON_VALUE('{"hello":1}', '$[hello]'); SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; SELECT JSON_QUERY('{"hello":1}', '$'); @@ -43,6 +47,10 @@ SELECT JSON_QUERY('{"hello":1}', '$[hello]'); SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; SELECT JSON_EXISTS('{"hello":1}', '$'); From 277393fd8dd3159d288d3809d5c916f7772b6fe6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 24 Apr 2023 14:45:41 +0200 
Subject: [PATCH 06/33] Fix --- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 4 ++++ src/Databases/PostgreSQL/DatabasePostgreSQL.h | 1 + src/Storages/StoragePostgreSQL.cpp | 2 +- .../test_postgresql_database_engine/test.py | 11 +++++++++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 8c267ea929b..d9116f74c52 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -51,6 +51,7 @@ DatabasePostgreSQL::DatabasePostgreSQL( , configuration(configuration_) , pool(std::move(pool_)) , cache_tables(cache_tables_) + , log(&Poco::Logger::get("DatabasePostgreSQL(" + dbname_ + ")")) { cleaner_task = getContext()->getSchedulePool().createTask("PostgreSQLCleanerTask", [this]{ removeOutdatedTables(); }); cleaner_task->deactivate(); @@ -192,7 +193,10 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, ColumnsDescription{columns_info->columns}, ConstraintsDescription{}, String{}, configuration.schema, configuration.on_conflict); if (cache_tables) + { + LOG_TEST(log, "Cached table `{}`", table_name); cached_tables[table_name] = storage; + } return storage; } diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index 08583f4b6d9..31fa036c0ee 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -73,6 +73,7 @@ private: mutable Tables cached_tables; std::unordered_set detached_or_dropped; BackgroundSchedulePool::TaskHolder cleaner_task; + Poco::Logger * log; String getTableNameForLogs(const String & table_name) const; diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 8e1a799fa07..8548d558fd3 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -397,7 +397,7 @@ 
StoragePostgreSQL::Configuration StoragePostgreSQL::processNamedCollectionResult required_arguments.insert("table"); validateNamedCollection>( - named_collection, required_arguments, {"schema", "on_conflict", "addresses_expr", "host", "hostname", "port"}); + named_collection, required_arguments, {"schema", "on_conflict", "addresses_expr", "host", "hostname", "port", "use_tables_cache"}); configuration.addresses_expr = named_collection.getOrDefault("addresses_expr", ""); if (configuration.addresses_expr.empty()) diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index de6c9ad2cf9..63e85afb1d4 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -327,6 +327,17 @@ def test_predefined_connection_configuration(started_cluster): node1.query(f"SELECT count() FROM postgres_database.test_table").rstrip() == "100" ) + node1.query( + """ + DROP DATABASE postgres_database; + CREATE DATABASE postgres_database ENGINE = PostgreSQL(postgres1, use_tables_cache=1); + """ + ) + assert ( + node1.query(f"SELECT count() FROM postgres_database.test_table").rstrip() + == "100" + ) + assert node1.contains_in_log("Cached table `test_table`") node1.query("DROP DATABASE postgres_database") cursor.execute(f"DROP TABLE test_table ") From 50dd8365869071dee74569fc85f184d1e20873c1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Apr 2023 16:37:54 +0200 Subject: [PATCH 07/33] Fix flaky integration test test_async_query_sending --- tests/integration/test_hedged_requests/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 88371f6908d..2ca37fbb7ee 100644 --- a/tests/integration/test_hedged_requests/test.py +++ 
b/tests/integration/test_hedged_requests/test.py @@ -128,12 +128,12 @@ def check_changing_replica_events(expected_count): assert int(result) >= expected_count -def check_if_query_sending_was_suspended(minimum_count): +def check_if_query_sending_was_suspended(): result = NODES["node"].query( "SELECT value FROM system.events WHERE event='SuspendSendingQueryToShard'" ) - assert int(result) >= minimum_count + assert int(result) >= 1 def check_if_query_sending_was_not_suspended(): @@ -381,7 +381,7 @@ def test_async_connect(started_cluster): "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1" ) check_changing_replica_events(2) - check_if_query_sending_was_suspended(2) + check_if_query_sending_was_suspended() NODES["node"].query("DROP TABLE distributed_connect") @@ -406,7 +406,7 @@ def test_async_query_sending(started_cluster): NODES["node"].query("DROP TABLE IF EXISTS tmp") NODES["node"].query( "CREATE TEMPORARY TABLE tmp (number UInt64, s String) " - "as select number, randomString(number % 1000) from numbers(1000000)" + "as select number, randomString(number % 1000) from numbers(10000000)" ) NODES["node"].query( @@ -419,6 +419,6 @@ def test_async_query_sending(started_cluster): "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" " prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1" ) - check_if_query_sending_was_suspended(3) + check_if_query_sending_was_suspended() NODES["node"].query("DROP TABLE distributed_query_sending") From 38e4148c0a4472a8de7dde31f73b476bbd454128 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 24 Apr 2023 15:44:32 +0200 Subject: [PATCH 08/33] Disable in debug --- .../02581_share_big_sets_between_mutation_tasks_long.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql index 97cf979e80a..21ff453cd8e 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-asan, no-ubsan, no-msan +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan DROP TABLE IF EXISTS 02581_trips; From 125736dbccc32af850414be26f042e6294c7fbc2 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 24 Apr 2023 19:06:30 +0200 Subject: [PATCH 09/33] More cases from long test to short test --- ..._big_sets_between_mutation_tasks.reference | 39 +++++++++++++++ ..._share_big_sets_between_mutation_tasks.sql | 49 +++++++++++++++++-- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference index 18e83d1244a..452e0e0801e 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference @@ -17,3 +17,42 @@ SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORD 8000 all_2_2_0_6 8000 all_3_3_0_6 8000 all_4_4_0_6 +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation 
with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +24000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +20000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +16000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +12000 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql index fc90582d20e..7b52a89b16f 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql @@ -1,12 +1,12 @@ DROP TABLE IF EXISTS 02581_trips; -CREATE TABLE 02581_trips(id UInt32, 
description String) ENGINE=MergeTree ORDER BY id; +CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id; -- Make multiple parts -INSERT INTO 02581_trips SELECT number, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+10000, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+20000, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+30000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number, number, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000, number+10000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+20000, number+20000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000, number+30000, '' FROM numbers(10000); -- { echoOn } SELECT count(), _part FROM 02581_trips GROUP BY _part ORDER BY _part; @@ -16,6 +16,45 @@ ALTER TABLE 02581_trips UPDATE description='1' WHERE id IN (SELECT (number*10+1) SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; ALTER TABLE 02581_trips UPDATE description='2' WHERE id IN (SELECT (number*10+2)::UInt32 FROM numbers(10000)) SETTINGS mutations_sync=2; SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; + +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = 
''; + +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; -- { echoOff } DROP TABLE 02581_trips; From aa81937eeea7f17000ec2cd7682d723907584fcb Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Apr 2023 00:31:55 +0000 Subject: [PATCH 10/33] add field with number of rows to async insert log --- src/Interpreters/AsynchronousInsertLog.cpp | 2 ++ src/Interpreters/AsynchronousInsertLog.h | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 4 +++- .../0_stateless/02456_async_inserts_logs.reference | 12 ++++++------ .../queries/0_stateless/02456_async_inserts_logs.sh | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index 916ec8f3d56..eeccd9ad92e 100644 --- 
a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -36,6 +36,7 @@ NamesAndTypesList AsynchronousInsertLogElement::getNamesAndTypes() {"format", std::make_shared(std::make_shared())}, {"query_id", std::make_shared()}, {"bytes", std::make_shared()}, + {"rows", std::make_shared()}, {"exception", std::make_shared()}, {"status", type_status}, @@ -71,6 +72,7 @@ void AsynchronousInsertLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(insert_query.format); columns[i++]->insert(query_id); columns[i++]->insert(bytes); + columns[i++]->insert(rows); columns[i++]->insert(exception); columns[i++]->insert(status); diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index e2fdd4c90a0..f1e39db1ded 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -24,6 +24,7 @@ struct AsynchronousInsertLogElement ASTPtr query; String query_id; UInt64 bytes{}; + UInt64 rows{}; String exception; Status status{}; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index b8de0246ae2..76f956341fd 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -444,7 +444,8 @@ try { auto buffer = std::make_unique(entry->bytes); current_entry = entry; - total_rows += executor.execute(*buffer); + size_t num_rows = executor.execute(*buffer); + total_rows += num_rows; chunk_info->offsets.push_back(total_rows); /// Keep buffer, because it still can be used @@ -459,6 +460,7 @@ try elem.query = key.query; elem.query_id = entry->query_id; elem.bytes = entry->bytes.size(); + elem.rows = num_rows; elem.exception = current_exception; current_exception.clear(); diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.reference b/tests/queries/0_stateless/02456_async_inserts_logs.reference index efd8a88eca4..79f7ea458a7 100644 --- 
a/tests/queries/0_stateless/02456_async_inserts_logs.reference +++ b/tests/queries/0_stateless/02456_async_inserts_logs.reference @@ -1,7 +1,7 @@ 5 - Values 21 1 Ok 1 -t_async_inserts_logs JSONEachRow 39 1 Ok 1 -t_async_inserts_logs Values 8 1 Ok 1 -t_async_inserts_logs JSONEachRow 6 0 ParsingError 1 -t_async_inserts_logs Values 6 0 ParsingError 1 -t_async_inserts_logs Values 8 0 FlushError 1 + Values 21 2 1 Ok 1 +t_async_inserts_logs JSONEachRow 39 2 1 Ok 1 +t_async_inserts_logs Values 8 1 1 Ok 1 +t_async_inserts_logs JSONEachRow 6 0 0 ParsingError 1 +t_async_inserts_logs Values 6 0 0 ParsingError 1 +t_async_inserts_logs Values 8 1 0 FlushError 1 diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.sh b/tests/queries/0_stateless/02456_async_inserts_logs.sh index 006455e2d42..2e4db67c069 100755 --- a/tests/queries/0_stateless/02456_async_inserts_logs.sh +++ b/tests/queries/0_stateless/02456_async_inserts_logs.sh @@ -30,7 +30,7 @@ ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_async_inserts_logs" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -q " - SELECT table, format, bytes, empty(exception), status, + SELECT table, format, bytes, rows, empty(exception), status, status = 'ParsingError' ? 
flush_time_microseconds = 0 : flush_time_microseconds > event_time_microseconds AS time_ok FROM system.asynchronous_insert_log WHERE database = '$CLICKHOUSE_DATABASE' OR query ILIKE 'INSERT INTO FUNCTION%$CLICKHOUSE_DATABASE%' From cc5acfbe62a52c2b651794db9c8173e0d8be0ea5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Apr 2023 01:37:37 +0000 Subject: [PATCH 11/33] add profile event --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 2 ++ tests/queries/0_stateless/02456_async_inserts_logs.reference | 3 +++ tests/queries/0_stateless/02456_async_inserts_logs.sh | 5 +++++ 4 files changed, 11 insertions(+) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index da096085d5b..166db25e14c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -10,6 +10,7 @@ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ + M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ M(AsyncInsertCacheHits, "Number of times a duplicate hash id has been found in asynchronous INSERT hash id cache.") \ M(FailedQuery, "Number of failed queries.") \ M(FailedSelectQuery, "Same as FailedQuery, but only for SELECT queries.") \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 76f956341fd..88233f58a54 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -40,6 +40,7 @@ namespace ProfileEvents { extern const Event AsyncInsertQuery; extern const Event AsyncInsertBytes; + extern const Event AsyncInsertRows; extern const Event FailedAsyncInsertQuery; } @@ -481,6 +482,7 @@ try format->addBuffer(std::move(last_buffer)); auto insert_query_id = insert_context->getCurrentQueryId(); + 
ProfileEvents::increment(ProfileEvents::AsyncInsertRows, total_rows); auto finish_entries = [&] { diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.reference b/tests/queries/0_stateless/02456_async_inserts_logs.reference index 79f7ea458a7..ba1b19fb184 100644 --- a/tests/queries/0_stateless/02456_async_inserts_logs.reference +++ b/tests/queries/0_stateless/02456_async_inserts_logs.reference @@ -5,3 +5,6 @@ t_async_inserts_logs Values 8 1 1 Ok 1 t_async_inserts_logs JSONEachRow 6 0 0 ParsingError 1 t_async_inserts_logs Values 6 0 0 ParsingError 1 t_async_inserts_logs Values 8 1 0 FlushError 1 +AsyncInsertBytes 1 +AsyncInsertQuery 1 +AsyncInsertRows 1 diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.sh b/tests/queries/0_stateless/02456_async_inserts_logs.sh index 2e4db67c069..43cd73d7231 100755 --- a/tests/queries/0_stateless/02456_async_inserts_logs.sh +++ b/tests/queries/0_stateless/02456_async_inserts_logs.sh @@ -37,3 +37,8 @@ ${CLICKHOUSE_CLIENT} -q " ORDER BY table, status, format" ${CLICKHOUSE_CLIENT} -q "DROP TABLE t_async_inserts_logs" + +${CLICKHOUSE_CLIENT} -q " +SELECT event, value > 0 FROM system.events +WHERE event IN ('AsyncInsertQuery', 'AsyncInsertBytes', 'AsyncInsertRows') +ORDER BY event" From 7ec2de0d3fdd9c4cd654873e32ff06a5b717ef9a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Apr 2023 08:41:40 +0200 Subject: [PATCH 12/33] Fix building iceberg without avro Signed-off-by: Azat Khuzhin --- src/Storages/DataLakes/StorageIceberg.h | 4 ++-- src/Storages/DataLakes/registerDataLakes.cpp | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Storages/DataLakes/StorageIceberg.h b/src/Storages/DataLakes/StorageIceberg.h index 6e93c732338..f1c9c485ef0 100644 --- a/src/Storages/DataLakes/StorageIceberg.h +++ b/src/Storages/DataLakes/StorageIceberg.h @@ -5,7 +5,7 @@ #include #include "config.h" -#if USE_AWS_S3 +#if USE_AWS_S3 && USE_AVRO #include #include #endif @@ -18,7 +18,7 @@ struct 
StorageIcebergName static constexpr auto name = "Iceberg"; }; -#if USE_AWS_S3 +#if USE_AWS_S3 && USE_AVRO using StorageIcebergS3 = IStorageDataLake>; #endif diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp index 508cf9c8d80..1447a4777c5 100644 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ b/src/Storages/DataLakes/registerDataLakes.cpp @@ -31,11 +31,15 @@ void registerStorageDeltaLake(StorageFactory & factory) } #endif +#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. + void registerStorageIceberg(StorageFactory & factory) { REGISTER_DATA_LAKE_STORAGE(StorageIcebergS3, StorageIcebergName::name) } +#endif + void registerStorageHudi(StorageFactory & factory) { REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name) From 98d7e94b4e1d885de0c7cedb73d9d67a76e61375 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 25 Apr 2023 09:10:48 +0000 Subject: [PATCH 13/33] Extend test --- .../01891_partition_hash.reference | 1 + .../0_stateless/01891_partition_hash.sql | 35 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01891_partition_hash.reference b/tests/queries/0_stateless/01891_partition_hash.reference index 56d11075e50..c5814777dfe 100644 --- a/tests/queries/0_stateless/01891_partition_hash.reference +++ b/tests/queries/0_stateless/01891_partition_hash.reference @@ -1 +1,2 @@ 6ba51fa36c625adab5d58007c96e32bf +ebc1c2f37455caea601feeb840757dd3 diff --git a/tests/queries/0_stateless/01891_partition_hash.sql b/tests/queries/0_stateless/01891_partition_hash.sql index f56ed6a4ff4..894594dd465 100644 --- a/tests/queries/0_stateless/01891_partition_hash.sql +++ b/tests/queries/0_stateless/01891_partition_hash.sql @@ -1,7 +1,32 @@ -drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 
UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); -insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +DROP TABLE IF EXISTS tab; +CREATE TABLE tab ( + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + i128 Int128, + i256 Int256, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + u128 UInt128, + u256 UInt256, + id UUID, + s String, + fs FixedString(33), + a Array(UInt8), + t Tuple(UInt16, UInt32), + d Date, + dt DateTime('Asia/Istanbul'), + dt64 DateTime64(3, 'Asia/Istanbul'), + dec128 Decimal128(3), + dec256 Decimal256(4), + lc LowCardinality(String)) +engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) ORDER BY tuple(); +INSERT INTO tab VALUES (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +INSERT INTO tab VALUES (123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, 123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 
3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. -select partition_id from system.parts where table = 'tab' and database = currentDatabase(); -drop table if exists tab; +SELECT partition_id FROM system.parts WHERE table = 'tab' AND database = currentDatabase(); +DROP TABLE IF EXISTS tab; From a0791e3187d52c70da80ca00bcd9823d6cbe1d86 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 25 Apr 2023 11:53:59 +0200 Subject: [PATCH 14/33] Update DatabasePostgreSQL.cpp --- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index d9116f74c52..4b42d799661 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; From d7f759ce381bd8d5c732d8b1f2f99e2272d76e69 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 25 Apr 2023 11:31:53 +0000 Subject: [PATCH 15/33] Docs: Make caption of processors_profile_log page consistent with other pages --- docs/en/operations/settings/settings.md | 2 +- docs/en/operations/system-tables/processors_profile_log.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 66a24ac3fca..4cef8d724c9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -890,7 +890,7 @@ Write time that processor spent during execution/waiting for data to `system.pro See also: -- 
[`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log) +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#processors_profile_log) - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) ## max_insert_block_size {#settings-max_insert_block_size} diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index e849525e495..0532acd4288 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -1,4 +1,4 @@ -# system.processors_profile_log {#system-processors_profile_log} +# processors_profile_log {#processors_profile_log} This table contains profiling on processors level (that you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)). @@ -73,4 +73,4 @@ Here you can see: **See Also** -- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) \ No newline at end of file +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) From 3255f937f27aee295a4363e5676a5558a98b61c8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 25 Apr 2023 11:34:10 +0000 Subject: [PATCH 16/33] Remove anchor --- docs/en/operations/settings/settings.md | 2 +- docs/en/operations/system-tables/processors_profile_log.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4cef8d724c9..c6fdcf317c3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -890,7 +890,7 @@ Write time that processor spent during execution/waiting for data to `system.pro See also: -- 
[`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#processors_profile_log) +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md) - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) ## max_insert_block_size {#settings-max_insert_block_size} diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index 0532acd4288..a6ff15642a1 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -1,4 +1,4 @@ -# processors_profile_log {#processors_profile_log} +# processors_profile_log This table contains profiling on processors level (that you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)). From 6fecdef0fedfa72efd8fdd3afaced80b396bbb1e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 15:35:28 +0200 Subject: [PATCH 17/33] Fix glibc compatibility check --- base/glibc-compatibility/glibc-compatibility.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index 7e8ea5051d7..49bb81a58be 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -235,6 +235,17 @@ ssize_t getrandom(void *buf, size_t buflen, unsigned flags) return syscall(SYS_getrandom, buf, buflen, flags); } +/* Structure for scatter/gather I/O. */ +struct iovec +{ + void *iov_base; /* Pointer to data. */ + size_t iov_len; /* Length of data. 
*/ +}; + +ssize_t preadv(int __fd, const struct iovec *__iovec, int __count, __off_t __offset) +{ + return syscall(SYS_preadv, __fd, __iovec, __count, (long)(__offset), (long)(__offset>>32)); +} #include #include From bb2146114503258c4b1b6dd2f8ef34c45110a498 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 15:45:23 +0200 Subject: [PATCH 18/33] Try to fix with pie --- CMakeLists.txt | 7 +++++-- cmake/sanitize.cmake | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cc1a64a9e96..a59ecacd89d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -421,8 +421,11 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -set (CMAKE_POSITION_INDEPENDENT_CODE OFF) -if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X)) +if (NOT SANITIZE) + set (CMAKE_POSITION_INDEPENDENT_CODE OFF) +endif + +if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE) # Slightly more efficient code can be generated # It's disabled for ARM because otherwise ClickHouse cannot run on Android. set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 13aaa414b93..fc9793d8f35 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -33,8 +33,7 @@ if (SANITIZE) # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to # keep the binary size down. # TODO: try compiling with -Og and with ld.gold. 
- set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") - + set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") From 0fcf19efd31a01b14ed5d5e1b13f340e390e8392 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 15:47:47 +0200 Subject: [PATCH 19/33] Missed brackets --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a59ecacd89d..0554403cce5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -423,7 +423,7 @@ set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") if (NOT SANITIZE) set (CMAKE_POSITION_INDEPENDENT_CODE OFF) -endif +endif() if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE) # Slightly more efficient code can be generated From 0caa34f55e4e3d2de8b03b003e895d2434fceb16 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 25 Apr 2023 15:50:13 +0200 Subject: [PATCH 20/33] Do not randomize prefetch settings for debug build (#49134) * Do not randomize prefetch settings for debug build * Update tests/clickhouse-test --------- Co-authored-by: Alexander Tokmakov Co-authored-by: Alexander Tokmakov --- tests/clickhouse-test | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index eb5faa1ffb5..7c492a9b467 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -579,10 +579,17 @@ class SettingsRandomizer: } @staticmethod - def get_random_settings(): + def get_random_settings(args): 
random_settings = [] + is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): - random_settings.append(f"{setting}={generator()}") + if ( + is_debug + and setting == "allow_prefetched_read_pool_for_remote_filesystem" + ): + random_settings.append(f"{setting}=0") + else: + random_settings.append(f"{setting}={generator()}") return random_settings @@ -817,7 +824,7 @@ class TestCase: ) if self.randomize_settings: - self.random_settings = SettingsRandomizer.get_random_settings() + self.random_settings = SettingsRandomizer.get_random_settings(args) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( From 2495e30b7d4a181107d07961cb7ae731e8a72f32 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 18:15:07 +0200 Subject: [PATCH 21/33] Bump sysroot --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index f0081b2649b..e0d1b64da66 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8 +Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb From ed8ca018bd38e469cede86d1929f860079be8160 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 25 Apr 2023 19:11:20 +0200 Subject: [PATCH 22/33] Add a note regarding private/public repo to logs --- tests/ci/workflow_jobs_lambda/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index 6ac75bcb8e5..49d475d11dc 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -284,7 +284,9 @@ def handler(event: dict, _: Any) -> dict: wf_job["runner_group_name"] or "", # nullable repo["full_name"], ) - logging.info("Got the next event: %s", workflow_job) + logging.info( + "Got the next event (private_repo=%s): %s", repo["private"], workflow_job + ) if repo["private"]: workflow_job.anonimyze() From 9b0e4835cd583f96809a3553d50d39aab7358704 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Apr 2023 22:57:34 +0200 Subject: [PATCH 23/33] Add changelog for 23.4 --- CHANGELOG.md | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47320208f02..482ca3cec73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.4, 2023-04-26](#234)**
**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**
**[ClickHouse release v23.2, 2023-02-23](#232)**
**[ClickHouse release v23.1, 2023-01-25](#231)**
@@ -6,6 +7,155 @@ # 2023 Changelog +### ClickHouse release 23.4, 2023-04-26 + +#### Backward Incompatible Change +* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). + +#### New Feature +* Support new aggregate function `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in spark. Greenwald-Khanna algorithm refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add a statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794).
[#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. Value is stored in zookeeper and can be used instead of not persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support `Map` type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). +* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4 space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add `extractKeyValuePairs` function to extract key value pairs from strings. Input strings might contain noise (i.e log files / do not need to be 100% formatted in key-value-pair format), the algorithm will look for key value pairs matching the arguments passed to the function. As of now, function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). +* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). +* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). + +#### Performance Improvement +* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). +* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts then for each part a set for subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts then this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently it can lookup the set in the cache, wait for it to be built and reuse it.
[#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). +* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). +* Now an internal query to local replica is sent explicitly and data from it received through loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating of the reading process and merging results, continuously answering for requests while all the secondary queries read the data. Note: Using loopback interface is not so performant, otherwise some replicas could starve for tasks which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even more lazy. All incoming requests contain the information about the reading algorithm we initialize the coordinator with it when first request comes. If any replica will decide to read with different algorithm - an exception will be thrown and a query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not build set for the right side of `IN` clause with subquery when it is used only for analysis of skip indexes and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). +* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755).
[#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). +* Query processing is parallelized right after reading from a data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). +* Lowered contention of ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). +* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). + +#### Experimental Feature +* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). +* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). +* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implement zero-copy-replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Improvement +* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229) . Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). 
[#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). +* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data. - Support `Iceberg` format version v2 (previously only v1 was supported) - Support reading partitioned data for `DeltaLake`/`Hudi` - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small size tables - Made these engines to pickup updates of changed data (previously the state was set on table creation) - Make proper testing for `Iceberg`/`DeltaLake`/`Hudi` using spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)). +* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766) , [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). +* It is possible to set _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450) . [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)). 
+* `bitCount` function supports `FixedString` and `String` data type. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)). +* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)). +* Currently, the JSON_VALUE function is similar as spark's get_json_object function, which support to get value from json string by a path like '$.key'. But still has something different - 1. in spark's get_json_object will return null while the path is not exist, but in JSON_VALUE will return empty string; - 2. in spark's get_json_object will return a complex type value, such as a json object/array value, but in JSON_VALUE will return empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)). +* For `use_structure_from_insertion_table_in_table_functions` more flexible insert table structure propagation to table function. Fixed an issue with name mapping and using virtual columns. No more need for 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Do not continue retrying to connect to ZK if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)). +* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32 and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)). +* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)). +* Added support for `BACKUP ALL` command which backups all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)). +* The output of some SHOW PROCESSLIST is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)). +* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`, settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)). +* Support more types in `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)). +* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)). +* Check primary key type for simple dictionary is native unsigned integer type. Add setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable checking). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)). +* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow write/read unnamed tuple as nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)). +* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)). +* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds, the previous behavior (single zero) can be restored using setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't replicate DELETE and TRUNCATE for KeeperMap.
[#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)). +* Generate valid Decimals and Bools in generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow trailing commas in expression list of SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)). +* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add support for `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed in runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). +* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). +* Make Schema inference works for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). +* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If a user set `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. 
[#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add aliases `name` and `part_name` form `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)). +* Functions "arrayDifferenceSupport()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)). +* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)). +* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. The LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from the file inside `/proc`. This error was correctly logged into ClickHouse's server log. We have additionally workaround this issue by reopening a file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)). +* Improve memory accounting for prefetches. Randomise prefetch settings In CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. 
Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). +* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Improve the embedded dashboard. Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or MacOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The exception message about the unparsed query parameter will also tell about the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Update time zones. 
The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor highlighting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). +* Fix a bug in LLVM's libc++ leading to a crash for uploading parts to S3 whose size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix cpu usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)).
+* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `toTimeZone` function throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). +* The `groupArray` aggregate function correctly works for empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Async inserts with empty data will no longer throw exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). +* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). 
+* Fix unexpected part name error when trying to drop an ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading `Date32` Parquet/Arrow column into a non-`Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `UNKNOWN_IDENTIFIER` error while selecting from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). + + ### ClickHouse release 23.3 LTS, 2023-03-30 #### Upgrade Notes From a6664e6b084b89518e6dc08ff6cbd4e813f18024 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 25 Apr 2023 18:15:09 +0200 Subject: [PATCH 24/33] Add typing, capitalize only the first letter w/o lowering the rest --- tests/ci/report.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 15d8ff9010e..a40eb559792 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -262,17 +262,20 @@ class ReportColorTheme: ColorTheme = Tuple[str, str, str] -def _format_header(header, branch_name, branch_url=None): - result = " ".join([w.capitalize() for w in header.split(" ")]) +def _format_header( + header: str, branch_name: str, branch_url: Optional[str] = None +) -> str: + # Following line does not lower CI->Ci and SQLancer->Sqlancer.
It only + # capitalizes the first letter and doesn't touch the rest of the word + result = " ".join([w[0].upper() + w[1:] for w in header.split(" ") if w]) result = result.replace("Clickhouse", "ClickHouse") result = result.replace("clickhouse", "ClickHouse") if "ClickHouse" not in result: - result = "ClickHouse " + result - result += " for " + result = f"ClickHouse {result}" if branch_url: - result += f'{branch_name}' + result = f'{result} for {branch_name}' else: - result += branch_name + result = f"{result} for {branch_name}" return result From 9a7f09bae4e4546ad34f25d2471082bb7099b434 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 25 Apr 2023 18:16:29 +0200 Subject: [PATCH 25/33] Avoid splitting check_name into subdirectories --- tests/ci/performance_comparison_check.py | 1 + tests/ci/upload_result_helper.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index d0c84d56496..0da41e0ae82 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -142,6 +142,7 @@ if __name__ == "__main__": .replace("(", "_") .replace(")", "_") .replace(",", "_") + .replace("/", "_") ) docker_image = get_image_with_version(reports_path, IMAGE_NAME) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index b988e240b0e..150af7aff4a 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -59,9 +59,10 @@ def upload_results( additional_files: List[str], check_name: str, ) -> str: - s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace( - " ", "_" - ).replace("(", "_").replace(")", "_").replace(",", "_") + normalized_check_name = check_name.lower() + for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_")): + normalized_check_name = normalized_check_name.replace(*r) + s3_path_prefix = 
f"{pr_number}/{commit_sha}/{normalized_check_name}" additional_urls = process_logs( s3_client, additional_files, s3_path_prefix, test_results ) From f65b5264726ee68e464ddd03389581360b168cb7 Mon Sep 17 00:00:00 2001 From: Aram Peres <6775216+aramperes@users.noreply.github.com> Date: Tue, 25 Apr 2023 20:48:46 -0400 Subject: [PATCH 26/33] Fix an unclosed XML tag in documentation --- docs/en/operations/settings/constraints-on-settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 83ef46053a4..1895a79cd3e 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -40,7 +40,7 @@ If the user tries to violate the constraints an exception is thrown and the sett There are supported few types of constraints: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows user to change the setting within `min`/`max` range even if `readonly` setting is set to 1, otherwise settings are not allow to be changed in `readonly=1` mode. 
Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled: ``` xml - true + true ``` From 4eb83a94211dd8c8841f72f32681685fe0272b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=B5=A9=E6=9E=97?= Date: Tue, 25 Apr 2023 18:22:07 +0800 Subject: [PATCH 27/33] fix: add slash for close tag --- programs/server/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 7a75d7251a9..1aeda624db2 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1293,7 +1293,7 @@ - +