From d970cd5b616ed8facab1c95e524940fe1a8502bf Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Tue, 9 Jan 2024 18:11:03 +0800
Subject: [PATCH 001/150] drop tables

---
 docs/en/sql-reference/statements/drop.md      |  4 +-
 src/Interpreters/InterpreterDropQuery.cpp     | 92 ++++++++++++++++---
 src/Interpreters/InterpreterDropQuery.h       |  4 +
 src/Parsers/ASTDropQuery.cpp                  | 20 +++-
 src/Parsers/ASTDropQuery.h                    |  3 +
 src/Parsers/ParserCreateQuery.cpp             |  2 +-
 src/Parsers/ParserDropQuery.cpp               | 21 ++---
 src/Parsers/tests/gtest_dictionary_parser.cpp | 20 +++-
 .../0_stateless/02961_drop_tables.reference   |  8 ++
 .../queries/0_stateless/02961_drop_tables.sql | 32 +++++++
 10 files changed, 171 insertions(+), 35 deletions(-)
 create mode 100644 tests/queries/0_stateless/02961_drop_tables.reference
 create mode 100644 tests/queries/0_stateless/02961_drop_tables.sql

diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index 8ed00f625d6..159ab09ab94 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -30,9 +30,11 @@ Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
 Syntax:
 
 ``` sql
-DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db.]name [ON CLUSTER cluster] [SYNC]
+DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC]
 ```
 
+Note that dropping multiple tables at once is not an atomic operation: if dropping any table fails, the tables after it in the list are not dropped.
+
 ## DROP DICTIONARY
 
 Deletes the dictionary.
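For illustration, a minimal sketch of the multi-table form documented above (the database and table names here are hypothetical, not taken from the patch):

```sql
-- Tables are dropped left to right; the statement is not atomic,
-- so if dropping db.t2 fails, db.t3 is left in place.
DROP TABLE db.t1, db.t2, db.t3;
```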
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 711100b5de1..3285e29d22a 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -53,16 +54,56 @@ InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, ContextMut
 {
 }
 
-
 BlockIO InterpreterDropQuery::execute()
 {
     auto & drop = query_ptr->as<ASTDropQuery &>();
+    if (drop.database_and_tables)
+    {
+        BlockIO res;
+        auto & database_and_tables = drop.database_and_tables->as<ASTExpressionList &>();
+        for (const auto & child : database_and_tables.children)
+        {
+            auto cloned = drop.clone();
+            auto & query = cloned->as<ASTDropQuery &>();
+            query.database_and_tables = nullptr;
+
+            auto database_and_table = dynamic_pointer_cast<ASTIdentifier>(child);
+            if (database_and_table->name_parts.size() == 2)
+            {
+                query.database = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
+                query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[1]);
+            }
+            else
+            {
+                query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
+            }
+
+            if (query.database)
+                query.children.push_back(query.database);
+
+            if (query.table)
+                query.children.push_back(query.table);
+
+            current_query_ptr = cloned;
+            res = executeSingleDropQuery(cloned);
+        }
+        return res;
+    }
+    else
+    {
+        current_query_ptr = query_ptr;
+        return executeSingleDropQuery(query_ptr);
+    }
+}
+
+BlockIO InterpreterDropQuery::executeSingleDropQuery(const ASTPtr & drop_query_ptr)
+{
+    auto & drop = drop_query_ptr->as<ASTDropQuery &>();
 
-    if (!drop.cluster.empty() && drop.table && !drop.if_empty && !maybeRemoveOnCluster(query_ptr, getContext()))
+    if (!drop.cluster.empty() && drop.table && !drop.if_empty && !maybeRemoveOnCluster(current_query_ptr, getContext()))
     {
         DDLQueryOnClusterParams params;
         params.access_to_check = getRequiredAccessForDDLOnCluster();
-        return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+        return executeDDLQueryOnCluster(current_query_ptr, getContext(), params);
     }
 
     if (getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously)
@@ -70,11 +111,11 @@ BlockIO InterpreterDropQuery::execute()
     if (drop.table)
         return executeToTable(drop);
-    else if (drop.database && !drop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
+    else if (drop.database && !drop.cluster.empty() && !maybeRemoveOnCluster(current_query_ptr, getContext()))
     {
-       DDLQueryOnClusterParams params;
-       params.access_to_check = getRequiredAccessForDDLOnCluster();
-       return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+        DDLQueryOnClusterParams params;
+        params.access_to_check = getRequiredAccessForDDLOnCluster();
+        return executeDDLQueryOnCluster(current_query_ptr, getContext(), params);
     }
     else if (drop.database)
         return executeToDatabase(drop);
@@ -82,7 +123,6 @@ BlockIO InterpreterDropQuery::execute()
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Nothing to drop, both names are empty");
 }
 
-
 void InterpreterDropQuery::waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait)
 {
     if (uuid_to_wait == UUIDHelpers::Nil)
@@ -155,7 +195,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
     table_id.uuid = database->tryGetTableUUID(table_id.table_name);
 
     /// Prevents recursive drop from drop database query. The original query must specify a table.
-    bool is_drop_or_detach_database = !query_ptr->as<ASTDropQuery>()->table;
+    bool is_drop_or_detach_database = !current_query_ptr->as<ASTDropQuery>()->table;
 
     AccessFlags drop_storage;
@@ -178,7 +218,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
         return executeDDLQueryOnCluster(new_query_ptr, getContext(), params);
     }
 
-    if (database->shouldReplicateQuery(getContext(), query_ptr))
+    if (database->shouldReplicateQuery(getContext(), current_query_ptr))
     {
         if (query.kind == ASTDropQuery::Kind::Detach)
             context_->checkAccess(drop_storage, table_id);
@@ -248,7 +288,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
             /// Drop table data, don't touch metadata
-            table->truncate(query_ptr, metadata_snapshot, context_, table_excl_lock);
+            table->truncate(current_query_ptr, metadata_snapshot, context_, table_excl_lock);
         }
         else if (query.kind == ASTDropQuery::Kind::Drop)
         {
@@ -307,7 +347,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name,
                 = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
             /// Drop table data, don't touch metadata
             auto metadata_snapshot = table->getInMemoryMetadataPtr();
-            table->truncate(query_ptr, metadata_snapshot, getContext(), table_lock);
+            table->truncate(current_query_ptr, metadata_snapshot, getContext(), table_lock);
         }
         else if (kind == ASTDropQuery::Kind::Drop)
         {
@@ -440,11 +480,35 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
     return {};
 }
 
+void InterpreterDropQuery::extendQueryLogElemImpl(DB::QueryLogElement & elem, const DB::ASTPtr & ast, DB::ContextPtr context_) const
+{
+    auto & drop = ast->as<ASTDropQuery &>();
+    if (drop.database_and_tables)
+    {
+        auto & list = drop.database_and_tables->as<ASTExpressionList &>();
+        for (auto it = list.children.begin(); it != list.children.end(); ++it)
+        {
+            auto identifier = dynamic_pointer_cast<ASTIdentifier>(*it);
+            if (identifier->name_parts.size() == 2)
+            {
+                auto quoted_database = backQuoteIfNeed(identifier->name_parts[0]);
+                elem.query_databases.insert(quoted_database);
+                elem.query_tables.insert(quoted_database + "." + backQuoteIfNeed(identifier->name_parts[1]));
+            }
+            else
+            {
+                auto quoted_database = backQuoteIfNeed(context_->getCurrentDatabase());
+                elem.query_databases.insert(quoted_database);
+                elem.query_tables.insert(quoted_database + "." + backQuoteIfNeed(identifier->name_parts[0]));
+            }
+        }
+    }
+}
 
 AccessRightsElements InterpreterDropQuery::getRequiredAccessForDDLOnCluster() const
 {
     AccessRightsElements required_access;
-    const auto & drop = query_ptr->as<const ASTDropQuery &>();
+    const auto & drop = current_query_ptr->as<const ASTDropQuery &>();
 
     if (!drop.table)
     {
@@ -512,7 +576,7 @@ bool InterpreterDropQuery::supportsTransactions() const
 {
     /// Enable only for truncate table with MergeTreeData engine
 
-    auto & drop = query_ptr->as<ASTDropQuery &>();
+    auto & drop = current_query_ptr->as<ASTDropQuery &>();
 
     return drop.cluster.empty()
             && !drop.temporary
diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h
index 7ae544a7356..08668f47225 100644
--- a/src/Interpreters/InterpreterDropQuery.h
+++ b/src/Interpreters/InterpreterDropQuery.h
@@ -29,10 +29,14 @@ public:
 
     bool supportsTransactions() const override;
 
+    void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context_) const override;
+
 private:
     AccessRightsElements getRequiredAccessForDDLOnCluster() const;
 
     ASTPtr query_ptr;
+    ASTPtr current_query_ptr;
 
+    BlockIO executeSingleDropQuery(const ASTPtr & drop_query_ptr);
     BlockIO executeToDatabase(const ASTDropQuery & query);
     BlockIO executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait);
diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp
index ad1294c6e71..aea5a00bbcd 100644
--- a/src/Parsers/ASTDropQuery.cpp
+++ b/src/Parsers/ASTDropQuery.cpp
@@ -1,4 +1,6 @@
 #include
+#include
+#include
 #include
 #include
 
@@ -48,7 +50,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState
         settings.ostr << "TEMPORARY ";
 
-    if (!table && database)
+    if (!table && !database_and_tables && database)
         settings.ostr << "DATABASE ";
     else if (is_dictionary)
         settings.ostr << "DICTIONARY ";
@@ -65,8 +67,22 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState
 
     settings.ostr << (settings.hilite ? hilite_none : "");
 
-    if (!table && database)
+    if (!table && !database_and_tables && database)
         settings.ostr << backQuoteIfNeed(getDatabase());
+    else if (database_and_tables)
+    {
+        auto & list = database_and_tables->as<ASTExpressionList &>();
+        for (auto it = list.children.begin(); it != list.children.end(); ++it)
+        {
+            if (it != list.children.begin())
+                settings.ostr << ", ";
+
+            auto identifier = dynamic_pointer_cast<ASTIdentifier>(*it);
+            settings.ostr << (identifier->name_parts.size() == 2
+                ? backQuoteIfNeed(identifier->name_parts[0]) + "." + backQuoteIfNeed(identifier->name_parts[1])
+                : backQuoteIfNeed(identifier->name_parts[0]));
+        }
+    }
     else
         settings.ostr << (database ? backQuoteIfNeed(getDatabase()) + "." : "") << backQuoteIfNeed(getTable());
diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h
index a732b354260..7aca9a883ed 100644
--- a/src/Parsers/ASTDropQuery.h
+++ b/src/Parsers/ASTDropQuery.h
@@ -37,6 +37,9 @@ public:
     // We detach the object permanently, so it will not be reattached back during server restart.
     bool permanently{false};
 
+    /// Example: DROP TABLE t1, t2, t3...
+    ASTPtr database_and_tables;
+
     /** Get the text that identifies this element.
       */
     String getID(char) const override;
     ASTPtr clone() const override;
diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp
index 1f6f68c9d8e..db11960cb59 100644
--- a/src/Parsers/ParserCreateQuery.cpp
+++ b/src/Parsers/ParserCreateQuery.cpp
@@ -104,7 +104,7 @@ bool ParserColumnDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected &
 
 bool ParserNameList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
-    return ParserList(std::make_unique<ParserCompoundIdentifier>(), std::make_unique<ParserToken>(TokenType::Comma), false)
+    return ParserList(std::make_unique<ParserCompoundIdentifier>(true, true), std::make_unique<ParserToken>(TokenType::Comma), false)
         .parse(pos, node, expected);
 }
diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp
index 450c8a1afec..b6a6d6e28d4 100644
--- a/src/Parsers/ParserDropQuery.cpp
+++ b/src/Parsers/ParserDropQuery.cpp
@@ -2,7 +2,7 @@
 #include
 #include
 #include
-
+#include
 namespace DB
 {
@@ -18,15 +18,17 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
     ParserKeyword s_view("VIEW");
     ParserKeyword s_database("DATABASE");
     ParserToken s_dot(TokenType::Dot);
+    ParserToken s_comma(TokenType::Comma);
     ParserKeyword s_if_exists("IF EXISTS");
     ParserKeyword s_if_empty("IF EMPTY");
     ParserIdentifier name_p(true);
     ParserKeyword s_permanently("PERMANENTLY");
     ParserKeyword s_no_delay("NO DELAY");
     ParserKeyword s_sync("SYNC");
+    ParserNameList tables_p;
 
     ASTPtr database;
-    ASTPtr table;
+    ASTPtr database_and_tables;
     String cluster_str;
     bool if_exists = false;
     bool if_empty = false;
@@ -68,15 +70,8 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
         if (s_if_empty.ignore(pos, expected))
             if_empty = true;
 
-        if (!name_p.parse(pos, table, expected))
+        if (!tables_p.parse(pos, database_and_tables, expected))
             return false;
-
-        if (s_dot.ignore(pos, expected))
-        {
-            database = table;
-            if (!name_p.parse(pos, table, expected))
-                return false;
-        }
     }
 
     /// common for tables / dictionaries / databases
@@ -105,13 +100,13 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
     query->sync = sync;
     query->permanently = permanently;
     query->database = database;
-    query->table = table;
+    query->database_and_tables = database_and_tables;
 
     if (database)
         query->children.push_back(database);
 
-    if (table)
-        query->children.push_back(table);
+    if (database_and_tables)
+        query->children.push_back(database_and_tables);
 
     query->cluster = cluster_str;
diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp
index c0a975f7a38..020e145157c 100644
--- a/src/Parsers/tests/gtest_dictionary_parser.cpp
+++ b/src/Parsers/tests/gtest_dictionary_parser.cpp
@@ -300,9 +300,20 @@ TEST(ParserDictionaryDDL, ParseDropQuery)
     ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0);
     ASTDropQuery * drop1 = ast1->as<ASTDropQuery>();
 
+    auto get_database_and_table = [](const ASTDropQuery & drop) -> std::pair<String, String>
+    {
+        auto & database_and_tables = drop.database_and_tables->as<ASTExpressionList &>();
+        auto database_and_table = dynamic_pointer_cast<ASTIdentifier>(database_and_tables.children[0]);
+        if (database_and_table->name_parts.size() == 2)
+            return {database_and_table->name_parts[0], database_and_table->name_parts[1]};
+        else
+            return {"", database_and_table->name_parts[0]};
+    };
+
     EXPECT_TRUE(drop1->is_dictionary);
-    EXPECT_EQ(drop1->getDatabase(), "test");
-    EXPECT_EQ(drop1->getTable(), "dict1");
+    auto [database1, table1] = get_database_and_table(*drop1);
+    EXPECT_EQ(database1, "test");
+    EXPECT_EQ(table1, "dict1");
     auto str1 = serializeAST(*drop1);
     EXPECT_EQ(input1, str1);
 
@@ -312,8 +323,9 @@ TEST(ParserDictionaryDDL, ParseDropQuery)
     ASTDropQuery * drop2 = ast2->as<ASTDropQuery>();
 
     EXPECT_TRUE(drop2->is_dictionary);
-    EXPECT_EQ(drop2->getDatabase(), "");
-    EXPECT_EQ(drop2->getTable(), "dict2");
+    auto [database2, table2] = get_database_and_table(*drop2);
+    EXPECT_EQ(database2, "");
+    EXPECT_EQ(table2, "dict2");
     auto str2 = serializeAST(*drop2);
     EXPECT_EQ(input2, str2);
 }
diff --git a/tests/queries/0_stateless/02961_drop_tables.reference b/tests/queries/0_stateless/02961_drop_tables.reference
new file mode 100644
index 00000000000..c0465dc592a
--- /dev/null
+++ b/tests/queries/0_stateless/02961_drop_tables.reference
@@ -0,0 +1,8 @@
+-- check which tables exist in 02961_db1
+-- check which tables exist in 02961_db2
+02961_tb4
+02961_tb5
+Test when deletion of existing table fails
+-- check which tables exist in 02961_db1
+-- check which tables exist in 02961_db2
+02961_tb5
diff --git a/tests/queries/0_stateless/02961_drop_tables.sql b/tests/queries/0_stateless/02961_drop_tables.sql
new file mode 100644
index 00000000000..e9695da5ed8
--- /dev/null
+++ b/tests/queries/0_stateless/02961_drop_tables.sql
@@ -0,0 +1,32 @@
+-- Tags: no-parallel-replicas
+DROP DATABASE IF EXISTS 02961_db1;
+CREATE DATABASE IF NOT EXISTS 02961_db1;
+DROP DATABASE IF EXISTS 02961_db2;
+CREATE DATABASE IF NOT EXISTS 02961_db2;
+
+
+CREATE TABLE IF NOT EXISTS 02961_db1.02961_tb1 (id UInt32) Engine=Memory();
+CREATE TABLE IF NOT EXISTS 02961_db1.02961_tb2 (id UInt32) Engine=Memory();
+
+CREATE TABLE IF NOT EXISTS 02961_db2.02961_tb3 (id UInt32) Engine=Memory();
+CREATE TABLE IF NOT EXISTS 02961_db2.02961_tb4 (id UInt32) Engine=Memory();
+CREATE TABLE IF NOT EXISTS 02961_db2.02961_tb5 (id UInt32) Engine=Memory();
+
+DROP TABLE 02961_db1.02961_tb1, 02961_db1.02961_tb2, 02961_db2.02961_tb3;
+
+SELECT '-- check which tables exist in 02961_db1';
+SHOW TABLES FROM 02961_db1;
+SELECT '-- check which tables exist in 02961_db2';
+SHOW TABLES FROM 02961_db2;
+
+SELECT 'Test when deletion of existing table fails';
+DROP TABLE 02961_db2.02961_tb4, 02961_db1.02961_tb1, 02961_db2.02961_tb5; -- { serverError UNKNOWN_TABLE }
+
+SELECT '-- check which tables exist in 02961_db1';
+SHOW TABLES FROM 02961_db1;
+SELECT '-- check which tables exist in 02961_db2';
+SHOW TABLES FROM 02961_db2;
+
+
+DROP DATABASE IF EXISTS 02961_db1;
+DROP DATABASE IF EXISTS 02961_db2;

From 915226d22b772191a82f7ca45826eab6f82dcccf Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Wed, 24 Jan 2024 13:26:38 +0800
Subject: [PATCH 002/150] fix test

---
 src/Interpreters/InterpreterDropQuery.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 3285e29d22a..4a13ff4de77 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -576,12 +576,12 @@ bool InterpreterDropQuery::supportsTransactions() const
 {
     /// Enable only for truncate table with MergeTreeData engine
 
-    auto & drop = current_query_ptr->as<ASTDropQuery &>();
+    auto & drop = query_ptr->as<ASTDropQuery &>();
 
     return drop.cluster.empty()
             && !drop.temporary
             && drop.kind == ASTDropQuery::Kind::Truncate
-            && drop.table;
+            && drop.database_and_tables;
 }
 
 void registerInterpreterDropQuery(InterpreterFactory & factory)

From 3b2a1dc21999496b3ad88c88e3a1dc52cb1a7ef7 Mon Sep 17 00:00:00 2001
From: zhongyuankai <54787696+zhongyuankai@users.noreply.github.com>
Date: Sun, 28 Jan 2024 09:41:56 +0800
Subject: [PATCH 003/150] fix test

---
 src/Interpreters/InterpreterDropQuery.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 4a13ff4de77..6e0fda610ad 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -66,6 +66,7 @@ BlockIO InterpreterDropQuery::execute()
             auto cloned = drop.clone();
             auto & query = cloned->as<ASTDropQuery &>();
             query.database_and_tables = nullptr;
+            query.children.clear();
 
             auto database_and_table = dynamic_pointer_cast<ASTIdentifier>(child);
             if (database_and_table->name_parts.size() == 2)

From 9a289a3baac9b2ed51462a9ff15a1c945acbca1f Mon Sep 17 00:00:00 2001
From: zhongyuankai <54787696+zhongyuankai@users.noreply.github.com>
Date: Sun, 28 Jan 2024 13:53:38 +0800
Subject: [PATCH 004/150] fix test

---
 tests/queries/0_stateless/02961_drop_tables.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02961_drop_tables.sql b/tests/queries/0_stateless/02961_drop_tables.sql
index e9695da5ed8..e91ac4bfe19 100644
--- a/tests/queries/0_stateless/02961_drop_tables.sql
+++ b/tests/queries/0_stateless/02961_drop_tables.sql
@@ -1,4 +1,4 @@
--- Tags: no-parallel-replicas
+-- Tags: no-parallel
 DROP DATABASE IF EXISTS 02961_db1;
 CREATE DATABASE IF NOT EXISTS 02961_db1;
 DROP DATABASE IF EXISTS 02961_db2;
 CREATE DATABASE IF NOT EXISTS 02961_db2;

From 3de9dafa1493f90ce95d0a94cf65dc03a21516ab Mon Sep 17 00:00:00 2001
From: zhongyuankai <54787696+zhongyuankai@users.noreply.github.com>
Date: Sun, 28 Jan 2024 16:12:51 +0800
Subject: [PATCH 005/150] fix build check

---
 src/Parsers/ASTDropQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp
index aea5a00bbcd..f7b7c768abd 100644
--- a/src/Parsers/ASTDropQuery.cpp
+++ b/src/Parsers/ASTDropQuery.cpp
@@ -72,7 +72,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState
     else if (database_and_tables)
     {
         auto & list = database_and_tables->as<ASTExpressionList &>();
-        for (auto it = list.children.begin(); it != list.children.end(); ++it)
+        for (auto * it = list.children.begin(); it != list.children.end(); ++it)
         {
             if (it != list.children.begin())
                 settings.ostr << ", ";

From 7231e12e36069a2f7fb1c868d3ca2e856df17af4 Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Tue, 30 Jan 2024 21:47:16 +0800
Subject: [PATCH 006/150] fix build check

---
 src/Interpreters/InterpreterDropQuery.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 6e0fda610ad..1fdbf2fa376 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -487,9 +487,9 @@ void InterpreterDropQuery::extendQueryLogElemImpl(DB::QueryLogElement & elem, co
     if (drop.database_and_tables)
     {
         auto & list = drop.database_and_tables->as<ASTExpressionList &>();
-        for (auto it = list.children.begin(); it != list.children.end(); ++it)
+        for (auto & child : list.children)
         {
-            auto identifier = dynamic_pointer_cast<ASTIdentifier>(*it);
+            auto identifier = dynamic_pointer_cast<ASTIdentifier>(child);
             if (identifier->name_parts.size() == 2)
             {
                 auto quoted_database = backQuoteIfNeed(identifier->name_parts[0]);

From dcff866c9fa83c1d0fb755158e57096ec1a89155 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 1 Mar 2024 19:36:49 +0000
Subject: [PATCH 007/150] add virtual column _block_offset

---
 .../Algorithms/SummingSortedAlgorithm.cpp     |  2 +-
 src/Storages/MergeTree/IMergeTreeReader.cpp   |  2 +-
 src/Storages/MergeTree/MergeTask.cpp          | 28 +++++++++----
 src/Storages/MergeTree/MergeTask.h            | 11 +++--
 .../MergeTree/MergeTreeBlockReadUtils.cpp     |  2 +-
 src/Storages/MergeTree/MergeTreeData.cpp      |  1 +
 .../MergeTree/MergeTreeRangeReader.cpp        | 27 +++++++++---
 .../MergeTree/MergeTreeSequentialSource.cpp   | 10 +++++
 src/Storages/MergeTree/MergeTreeSettings.h    |  3 +-
 src/Storages/MergeTree/MutateTask.cpp         | 25 +++++++----
 src/Storages/MergeTreeVirtualColumns.cpp      |  4 ++
 src/Storages/MergeTreeVirtualColumns.h        |  7 ++++
 .../03001_block_offset_column.reference       | 41 +++++++++++++++++++
 .../0_stateless/03001_block_offset_column.sql | 34 +++++++++++++++
 14 files changed, 167 insertions(+), 30 deletions(-)
 create mode 100644 tests/queries/0_stateless/03001_block_offset_column.reference
 create mode 100644 tests/queries/0_stateless/03001_block_offset_column.sql

diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
index 6253d3058aa..275179e5a50 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
@@ -224,7 +224,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
         const ColumnWithTypeAndName & column = header.safeGetByPosition(i);
 
         const auto * simple = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName());
-        if (column.name == BlockNumberColumn::name)
+        if (column.name == BlockNumberColumn::name || column.name == BlockOffsetColumn::name)
         {
             def.column_numbers_not_to_aggregate.push_back(i);
             continue;
diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index 30be1aa1c56..8bef26ca240 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -93,7 +93,7 @@ void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const
                     it->name, it->type->getName(), virtual_column->type->getName());
         }
 
-        if (it->name == "_part_offset")
+        if (it->name == "_part_offset" || it->name == BlockOffsetColumn::name)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name);
 
         Field field;
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 4621314cb98..03b10034547 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -7,6 +7,7 @@
 #include
 #include
+#include "Storages/MergeTreeVirtualColumns.h"
 #include
 #include
 
@@ -224,13 +225,11 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
     ctx->need_remove_expired_values = false;
     ctx->force_ttl = false;
 
-    if (supportsBlockNumberColumn(global_ctx) && !global_ctx->storage_columns.contains(BlockNumberColumn::name))
-    {
-        global_ctx->storage_columns.emplace_back(NameAndTypePair{BlockNumberColumn::name,BlockNumberColumn::type});
-        global_ctx->all_column_names.emplace_back(BlockNumberColumn::name);
-        global_ctx->gathering_columns.emplace_back(NameAndTypePair{BlockNumberColumn::name,BlockNumberColumn::type});
-        global_ctx->gathering_column_names.emplace_back(BlockNumberColumn::name);
-    }
+    if (enableBlockNumberColumn(global_ctx))
+        addGatheringColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type);
+
+    if (enableBlockOffsetColumn(global_ctx))
+        addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type);
 
     SerializationInfo::Settings info_settings =
     {
@@ -296,7 +295,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
 
     switch (global_ctx->chosen_merge_algorithm)
     {
-        case MergeAlgorithm::Horizontal :
+        case MergeAlgorithm::Horizontal:
         {
             global_ctx->merging_columns = global_ctx->storage_columns;
             global_ctx->merging_column_names = global_ctx->all_column_names;
@@ -304,7 +303,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
             global_ctx->gathering_column_names.clear();
             break;
         }
-        case MergeAlgorithm::Vertical :
+        case MergeAlgorithm::Vertical:
         {
             ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream();
             ctx->rows_sources_write_buf = std::make_unique<CompressedWriteBuffer>(*ctx->rows_sources_uncompressed_write_buf);
@@ -402,6 +401,17 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
     return false;
 }
 
+void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type)
+{
+    if (global_ctx->storage_columns.contains(name))
+        return;
+
+    global_ctx->storage_columns.emplace_back(name, type);
+    global_ctx->all_column_names.emplace_back(name);
+    global_ctx->gathering_columns.emplace_back(name, type);
+    global_ctx->gathering_column_names.emplace_back(name);
+}
 
 MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::getContextForNextStage()
 {
diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h
index 28a3c671914..f7b58f818a6 100644
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@@ -404,12 +404,17 @@ private:
 
     Stages::const_iterator stages_iterator = stages.begin();
 
-    /// Check for persisting block number column
-    static bool supportsBlockNumberColumn(GlobalRuntimeContextPtr global_ctx)
+    static bool enableBlockNumberColumn(GlobalRuntimeContextPtr global_ctx)
     {
-        return global_ctx->data->getSettings()->allow_experimental_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty();
+        return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty();
     }
 
+    static bool enableBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx)
+    {
+        return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty();
+    }
+
+    static void addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const String & name, const DataTypePtr & type);
 };
 
 /// FIXME
diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
index e84ed0a8068..7d54d3867ac 100644
--- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@@ -278,7 +278,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
         .withVirtuals()
         .withSubcolumns(with_subcolumns);
 
-    static const NameSet columns_to_read_at_first_step = {"_part_offset"};
+    static const NameSet columns_to_read_at_first_step = {"_part_offset", BlockOffsetColumn::name};
 
     NameSet columns_from_previous_steps;
     auto add_step = [&](const PrewhereExprStep & step)
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 0b7ac39aa1b..6d23969b606 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -449,6 +449,7 @@ VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMet
 
     desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that show whether row exists or is deleted");
     desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original number of block that was assigned at insert");
+    desc.addPersistent(BlockOffsetColumn::name, BlockOffsetColumn::type, BlockOffsetColumn::codec, "Persisted original number of row in block that was assigned at insert");
 
     return desc;
 }
diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index ff86ec01efa..d23ad7bfed1 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include "Storages/MergeTreeVirtualColumns.h"
 #include
 #include
 #include
@@ -1148,12 +1149,28 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t
     if (!result.rows_per_granule.empty())
         result.adjustLastGranule();
 
-    if (read_sample_block.has("_part_offset"))
+    bool has_part_offset = read_sample_block.has("_part_offset");
+    bool has_block_offset = read_sample_block.has(BlockOffsetColumn::name);
+
+    if (has_part_offset || has_block_offset)
     {
-        size_t pos = read_sample_block.getPositionByName("_part_offset");
-        chassert(pos < result.columns.size());
-        chassert(result.columns[pos] == nullptr);
-        result.columns[pos] = fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
+        auto part_offset_column = fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
+
+        auto add_offset_column = [&](const auto & column_name)
+        {
+            size_t pos = read_sample_block.getPositionByName(column_name);
+            chassert(pos < result.columns.size());
+
+            /// Column may be persisted in part.
+            if (!result.columns[pos])
+                result.columns[pos] = part_offset_column;
+        };
+
+        if (has_part_offset)
+            add_offset_column("_part_offset");
+
+        if (has_block_offset)
+            add_offset_column(BlockOffsetColumn::name);
     }
 
     return result;
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 2eb010c54ec..fb0bc617aa4 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -210,6 +210,16 @@ static void fillBlockNumberColumns(
         {
             res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
         }
+        else if (it->name == BlockOffsetColumn::name)
+        {
+            auto column = BlockOffsetColumn::type->createColumn();
+            auto & block_offset_data = assert_cast<ColumnUInt64 &>(*column).getData();
+
+            block_offset_data.resize(num_rows);
+            std::iota(block_offset_data.begin(), block_offset_data.end(), block_offset);
+
+            res_columns[i] = std::move(column);
+        }
     }
 }
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 45c56a037f9..d80567a25ec 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -184,6 +184,8 @@ struct Settings;
     M(Bool, disable_freeze_partition_for_zero_copy_replication, true, "Disable FREEZE PARTITION query for zero copy replication.", 0) \
     M(Bool, disable_detach_partition_for_zero_copy_replication, true, "Disable DETACH PARTITION query for zero copy replication.", 0) \
     M(Bool, disable_fetch_partition_for_zero_copy_replication, true, "Disable FETCH PARTITION query for zero copy replication.", 0) \
+    M(Bool, enable_block_number_column, false, "Enable persisting column _block_number for each row.", 0) ALIAS(allow_experimental_block_number_column) \
+    M(Bool, enable_block_offset_column, false, "Enable persisting column _block_offset for each row.", 0) \
     \
     /** Experimental/work in progress feature. Unsafe for production. */ \
     M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \
@@ -192,7 +194,6 @@ struct Settings;
     M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \
     M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
     M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \
-    M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \
     M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \
     \
     /** Compress marks and primary key. */ \
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index b3c36f7180b..4ba67b64c81 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -324,19 +324,26 @@ getColumnsForNewDataPart(
 
-    if (!storage_columns_set.contains(RowExistsColumn::name))
-    {
-        if (deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command))
-        {
-            storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
-            storage_columns_set.insert(RowExistsColumn::name);
-        }
-    }
+    auto persistent_virtuals = source_part->storage.getVirtualsDescription()->get(VirtualsKind::Persistent);
 
-    if (!storage_columns_set.contains(BlockNumberColumn::name))
+    for (const auto & [name, type] : persistent_virtuals)
     {
-        if (source_part->tryGetSerialization(BlockNumberColumn::name) != nullptr)
-        {
-            storage_columns.push_back({BlockNumberColumn::name, BlockNumberColumn::type});
-            storage_columns_set.insert(BlockNumberColumn::name);
-        }
+        if (storage_columns_set.contains(name))
+            continue;
+
+        bool need_column = false;
+        if (name == RowExistsColumn::name)
+            need_column = deleted_mask_updated || (part_columns.has(name) && !has_delete_command);
+        else
+            need_column = part_columns.has(name);
+
+        if (need_column)
+        {
+            storage_columns.emplace_back(name, type);
+            storage_columns_set.insert(name);
+        }
     }
diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp
index 8250ceda7fa..885e46c6828 100644
--- a/src/Storages/MergeTreeVirtualColumns.cpp
+++ b/src/Storages/MergeTreeVirtualColumns.cpp
@@ -26,6 +26,10 @@ const String BlockNumberColumn::name = "_block_number";
 const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
 const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4();
 
+const String BlockOffsetColumn::name = "_block_offset";
+const DataTypePtr BlockOffsetColumn::type = std::make_shared<DataTypeUInt64>();
+const ASTPtr BlockOffsetColumn::codec = getCompressionCodecDeltaLZ4();
+
 Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part)
 {
     if (column_name == RowExistsColumn::name)
diff --git a/src/Storages/MergeTreeVirtualColumns.h b/src/Storages/MergeTreeVirtualColumns.h
index 24721bf1ad1..cd9fe544ed8 100644
--- a/src/Storages/MergeTreeVirtualColumns.h
+++ b/src/Storages/MergeTreeVirtualColumns.h
@@ -21,6 +21,13 @@ struct BlockNumberColumn
     static const ASTPtr codec;
 };
 
+struct BlockOffsetColumn
+{
+    static const String name;
+    static const DataTypePtr type;
+    static const ASTPtr codec;
+};
+
 Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part);
 
 }
diff --git a/tests/queries/0_stateless/03001_block_offset_column.reference b/tests/queries/0_stateless/03001_block_offset_column.reference
new file mode 100644
index 00000000000..fba88974380
--- /dev/null
+++ b/tests/queries/0_stateless/03001_block_offset_column.reference
@@ -0,0 +1,41 @@
+*** BEFORE MUTATION BEFORE MERGE ***
+1 1 1 0 all_1_1_0
+2 2 2 0 all_2_2_0
+3 3 1 1 all_1_1_0
+4 4 1 2 all_1_1_0
+5 5 2 1 all_2_2_0
+6 6 2 2 all_2_2_0
+*** AFTER MUTATION BEFORE MERGE ***
+1 0 1 0 all_1_1_0_3
+2 0 2 0 all_2_2_0_3
+3 0 1 1 all_1_1_0_3
+4 4 1 2 all_1_1_0_3
+5 5 2 1 all_2_2_0_3
+6 6 2 2 all_2_2_0_3
+*** AFTER MUTATION AFTER MERGE ***
+1 0 1 0 all_1_2_1_3
+2 0 2 0 all_1_2_1_3
+3 0 1 1 all_1_2_1_3
+4 4 1 2 all_1_2_1_3
+5 5 2 1 all_1_2_1_3
+6 6 2 2 all_1_2_1_3
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***
+1 0 1 0 all_1_2_1_3
+2 0 2 0 all_1_2_1_3
+3 0 1 1 all_1_2_1_3
+4 4 1 2 all_1_2_1_3
+5 5 2 1 all_1_2_1_3
+6 6 2 2 all_1_2_1_3
+7 7 4 0 all_4_4_0
+8 8 4 1 all_4_4_0
+9 9 4 2 all_4_4_0
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***
+1 0 1 0 all_1_4_2_3
+2 0 2 0 all_1_4_2_3
+3 0 1 1 all_1_4_2_3
+4 4 1 2 all_1_4_2_3
+5 5 2 1 all_1_4_2_3
+6 6 2 2 all_1_4_2_3
+7 7 4 0 all_1_4_2_3
+8 8 4 1 all_1_4_2_3
+9 9 4 2 all_1_4_2_3
diff --git a/tests/queries/0_stateless/03001_block_offset_column.sql b/tests/queries/0_stateless/03001_block_offset_column.sql
new file mode 100644
index 00000000000..0ef2b58f77b
--- /dev/null
+++ b/tests/queries/0_stateless/03001_block_offset_column.sql
@@ -0,0 +1,34 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (id UInt32, a UInt32) ENGINE = MergeTree
+ORDER BY id
+SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1;
+
+INSERT INTO test(id,a) VALUES (1,1),(3,3),(4,4);
+INSERT INTO test(id,a) VALUES (2,2),(5,5),(6,6);
+
+SELECT '*** BEFORE MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_block_offset,_part from test ORDER BY id;
+
+set mutations_sync=1;
+ALTER TABLE test UPDATE a=0 WHERE id<4;
+
+SELECT '*** AFTER MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_block_offset,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE ***';
+SELECT *,_block_number,_block_offset,_part from test ORDER BY id;
+
+INSERT INTO test(id,a) VALUES (7,7),(8,8),(9,9);
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***';
+SELECT *,_block_number,_block_offset,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***';
+SELECT *,_block_number,_block_offset,_part from test ORDER BY id;
+
+DROP TABLE test;
\ No newline at end of file
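A condensed sketch of what this commit provides, assuming the two settings introduced above are enabled (the table name is hypothetical; the test file in this commit exercises the same behavior in more detail):

```sql
CREATE TABLE t (id UInt32) ENGINE = MergeTree ORDER BY id
SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1;

INSERT INTO t VALUES (1), (2), (3);
OPTIMIZE TABLE t FINAL;

-- Unlike the ephemeral _part_offset, _block_number and _block_offset are
-- persisted per row, so after the merge they still identify the original
-- insert block and the row's offset within it.
SELECT id, _block_number, _block_offset, _part FROM t ORDER BY id;
```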
From dd400dedf247fb6c6451d4ab3e338ed53e0a7049 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 1 Mar 2024 19:51:14 +0000
Subject: [PATCH 008/150] add virtual column _data_version

---
 src/Storages/MergeTree/MergeTask.cpp          |  1 -
 src/Storages/MergeTree/MergeTreeData.cpp      |  3 ++-
 .../MergeTree/MergeTreeRangeReader.cpp        |  2 +-
 src/Storages/MergeTreeVirtualColumns.cpp      |  3 +++
 .../03001_data_version_column.reference       |  5 +++++
 .../0_stateless/03001_data_version_column.sql | 20 +++++++++++++++++++
 6 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/03001_data_version_column.reference
 create mode 100644 tests/queries/0_stateless/03001_data_version_column.sql

diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 03b10034547..12733e19870 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -7,7 +7,6 @@
 #include
 #include
-#include "Storages/MergeTreeVirtualColumns.h"
 #include
 #include
 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 6d23969b606..9c25b82ede7 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -440,6 +440,7 @@ VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMet
     desc.addEphemeral("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of partition");
     desc.addEphemeral("_sample_factor", std::make_shared<DataTypeFloat64>(), "Sample factor (from the query)");
     desc.addEphemeral("_part_offset", std::make_shared<DataTypeUInt64>(), "Number of row in the part");
+    desc.addEphemeral("_data_version", std::make_shared<DataTypeUInt64>(), "Data version of mutation of part");
 
     if (metadata.hasPartitionKey())
     {
@@ -1027,7 +1028,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadat
     /// TODO Checks for Graphite mode.
 }
 
-const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"};
+const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_data_version"};
 
 Block MergeTreeData::getHeaderWithVirtualsForFilter() const
 {
diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index d23ad7bfed1..ae6722ebce7 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -1,11 +1,11 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
-#include "Storages/MergeTreeVirtualColumns.h"
 #include
 #include
 #include
diff --git a/src/Storages/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTreeVirtualColumns.cpp
index 885e46c6828..e67ba811df4 100644
--- a/src/Storages/MergeTreeVirtualColumns.cpp
+++ b/src/Storages/MergeTreeVirtualColumns.cpp
@@ -47,6 +47,9 @@ Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTree
     if (column_name == "_partition_id")
         return part.info.partition_id;
 
+    if (column_name == "_data_version")
+        return part.info.getDataVersion();
+
     if (column_name == "_partition_value")
         return Tuple(part.partition.value.begin(), part.partition.value.end());
 
diff --git a/tests/queries/0_stateless/03001_data_version_column.reference b/tests/queries/0_stateless/03001_data_version_column.reference
new file mode 100644
index 00000000000..cd85ce9cc63
--- /dev/null
+++ b/tests/queries/0_stateless/03001_data_version_column.reference
@@ -0,0 +1,5 @@
+all_1_1_0 1 1 1
+all_2_2_0 2 2 2
+all_1_1_0_3 3 1 100
+all_2_2_0_3 3 2 200
+all_4_4_0 4 3 3
diff --git a/tests/queries/0_stateless/03001_data_version_column.sql b/tests/queries/0_stateless/03001_data_version_column.sql
new file mode 100644
index 00000000000..6cb6b192326
--- /dev/null
+++ b/tests/queries/0_stateless/03001_data_version_column.sql
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS t_data_version;
+
+CREATE TABLE t_data_version (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY a;
+
+INSERT INTO t_data_version VALUES (1, 1);
+INSERT INTO t_data_version VALUES (2, 2);
+
+SELECT _part, _data_version, * FROM t_data_version ORDER BY a;
+
+ALTER TABLE t_data_version UPDATE b = a * 100 WHERE 1 SETTINGS mutations_sync = 2;
+
+SELECT _part, _data_version, * FROM t_data_version ORDER BY a;
+
+INSERT INTO t_data_version VALUES (3, 3);
+
+-- Check part pruning.
+SELECT _part, _data_version, * FROM t_data_version WHERE _data_version = 4 ORDER BY a SETTINGS max_rows_to_read = 1;
+
+DROP TABLE t_data_version;
+
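Likewise, a brief sketch of the `_data_version` column added here (hypothetical table name, modeled on the test above): the value is the part's data version, i.e. its mutation version, and since it is derived from part metadata, a predicate on it can prune whole parts:

```sql
CREATE TABLE t (a UInt64) ENGINE = MergeTree ORDER BY a;
INSERT INTO t VALUES (1);
ALTER TABLE t UPDATE a = a + 1 WHERE 1 SETTINGS mutations_sync = 2;

-- The predicate is evaluated per part, so parts whose data version
-- cannot match are pruned before reading any rows.
SELECT _part, _data_version, a FROM t WHERE _data_version > 1;
```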
From 88d8880267d279817797fb770ba89335f258c8d0 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 7 Mar 2024 14:53:28 +0000
Subject: [PATCH 009/150] better virtual columns and tests added

---
 src/Storages/MergeTree/IMergeTreeReader.cpp   |  2 +-
 src/Storages/MergeTree/MergeTask.cpp          |  4 +-
 src/Storages/MergeTree/MergeTask.h            |  4 +-
 .../MergeTree/MergeTreeBlockReadUtils.cpp     |  6 +--
 src/Storages/MergeTree/MergeTreeData.cpp      |  4 +-
 .../MergeTree/MergeTreeRangeReader.cpp        |  5 +-
 src/Storages/MergeTree/MergeTreeRangeReader.h |  3 ++
 .../MergeTree/MergeTreeSequentialSource.cpp   |  3 +-
 .../MergeTree/MergeTreeVirtualColumns.cpp     |  2 +-
 src/Storages/MergeTree/MutateTask.cpp         |  2 +-
 ...=> 03001_block_offset_column.reference.j2} |  2 +
 .../0_stateless/03001_block_offset_column.sql | 34 -------------
 .../03001_block_offset_column.sql.j2          | 44 +++++++++++++++++
 .../03001_block_offset_column_2.reference     | 49 +++++++++++++++++++
 .../03001_block_offset_column_2.sql           | 25 ++++++++++
 .../0_stateless/03001_data_version_column.sql |  8 +--
 .../03002_sample_factor_where.reference       |  3 ++
 .../0_stateless/03002_sample_factor_where.sql | 11 +++++
 18 files changed, 159 insertions(+), 52 deletions(-)
 rename tests/queries/0_stateless/{03001_block_offset_column.reference => 03001_block_offset_column.reference.j2} (93%)
 delete mode 100644 tests/queries/0_stateless/03001_block_offset_column.sql
 create mode 100644 tests/queries/0_stateless/03001_block_offset_column.sql.j2
 create mode 100644 tests/queries/0_stateless/03001_block_offset_column_2.reference
 create mode 100644 tests/queries/0_stateless/03001_block_offset_column_2.sql
 create mode 100644 tests/queries/0_stateless/03002_sample_factor_where.reference
 create mode 100644 tests/queries/0_stateless/03002_sample_factor_where.sql

diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index 41e8db8fac4..cf6b64aac85 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -95,7 +95,7 @@ void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const
                     it->name, it->type->getName(), virtual_column->type->getName());
         }
 
-        if (it->name == "_part_offset" || it->name == BlockOffsetColumn::name)
+        if (MergeTreeRangeReader::virtuals_to_fill.contains(it->name))
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name);
 
         Field field;
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index b61cf236435..3453e4339fa 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -224,10 +224,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
     ctx->need_remove_expired_values = false;
     ctx->force_ttl = false;
 
-    if (enableBlockNumberColumn(global_ctx))
+    if (enabledBlockNumberColumn(global_ctx))
         addGatheringColumn(global_ctx, BlockNumberColumn::name, BlockNumberColumn::type);
 
-    if (enableBlockOffsetColumn(global_ctx))
+    if (enabledBlockOffsetColumn(global_ctx))
         addGatheringColumn(global_ctx, BlockOffsetColumn::name, BlockOffsetColumn::type);
 
     SerializationInfo::Settings info_settings =
diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h
index b36fd35cff9..f6268886b14 100644
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@@ -404,12 +404,12 @@
private: Stages::const_iterator stages_iterator = stages.begin(); - static bool enableBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) + static bool enabledBlockNumberColumn(GlobalRuntimeContextPtr global_ctx) { return global_ctx->data->getSettings()->enable_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); } - static bool enableBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) + static bool enabledBlockOffsetColumn(GlobalRuntimeContextPtr global_ctx) { return global_ctx->data->getSettings()->enable_block_offset_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty(); } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index d1b2c2558ef..570387a7046 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -278,17 +278,17 @@ MergeTreeReadTaskColumns getReadTaskColumns( .withVirtuals() .withSubcolumns(with_subcolumns); - static const NameSet columns_to_read_at_first_step = {"_part_offset", BlockOffsetColumn::name}; - NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { Names step_column_names; + /// Virtual columns that are filled by RangeReader + /// must be read in the first step before any filtering. if (columns_from_previous_steps.empty()) { for (const auto & required_column : required_columns) - if (columns_to_read_at_first_step.contains(required_column)) + if (MergeTreeRangeReader::virtuals_to_fill.contains(required_column)) step_column_names.push_back(required_column); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 947158b2aa1..6fb9bd07f10 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -440,7 +440,7 @@ VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMet desc.addEphemeral("_partition_id", std::make_shared(std::make_shared()), "Name of partition"); desc.addEphemeral("_sample_factor", std::make_shared(), "Sample factor (from the query)"); desc.addEphemeral("_part_offset", std::make_shared(), "Number of row in the part"); - desc.addEphemeral("_data_version", std::make_shared(), "Data version of mutation of part"); + desc.addEphemeral("_part_data_version", std::make_shared(), "Data version of part (either min block number or mutation version)"); if (metadata.hasPartitionKey()) { @@ -1028,7 +1028,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat /// TODO Checks for Graphite mode. 
} -const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_data_version"}; +const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value", "_part_data_version"}; Block MergeTreeData::getHeaderWithVirtualsForFilter() const { diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index ec1fef20267..eb213fdc5ad 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -872,6 +872,8 @@ size_t MergeTreeRangeReader::currentMark() const return stream.currentMark(); } +const NameSet MergeTreeRangeReader::virtuals_to_fill = {"_part_offset", BlockOffsetColumn::name}; + size_t MergeTreeRangeReader::Stream::numPendingRows() const { size_t rows_between_marks = index_granularity->getRowsCountInRange(current_mark, last_mark); @@ -1144,6 +1146,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t if (!result.rows_per_granule.empty()) result.adjustLastGranule(); + /// Column _block_offset is the same as _part_offset if it's not persisted in part. bool has_part_offset = read_sample_block.has("_part_offset"); bool has_block_offset = read_sample_block.has(BlockOffsetColumn::name); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 688a6b0922b..51fbbc8b052 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -115,6 +115,9 @@ public: bool isCurrentRangeFinished() const; bool isInitialized() const { return is_initialized; } + /// Names of virtual columns that are filled in RangeReader. 
+ static const NameSet virtuals_to_fill; + private: /// Accumulates sequential read() requests to perform a large read instead of multiple small reads class DelayedStream diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 1d29eea4c68..1dc34e0f37e 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -195,6 +195,7 @@ static void fillBlockNumberColumns( Columns & res_columns, const NamesAndTypesList & columns_list, UInt64 block_number, + UInt64 block_offset, UInt64 num_rows) { chassert(res_columns.size() == columns_list.size()); @@ -240,7 +241,7 @@ try if (rows_read) { - fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read); + fillBlockNumberColumns(columns, sample, data_part->info.min_block, current_row, rows_read); reader->fillVirtualColumns(columns, rows_read); current_row += rows_read; diff --git a/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp index 0a7f0dfbdbc..821724a3cfb 100644 --- a/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp +++ b/src/Storages/MergeTree/MergeTreeVirtualColumns.cpp @@ -47,7 +47,7 @@ Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTree if (column_name == "_partition_id") return part.info.partition_id; - if (column_name == "_data_version") + if (column_name == "_part_data_version") return part.info.getDataVersion(); if (column_name == "_partition_value") diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index cafd4e4ed68..1a6c6c0073b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -327,7 +327,7 @@ getColumnsForNewDataPart( } - auto persistent_virtuals = source_part->storage.getVirtualsDescription()->get(VirtualsKind::Persistent); + auto persistent_virtuals = source_part->storage.getVirtualsPtr()->getNamesAndTypesList(VirtualsKind::Persistent); for (const auto & [name, type] : persistent_virtuals) { diff --git a/tests/queries/0_stateless/03001_block_offset_column.reference b/tests/queries/0_stateless/03001_block_offset_column.reference.j2 similarity index 93% rename from tests/queries/0_stateless/03001_block_offset_column.reference rename to tests/queries/0_stateless/03001_block_offset_column.reference.j2 index fba88974380..641409167bf 100644 --- a/tests/queries/0_stateless/03001_block_offset_column.reference +++ b/tests/queries/0_stateless/03001_block_offset_column.reference.j2 @@ -1,3 +1,4 @@ +{% for enable_vertical_merge_algorithm in [0, 1] -%} *** BEFORE MUTATION BEFORE MERGE *** 1 1 1 0 all_1_1_0 2 2 2 0 all_2_2_0 @@ -39,3 +40,4 @@ 7 7 4 0 all_1_4_2_3 8 8 4 1 all_1_4_2_3 9 9 4 2 all_1_4_2_3 +{% endfor -%} diff --git a/tests/queries/0_stateless/03001_block_offset_column.sql b/tests/queries/0_stateless/03001_block_offset_column.sql deleted file mode 100644 index 0ef2b58f77b..00000000000 --- a/tests/queries/0_stateless/03001_block_offset_column.sql +++ /dev/null @@ -1,34 +0,0 @@ -DROP TABLE IF EXISTS test; - -CREATE TABLE test (id UInt32, a UInt32) ENGINE = MergeTree -ORDER BY id -SETTINGS enable_block_number_column = 1, enable_block_offset_column = 1; - -INSERT INTO test(id,a) VALUES (1,1),(3,3),(4,4); -INSERT INTO test(id,a) VALUES (2,2),(5,5),(6,6); - -SELECT '*** BEFORE MUTATION BEFORE MERGE ***'; -SELECT id,a,_block_number,_block_offset,_part from test ORDER BY id; - -set mutations_sync=1; -ALTER TABLE test 
UPDATE a=0 WHERE id<4; - -SELECT '*** AFTER MUTATION BEFORE MERGE ***'; -SELECT id,a,_block_number,_block_offset,_part from test ORDER BY id; - -OPTIMIZE TABLE test FINAL; - -SELECT '*** AFTER MUTATION AFTER MERGE ***'; -SELECT *,_block_number,_block_offset,_part from test ORDER BY id; - -INSERT INTO test(id,a) VALUES (7,7),(8,8),(9,9); - -SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***'; -SELECT *,_block_number,_block_offset,_part from test ORDER BY id; - -OPTIMIZE TABLE test FINAL; - -SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***'; -SELECT *,_block_number,_block_offset,_part from test ORDER BY id; - -DROP TABLE test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03001_block_offset_column.sql.j2 b/tests/queries/0_stateless/03001_block_offset_column.sql.j2 new file mode 100644 index 00000000000..b161183f884 --- /dev/null +++ b/tests/queries/0_stateless/03001_block_offset_column.sql.j2 @@ -0,0 +1,44 @@ +{% for enable_vertical_merge_algorithm in [0, 1] -%} + +DROP TABLE IF EXISTS t_block_offset; + +CREATE TABLE t_block_offset (id UInt32, a UInt32) ENGINE = MergeTree +ORDER BY id +SETTINGS + enable_block_number_column = 1, + enable_block_offset_column = 1, + index_granularity = 2, + vertical_merge_algorithm_min_bytes_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + enable_vertical_merge_algorithm = {{ enable_vertical_merge_algorithm }}; + +INSERT INTO t_block_offset(id,a) VALUES (1,1),(3,3),(4,4); +INSERT INTO t_block_offset(id,a) VALUES (2,2),(5,5),(6,6); + +SELECT '*** BEFORE MUTATION BEFORE MERGE ***'; +SELECT id,a,_block_number,_block_offset,_part from t_block_offset ORDER BY id; + +set mutations_sync=1; +ALTER TABLE t_block_offset UPDATE a=0 WHERE id<4; + +SELECT '*** AFTER MUTATION BEFORE MERGE ***'; +SELECT id,a,_block_number,_block_offset,_part from t_block_offset ORDER BY id; + +OPTIMIZE TABLE t_block_offset FINAL; + +SELECT '*** AFTER MUTATION AFTER MERGE ***'; +SELECT *,_block_number,_block_offset,_part from t_block_offset ORDER BY id; + +INSERT INTO t_block_offset(id,a) VALUES (7,7),(8,8),(9,9); + +SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***'; +SELECT *,_block_number,_block_offset,_part from t_block_offset ORDER BY id; + +OPTIMIZE TABLE t_block_offset FINAL; + +SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***'; +SELECT *,_block_number,_block_offset,_part from t_block_offset ORDER BY id; + +DROP TABLE t_block_offset; + +{% endfor %} diff --git a/tests/queries/0_stateless/03001_block_offset_column_2.reference b/tests/queries/0_stateless/03001_block_offset_column_2.reference new file mode 100644 index 00000000000..a7ad8232678 --- /dev/null +++ b/tests/queries/0_stateless/03001_block_offset_column_2.reference @@ -0,0 +1,49 @@ +all_1_2_1 1 0 0 0 +all_1_2_1 1 1 1 2 +all_1_2_1 1 2 2 4 +all_1_2_1 1 3 3 6 +all_1_2_1 1 4 4 8 +all_1_2_1 1 5 5 10 +all_1_2_1 1 6 6 12 +all_1_2_1 1 7 7 14 +all_1_2_1 1 8 8 16 +all_1_2_1 1 9 9 18 +all_1_2_1 1 10 10 20 +all_1_2_1 1 11 11 22 +all_1_2_1 1 12 12 24 +all_1_2_1 1 13 13 26 +all_1_2_1 1 14 14 28 +all_1_2_1 1 15 15 30 +=========== +all_1_3_2 1 0 0 0 +all_1_3_2 1 1 2 2 +all_1_3_2 1 2 4 4 +all_1_3_2 1 3 6 6 +all_1_3_2 1 4 8 8 +all_1_3_2 1 5 10 10 +all_1_3_2 1 6 12 12 +all_1_3_2 1 7 14 14 +all_1_3_2 1 8 16 16 +all_1_3_2 1 9 18 18 +all_1_3_2 1 10 20 20 +all_1_3_2 1 11 22 22 +all_1_3_2 1 12 24 24 +all_1_3_2 1 13 26 26 +all_1_3_2 1 14 28 28 +all_1_3_2 1 15 30 30 +all_1_3_2 3 0 1 1 +all_1_3_2 3 1 3 3 +all_1_3_2 3 2 5 5 +all_1_3_2 3 3 7 7 +all_1_3_2 3 4 9 9 +all_1_3_2 3 5 
11 11 +all_1_3_2 3 6 13 13 +all_1_3_2 3 7 15 15 +all_1_3_2 3 8 17 17 +all_1_3_2 3 9 19 19 +all_1_3_2 3 10 21 21 +all_1_3_2 3 11 23 23 +all_1_3_2 3 12 25 25 +all_1_3_2 3 13 27 27 +all_1_3_2 3 14 29 29 +all_1_3_2 3 15 31 31 diff --git a/tests/queries/0_stateless/03001_block_offset_column_2.sql b/tests/queries/0_stateless/03001_block_offset_column_2.sql new file mode 100644 index 00000000000..b994e37b952 --- /dev/null +++ b/tests/queries/0_stateless/03001_block_offset_column_2.sql @@ -0,0 +1,25 @@ + +DROP TABLE IF EXISTS t_block_offset; + +CREATE TABLE t_block_offset (id UInt32) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; + +INSERT INTO t_block_offset SELECT number * 2 FROM numbers(8); + +INSERT INTO t_block_offset SELECT number * 2 FROM numbers(8, 8); + +OPTIMIZE TABLE t_block_offset FINAL; + +SELECT _part, _block_number, _block_offset, _part_offset, id FROM t_block_offset ORDER BY _block_number, _block_offset; + +ALTER TABLE t_block_offset MODIFY SETTING enable_block_number_column = 1; +ALTER TABLE t_block_offset MODIFY SETTING enable_block_offset_column = 1; + +INSERT INTO t_block_offset SELECT number * 2 + 1 FROM numbers(16); + +OPTIMIZE TABLE t_block_offset FINAL; + +SELECT '==========='; +SELECT _part, _block_number, _block_offset, _part_offset, id FROM t_block_offset ORDER BY _block_number, _block_offset; + + +DROP TABLE t_block_offset; diff --git a/tests/queries/0_stateless/03001_data_version_column.sql b/tests/queries/0_stateless/03001_data_version_column.sql index 6cb6b192326..4e3377ebf47 100644 --- a/tests/queries/0_stateless/03001_data_version_column.sql +++ b/tests/queries/0_stateless/03001_data_version_column.sql @@ -5,16 +5,16 @@ CREATE TABLE t_data_version (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY a; INSERT INTO t_data_version VALUES (1, 1); INSERT INTO t_data_version VALUES (2, 2); -SELECT _part, _data_version, * FROM t_data_version ORDER BY a; +SELECT _part, _part_data_version, * FROM t_data_version ORDER BY a; ALTER TABLE t_data_version UPDATE b = a * 100 WHERE 1 SETTINGS mutations_sync = 2; -SELECT _part, _data_version, * FROM t_data_version ORDER BY a; +SELECT _part, _part_data_version, * FROM t_data_version ORDER BY a; INSERT INTO t_data_version VALUES (3, 3); --- Check part pruning. -SELECT _part, _data_version, * FROM t_data_version WHERE _data_version = 4 ORDER BY a SETTINGS max_rows_to_read = 1; +-- Check parts pruning. 
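+-- A freshly inserted part exposes its min block number as _part_data_version, while a
+-- mutated part exposes its mutation version, so the filter below should match only the
+-- newly inserted part; max_rows_to_read = 1 asserts the other parts are pruned, not read.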
+SELECT _part, _part_data_version, * FROM t_data_version WHERE _part_data_version = 4 ORDER BY a SETTINGS max_rows_to_read = 1; DROP TABLE t_data_version; diff --git a/tests/queries/0_stateless/03002_sample_factor_where.reference b/tests/queries/0_stateless/03002_sample_factor_where.reference new file mode 100644 index 00000000000..2f0d8589603 --- /dev/null +++ b/tests/queries/0_stateless/03002_sample_factor_where.reference @@ -0,0 +1,3 @@ +2 +0 +0 diff --git a/tests/queries/0_stateless/03002_sample_factor_where.sql b/tests/queries/0_stateless/03002_sample_factor_where.sql new file mode 100644 index 00000000000..6430034349a --- /dev/null +++ b/tests/queries/0_stateless/03002_sample_factor_where.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_sample_factor; + +CREATE TABLE t_sample_factor(a UInt64, b UInt64) ENGINE = MergeTree ORDER BY (a, b) SAMPLE BY b; +INSERT INTO t_sample_factor(a, b) VALUES (1, 2), (3, 4); + +SELECT uniq(b) * any(_sample_factor) FROM t_sample_factor SAMPLE 200000; + +SELECT uniq(b) * any(_sample_factor) FROM t_sample_factor SAMPLE 200000 WHERE a < -1; +SELECT uniq(b) * any(_sample_factor) FROM t_sample_factor SAMPLE 200000 PREWHERE a < -1; + +DROP TABLE t_sample_factor; From bc936cc7bba34f63d42c3b5c62c0936330bac3a7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 11 Mar 2024 14:41:51 +0800 Subject: [PATCH 010/150] inline small function in PODArrayDetails --- src/Common/PODArray.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index dd1fed08cb5..2786a50346a 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -22,7 +22,7 @@ void protectMemoryRegion(void * addr, size_t len, int prot) } #endif -size_t byte_size(size_t num_elements, size_t element_size) +ALWAY_INLINE size_t byte_size(size_t num_elements, size_t element_size) { size_t amount; if (__builtin_mul_overflow(num_elements, element_size, &amount)) From 150e7f338a3a1d49104f4d605bd2f65bd20fa149 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 11 Mar 2024 14:44:16 +0800 Subject: [PATCH 011/150] inline another function --- src/Common/PODArray.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index 2786a50346a..a8195f15241 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -22,7 +22,7 @@ void protectMemoryRegion(void * addr, size_t len, int prot) } #endif -ALWAY_INLINE size_t byte_size(size_t num_elements, size_t element_size) +ALWAYS_INLINE size_t byte_size(size_t num_elements, size_t element_size) { size_t amount; if (__builtin_mul_overflow(num_elements, element_size, &amount)) @@ -30,7 +30,7 @@ ALWAY_INLINE size_t byte_size(size_t num_elements, size_t element_size) return amount; } -size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right) +ALWAYS_INLINE size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right) { size_t amount; if (__builtin_add_overflow(byte_size(num_elements, element_size), pad_left + pad_right, &amount)) From 2bec92c48e9ad01ecc1bde38808ac35e628758b0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 11 Mar 2024 14:41:32 +0000 Subject: [PATCH 012/150] better _part_offset column --- .../MergeTree/MergeTreeRangeReader.cpp | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp 
b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index eb213fdc5ad..bbc4adaf458 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -872,7 +872,7 @@ size_t MergeTreeRangeReader::currentMark() const return stream.currentMark(); } -const NameSet MergeTreeRangeReader::virtuals_to_fill = {"_part_offset", BlockOffsetColumn::name}; +const NameSet MergeTreeRangeReader::virtuals_to_fill = {"_part_offset", "_block_offset"}; size_t MergeTreeRangeReader::Stream::numPendingRows() const { @@ -1146,30 +1146,29 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t if (!result.rows_per_granule.empty()) result.adjustLastGranule(); - /// Column _block_offset is the same as _part_offset if it's not persisted in part. - bool has_part_offset = read_sample_block.has("_part_offset"); - bool has_block_offset = read_sample_block.has(BlockOffsetColumn::name); + ColumnPtr part_offset_column; - if (has_part_offset || has_block_offset) + auto add_offset_column = [&](const auto & column_name) { - auto part_offset_column = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); + size_t pos = read_sample_block.getPositionByName(column_name); + chassert(pos < result.columns.size()); - auto add_offset_column = [&](const auto & column_name) - { - size_t pos = read_sample_block.getPositionByName(column_name); - chassert(pos < result.columns.size()); + /// Column may be persisted in part. + if (result.columns[pos]) + return; - /// Column may be persisted in part. - if (!result.columns[pos]) - result.columns[pos] = part_offset_column; - }; + if (!part_offset_column) + part_offset_column = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset); - if (has_part_offset) - add_offset_column("_part_offset"); + result.columns[pos] = part_offset_column; + }; - if (has_block_offset) - add_offset_column(BlockOffsetColumn::name); - } + if (read_sample_block.has("_part_offset")) + add_offset_column("_part_offset"); + + /// Column _block_offset is the same as _part_offset if it's not persisted in part. 
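+    /// (add_offset_column returns early when result.columns[pos] was already filled from
+    /// a column materialized in the part, so the shared part_offset_column is created
+    /// lazily, at most once per read result.)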
+ if (read_sample_block.has(BlockOffsetColumn::name)) + add_offset_column(BlockOffsetColumn::name); return result; } From db48b9749b8a9ebb8500c52b32ca0bfa61481df1 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 11 Mar 2024 15:35:17 +0000 Subject: [PATCH 013/150] better _part_offset column --- .../MergeTree/MergeTreeRangeReader.cpp | 8 +- src/Storages/MergeTree/MergeTreeRangeReader.h | 2 + .../02890_describe_table_options.reference | 256 ++++++++++-------- 3 files changed, 144 insertions(+), 122 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index bbc4adaf458..eb757e1d8c7 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1146,6 +1146,12 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t if (!result.rows_per_granule.empty()) result.adjustLastGranule(); + fillVirtualColumns(result, leading_begin_part_offset, leading_end_part_offset); + return result; +} + +void MergeTreeRangeReader::fillVirtualColumns(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) +{ ColumnPtr part_offset_column; auto add_offset_column = [&](const auto & column_name) @@ -1169,8 +1175,6 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t /// Column _block_offset is the same as _part_offset if it's not persisted in part. if (read_sample_block.has(BlockOffsetColumn::name)) add_offset_column(BlockOffsetColumn::name); - - return result; } ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 51fbbc8b052..b282ada6038 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -311,6 +311,8 @@ private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); Columns continueReadingChain(const ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result) const; + + void fillVirtualColumns(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset); IMergeTreeReader * merge_tree_reader = nullptr; diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index ff58202ae49..fe73e677432 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -34,70 +34,78 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom └───────────┴───────────────────────────┴──────────────┴────────────────────┴──────────────┴──────────────────┴────────────────┴──────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ 
ZSTD(1) │ │ 0 │ -│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ -│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ -└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _part_data_version │ UInt64 │ │ │ Data version of part (either min block number or mutation version) │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _block_offset │ UInt64 │ │ │ Persisted original number of row in block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ -│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ -│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ -│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ -│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ -│ 
_part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ -│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ -│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ -│ _shard_num │ UInt32 │ │ │ Deprecated. Use function shardNum instead │ │ │ 1 │ -└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 1 │ +│ _part_data_version │ UInt64 │ │ │ Data version of part (either min block number or mutation version) │ │ │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _block_offset │ UInt64 │ │ │ Persisted original number of row in block that was assigned at insert │ Delta, LZ4 │ │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 1 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴────────────┘ SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _part_data_version │ UInt64 │ │ │ Data version of part (either min block number or mutation version) │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _block_offset │ UInt64 │ │ │ Persisted original number of row in block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 
0 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ -│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ -│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ -│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ -│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ -│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ -│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ -│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ -│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ -│ _shard_num │ UInt32 │ │ │ Deprecated. Use function shardNum instead │ │ │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ -│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ -└────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────────────────────────────────────────────────────────────────────┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ │ │ index column │ │ │ 0 │ 0 │ +│ arr │ Array(UInt64) │ DEFAULT │ [10, 20] │ │ ZSTD(1) │ │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ │ │ Name of part │ │ │ 0 │ 1 │ +│ _part_index │ UInt64 │ │ │ Sequential index of the part in the query result │ │ │ 0 │ 1 │ +│ _part_uuid │ UUID │ │ │ Unique part identifier (if enabled MergeTree setting assign_part_uuids) │ │ │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ │ │ Name of partition │ │ │ 0 │ 1 │ +│ _sample_factor │ Float64 │ │ │ Sample factor (from the query) │ │ │ 0 │ 1 │ +│ _part_offset │ UInt64 │ │ │ Number of row in the part │ │ │ 0 │ 1 │ +│ _part_data_version │ UInt64 │ │ │ Data version of part (either min block number or mutation version) │ │ │ 0 │ 1 │ +│ _row_exists │ UInt8 │ │ │ Persisted mask created by lightweight delete that show whether row exists or is deleted │ │ │ 0 │ 1 │ +│ _block_number │ UInt64 │ │ │ Persisted original number of block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _block_offset │ UInt64 │ │ │ Persisted original number of row in block that was assigned at insert │ Delta, LZ4 │ │ 0 │ 1 │ +│ _shard_num │ UInt32 │ │ │ Deprecated. 
Use function shardNum instead │ │ │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ │ │ │ │ │ 1 │ 0 │ +│ t.a │ String │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +│ t.b │ UInt64 │ │ │ │ ZSTD(1) │ │ 1 │ 0 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────┴──────────────────┴────────────────┴──────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 0, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; ┌─name─┬─type──────────────────────┐ @@ -132,67 +140,75 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom └───────────┴───────────────────────────┴──────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 0; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple(a String, b UInt64) │ 0 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -└────────────────┴───────────────────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _part_data_version │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +│ _block_offset │ UInt64 │ 1 │ +└────────────────────┴───────────────────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ -│ t │ Tuple(a String, b UInt64) │ 0 │ -│ _part │ LowCardinality(String) │ 1 │ -│ _part_index │ UInt64 │ 1 │ -│ _part_uuid │ UUID │ 1 │ -│ _partition_id │ LowCardinality(String) │ 1 │ -│ _sample_factor │ Float64 │ 1 │ -│ _part_offset │ UInt64 │ 1 │ -│ _row_exists │ UInt8 │ 1 │ -│ _block_number │ UInt64 │ 1 │ -│ _shard_num │ UInt32 │ 1 │ -└────────────────┴───────────────────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ +│ _part │ LowCardinality(String) │ 1 │ +│ _part_index │ UInt64 │ 1 │ +│ _part_uuid │ UUID │ 1 │ +│ _partition_id │ LowCardinality(String) │ 1 │ +│ _sample_factor │ Float64 │ 1 │ +│ _part_offset │ UInt64 │ 1 │ +│ _part_data_version │ UInt64 │ 1 │ +│ _row_exists │ UInt8 │ 1 │ +│ _block_number │ UInt64 │ 1 │ +│ _block_offset │ UInt64 │ 1 │ +│ _shard_num │ UInt32 │ 1 │ +└────────────────────┴───────────────────────────┴────────────┘ SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ -│ _part │ 
LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└────────────────┴───────────────────────────┴──────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _part_data_version │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ _block_offset │ UInt64 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────┘ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCompactNoEscapes; -┌─name───────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ -│ id │ UInt64 │ 0 │ 0 │ -│ arr │ Array(UInt64) │ 0 │ 0 │ -│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ -│ _part │ LowCardinality(String) │ 0 │ 1 │ -│ _part_index │ UInt64 │ 0 │ 1 │ -│ _part_uuid │ UUID │ 0 │ 1 │ -│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ -│ _sample_factor │ Float64 │ 0 │ 1 │ -│ _part_offset │ UInt64 │ 0 │ 1 │ -│ _row_exists │ UInt8 │ 0 │ 1 │ -│ _block_number │ UInt64 │ 0 │ 1 │ -│ _shard_num │ UInt32 │ 0 │ 1 │ -│ arr.size0 │ UInt64 │ 1 │ 0 │ -│ t.a │ String │ 1 │ 0 │ -│ t.b │ UInt64 │ 1 │ 0 │ -└────────────────┴───────────────────────────┴──────────────┴────────────┘ +┌─name───────────────┬─type──────────────────────┬─is_subcolumn─┬─is_virtual─┐ +│ id │ UInt64 │ 0 │ 0 │ +│ arr │ Array(UInt64) │ 0 │ 0 │ +│ t │ Tuple(a String, b UInt64) │ 0 │ 0 │ +│ _part │ LowCardinality(String) │ 0 │ 1 │ +│ _part_index │ UInt64 │ 0 │ 1 │ +│ _part_uuid │ UUID │ 0 │ 1 │ +│ _partition_id │ LowCardinality(String) │ 0 │ 1 │ +│ _sample_factor │ Float64 │ 0 │ 1 │ +│ _part_offset │ UInt64 │ 0 │ 1 │ +│ _part_data_version │ UInt64 │ 0 │ 1 │ +│ _row_exists │ UInt8 │ 0 │ 1 │ +│ _block_number │ UInt64 │ 0 │ 1 │ +│ _block_offset │ UInt64 │ 0 │ 1 │ +│ _shard_num │ UInt32 │ 0 │ 1 │ +│ arr.size0 │ UInt64 │ 1 │ 0 │ +│ t.a │ String │ 1 │ 0 │ +│ t.b │ UInt64 │ 1 │ 0 │ +└────────────────────┴───────────────────────────┴──────────────┴────────────┘ From 5608005b34515cc1b12b6043377cb2c78683c07b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 21:22:45 +0100 Subject: [PATCH 014/150] Revert "Revert "Don't allow to set max_parallel_replicas to 0 as it doesn't make sense"" --- src/Client/ConnectionPoolWithFailover.cpp | 10 ++++++++++ src/Client/HedgedConnectionsFactory.cpp | 6 +++++- src/Client/HedgedConnectionsFactory.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 4 ++-- .../03001_max_parallel_replicas_zero_value.reference | 0 .../03001_max_parallel_replicas_zero_value.sql | 5 +++++ 7 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference create mode 100644 
tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 492fd4ae9e2..ad8ed0067d8 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int BAD_ARGUMENTS; } @@ -191,11 +192,20 @@ std::vector ConnectionPoolWithFailover::g max_entries = nested_pools.size(); } else if (pool_mode == PoolMode::GET_ONE) + { max_entries = 1; + } else if (pool_mode == PoolMode::GET_MANY) + { + if (settings.max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + max_entries = settings.max_parallel_replicas; + } else + { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); + } if (!priority_func) priority_func = makeGetPriorityFunc(settings); diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index f5b074a0257..703cc1f8821 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( @@ -82,7 +83,10 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - max_entries = max_parallel_replicas; + if (max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + + max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } } diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index ce7b553acdd..dd600d58e1e 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -158,7 +158,7 @@ private: /// checking the number of requested replicas that are still in process). 
size_t requested_connections_count = 0; - const size_t max_parallel_replicas = 0; + const size_t max_parallel_replicas = 1; const bool skip_unavailable_shards = 0; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index bcedba7346d..e28d8366aa7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -947,7 +947,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() if (number_of_replicas_to_use <= 1) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read"); return true; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 7b3fb0c5c91..0fe943e0bc7 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -295,7 +295,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - query_context->setSetting("max_parallel_replicas", UInt64{0}); + query_context->setSetting("max_parallel_replicas", UInt64{1}); LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -756,7 +756,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql new file mode 100644 index 00000000000..611aa4777ba --- /dev/null +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -0,0 +1,5 @@ +drop table if exists test_d; +create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} +drop table test_d; + From dd6599868adb6cbc3306a5946cae4ee3f833c138 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 12:06:25 +0000 Subject: [PATCH 015/150] Better check for 0 setting value --- src/Client/ConnectionPoolWithFailover.cpp | 3 -- src/Client/HedgedConnectionsFactory.cpp | 3 -- src/Core/Settings.h | 2 +- src/Core/SettingsFields.cpp | 36 +++++++++++++++++++++++ src/Core/SettingsFields.h | 15 ++++++++++ 5 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index ad8ed0067d8..94531f58bc6 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ 
b/src/Client/ConnectionPoolWithFailover.cpp @@ -197,9 +197,6 @@ std::vector ConnectionPoolWithFailover::g } else if (pool_mode == PoolMode::GET_MANY) { - if (settings.max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = settings.max_parallel_replicas; } else diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 703cc1f8821..6b22cc18674 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -83,9 +83,6 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - if (max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d70a6cf51c5..b23538cf209 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -178,7 +178,7 @@ class IColumn; \ M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ - M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ + M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 001d3e09dc9..caa8b3fdffd 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -575,4 +575,40 @@ void SettingFieldCustom::readBinary(ReadBuffer & in) parseFromString(str); } +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(UInt64 x) : SettingFieldUInt64(x) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(const DB::Field & f) : SettingFieldUInt64(f) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(UInt64 x) +{ + SettingFieldUInt64::operator=(x); + checkValueNonZero(); + return *this; +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(const DB::Field & f) +{ + SettingFieldUInt64::operator=(f); + checkValueNonZero(); + return *this; +} + +void SettingFieldNonZeroUInt64::parseFromString(const String & str) +{ + SettingFieldUInt64::parseFromString(str); + checkValueNonZero(); +} + +void SettingFieldNonZeroUInt64::checkValueNonZero() const +{ + if (value == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "A setting's value has to be greater than 0"); +} + } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 452f3f149ab..dc70d468851 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -627,4 +627,19 @@ struct SettingFieldCustom void readBinary(ReadBuffer & in); }; +struct SettingFieldNonZeroUInt64 : public SettingFieldUInt64 +{ +public: + explicit SettingFieldNonZeroUInt64(UInt64 x = 1); + explicit SettingFieldNonZeroUInt64(const Field & f); + + SettingFieldNonZeroUInt64 & operator=(UInt64 x); + SettingFieldNonZeroUInt64 & operator=(const Field & f); + + void parseFromString(const String & str); + +private: + void checkValueNonZero() const; +}; + } From a065231ca15684b7ebd0c1359ede037a46c6d450 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 12:07:36 +0000 Subject: [PATCH 016/150] Remove unused error code --- src/Client/ConnectionPoolWithFailover.cpp | 1 - src/Client/HedgedConnectionsFactory.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 94531f58bc6..0724153b277 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,7 +21,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; - extern const int BAD_ARGUMENTS; } diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 6b22cc18674..0fa2bc12924 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,7 +19,6 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( From d17212616c46f5e18c0719bedc84b1bd91e05d41 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 13 Mar 2024 17:26:45 +0800 Subject: [PATCH 017/150] add perf tests --- tests/performance/function_tokens.xml | 3 +++ 1 file changed, 3 
insertions(+) create mode 100644 tests/performance/function_tokens.xml diff --git a/tests/performance/function_tokens.xml b/tests/performance/function_tokens.xml new file mode 100644 index 00000000000..63b72f83df3 --- /dev/null +++ b/tests/performance/function_tokens.xml @@ -0,0 +1,3 @@ + + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000) + From 32410a68c136570cc19f0115a6b752f1d4cf93aa Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Mar 2024 18:00:57 +0000 Subject: [PATCH 018/150] Fix tests --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e28d8366aa7..22bbfc04401 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -800,7 +800,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( != parallel_replicas_before_analysis) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); need_analyze_again = true; } From 5020741b253f66967ef84707c98a2fd201d5080d Mon Sep 17 00:00:00 2001 From: Austin Kothig Date: Thu, 14 Mar 2024 14:34:45 -0700 Subject: [PATCH 019/150] Add support for nanosecond level precision. --- .../functions/date-time-functions.md | 4 +- .../functions/date-time-functions.md | 4 +- .../functions/date-time-functions.md | 1 + src/Functions/DateTimeTransforms.h | 38 +++++++++------- src/Functions/dateDiff.cpp | 43 +++++++++++++------ .../00538_datediff_plural_units.reference | 1 + .../00538_datediff_plural_units.sql | 1 + .../0_stateless/02814_age_datediff.reference | 6 ++- .../0_stateless/02814_age_datediff.sql | 3 ++ 9 files changed, 69 insertions(+), 32 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index ba7695af3fa..46ec9f01152 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1670,7 +1670,7 @@ Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../. ## age -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 nanosecond. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. For an alternative to `age`, see function `date\_diff`. @@ -1686,6 +1686,7 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `nanosecond` `nanoseconds` `ns` - `microsecond` `microseconds` `us` `u` - `millisecond` `milliseconds` `ms` - `second` `seconds` `ss` `s` @@ -1763,6 +1764,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). 
Possible values: + - `nanosecond` `nanoseconds` `ns` - `microsecond` `microseconds` `us` `u` - `millisecond` `milliseconds` `ms` - `second` `seconds` `ss` `s` diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index cbbb456aa80..65c482e9914 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -627,7 +627,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду. +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 наносекунда. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** @@ -641,6 +641,7 @@ age('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `nanosecond` (возможные сокращения: `ns`) - `microsecond` (возможные сокращения: `us`, `u`) - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) @@ -716,6 +717,7 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `nanosecond` (возможные сокращения: `ns`) - `microsecond` (возможные сокращения: `us`, `u`) - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index e4b70322477..d6493ffe605 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -643,6 +643,7 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 可能的值: + - `nanosecond` - `microsecond` - `millisecond` - `second` diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 70b2a7a83b4..a92680ac124 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -21,6 +21,7 @@ namespace DB { +static constexpr auto nanosecond_multiplier = 1000000000; static constexpr auto microsecond_multiplier = 1000000; static constexpr auto millisecond_multiplier = 1000; @@ -1902,9 +1903,10 @@ struct ToRelativeSubsecondNumImpl { static constexpr auto name = "toRelativeSubsecondNumImpl"; - static Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) + static Int64 execute(const DateTime64 & t, const DateTime64::NativeType scale, const DateLUTImpl &) { - static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000); + static_assert( + scale_multiplier == millisecond_multiplier || scale_multiplier == microsecond_multiplier || scale_multiplier == nanosecond_multiplier); if (scale == scale_multiplier) return t.value; if (scale > scale_multiplier) @@ -2030,13 +2032,14 @@ struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeCompon { UInt16 millisecond; UInt16 microsecond; + UInt16 nanosecond; }; struct ToDateTimeComponentsImpl { static constexpr auto name = 
"toDateTimeComponents"; - static DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) + static DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, const DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); @@ -2045,28 +2048,33 @@ struct ToDateTimeComponentsImpl components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional; --components.whole; } - Int64 fractional = components.fractional; - if (scale_multiplier > microsecond_multiplier) - fractional = fractional / (scale_multiplier / microsecond_multiplier); - else if (scale_multiplier < microsecond_multiplier) - fractional = fractional * (microsecond_multiplier / scale_multiplier); - constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier; - UInt16 millisecond = static_cast(fractional / divider); - UInt16 microsecond = static_cast(fractional % divider); - return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; + // Normalize the dividers between microseconds and nanoseconds w.r.t. the scale. + Int64 microsecond_divider = (millisecond_multiplier * scale_multiplier) / microsecond_multiplier; + Int64 nanosecond_divider = scale_multiplier / microsecond_multiplier; + + // Protect against division by zero for smaller scale multipliers. + microsecond_divider = (microsecond_divider ? microsecond_divider : 1); + nanosecond_divider = (nanosecond_divider ? nanosecond_divider : 1); + + const Int64 & fractional = components.fractional; + UInt16 millisecond = static_cast(fractional / microsecond_divider); + UInt16 microsecond = static_cast((fractional % microsecond_divider) / nanosecond_divider); + UInt16 nanosecond = static_cast(fractional % nanosecond_divider); + + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond, nanosecond}; } static DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone) { - return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast(t)), 0, 0}; + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast(t)), 0, 0, 0}; } static DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone) { - return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0}; + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0, 0}; } static DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone) { - return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0}; + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0, 0}; } using FactorTransform = ZeroTransform; diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index f75e6eb4fc8..85b569fb634 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -177,10 +177,10 @@ public: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; - auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); 
+ auto x_nanoseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_nanoseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); - if (x_microseconds <= y_microseconds) + if (x_nanoseconds <= y_nanoseconds) { a_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); b_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); @@ -193,7 +193,6 @@ public: adjust_value = 1; } - if constexpr (std::is_same_v>>) { if ((a_comp.date.month > b_comp.date.month) @@ -202,7 +201,8 @@ public: || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -215,7 +215,8 @@ public: || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -225,7 +226,8 @@ public: || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -237,7 +239,8 @@ public: || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -246,7 +249,8 @@ public: || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > 
b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -254,25 +258,34 @@ public: if ((a_comp.time.minute > b_comp.time.minute) || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { if ((a_comp.time.second > b_comp.time.second) || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { if ((a_comp.millisecond > b_comp.millisecond) - || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))) + || ((a_comp.millisecond == b_comp.millisecond) && ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))))) res += adjust_value; } - else if constexpr (std::is_same_v>>) + else if constexpr (std::is_same_v>>) { - if (a_comp.microsecond > b_comp.microsecond) + if ((a_comp.microsecond > b_comp.microsecond) + || ((a_comp.microsecond == b_comp.microsecond) && (a_comp.nanosecond > b_comp.nanosecond))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if (a_comp.nanosecond > b_comp.nanosecond) res += adjust_value; } return res; @@ -401,6 +414,8 @@ public: impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "microsecond" || unit == "microseconds" || unit == "us" || unit == "u") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + else if (unit == "nanosecond" || unit == "nanoseconds" || unit == "ns") + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.reference b/tests/queries/0_stateless/00538_datediff_plural_units.reference index ebe63974df8..885f8a1cfe2 100644 --- a/tests/queries/0_stateless/00538_datediff_plural_units.reference +++ b/tests/queries/0_stateless/00538_datediff_plural_units.reference @@ -8,3 +8,4 @@ -63072000 -63072000000 -63072000000000 +-63072000000000000 diff --git a/tests/queries/0_stateless/00538_datediff_plural_units.sql b/tests/queries/0_stateless/00538_datediff_plural_units.sql index d1234155a56..dd8395fc60f 100644 --- a/tests/queries/0_stateless/00538_datediff_plural_units.sql +++ b/tests/queries/0_stateless/00538_datediff_plural_units.sql @@ -8,3 +8,4 @@ SELECT dateDiff('minutes', 
toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01- SELECT dateDiff('seconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); SELECT dateDiff('milliseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); SELECT dateDiff('microseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); +SELECT dateDiff('nanoseconds', toDateTime('2017-12-31', 'UTC'), toDateTime('2016-01-01', 'UTC')); diff --git a/tests/queries/0_stateless/02814_age_datediff.reference b/tests/queries/0_stateless/02814_age_datediff.reference index cbcb8c8a7b6..85dc205a499 100644 --- a/tests/queries/0_stateless/02814_age_datediff.reference +++ b/tests/queries/0_stateless/02814_age_datediff.reference @@ -1,10 +1,14 @@ -- { echo } -- DateTime64 vs DateTime64 with fractional part +SELECT age('nanosecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +5100200000 +SELECT age('nanosecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); +5100199999 SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); 5100200 SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); -5100200 +5100199 SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550299', 6, 'UTC')); 5100 SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550298', 6, 'UTC')); diff --git a/tests/queries/0_stateless/02814_age_datediff.sql b/tests/queries/0_stateless/02814_age_datediff.sql index 934a95c035f..64e329b2fc5 100644 --- a/tests/queries/0_stateless/02814_age_datediff.sql +++ b/tests/queries/0_stateless/02814_age_datediff.sql @@ -1,6 +1,9 @@ -- { echo } -- DateTime64 vs DateTime64 with fractional part +SELECT age('nanosecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +SELECT age('nanosecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); + SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); From a05ca50b01abc21404252596f5c99a4a4d6684b1 Mon Sep 17 00:00:00 2001 From: Austin Kothig Date: Thu, 14 Mar 2024 17:48:54 -0400 Subject: [PATCH 020/150] Update docs/ru/sql-reference/functions/date-time-functions.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 65c482e9914..56ae4359bf1 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -627,7 +627,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 наносекунда. 
+Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 наносекунду. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** From 4285f1a8114084b0b7af8dd3546eae1953072915 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:50:03 +0100 Subject: [PATCH 021/150] Revert "Revert "Updated format settings references in the docs (datetime.md)"" --- docs/en/sql-reference/data-types/datetime.md | 8 ++++---- docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598..a465106c2ff 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. 
Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 57f24786bb7..25e87794147 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/index.md) +- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From db0a5209f1ed0ddc88057ce8d2425b97e9c84397 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Mar 2024 16:14:59 +0000 Subject: [PATCH 022/150] Fix tests --- .../02783_parallel_replicas_trivial_count_optimization.sh | 4 ++-- .../0_stateless/03001_max_parallel_replicas_zero_value.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh index bafab249b47..20b3efedd49 100755 --- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh +++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh @@ -25,7 +25,7 @@ function run_query_with_pure_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ @@ -50,7 +50,7 @@ function run_query_with_custom_key_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql index 611aa4777ba..499486713a6 100644 --- a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -1,5 +1,5 @@ drop table if exists test_d; create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); -select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{clientError BAD_ARGUMENTS} drop table test_d; From 5ac15574b72a44d349f3bfef400263893eb73457 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 18 Mar 2024 18:52:01 +0000 Subject: [PATCH 023/150] do not load marks on merges --- src/Storages/MergeTree/MarkRange.cpp | 5 ++ src/Storages/MergeTree/MarkRange.h | 1 + src/Storages/MergeTree/MergeTreeIOSettings.h | 2 + .../MergeTree/MergeTreeIndexReader.cpp | 3 + .../MergeTree/MergeTreeMarksLoader.cpp | 8 +++ src/Storages/MergeTree/MergeTreeMarksLoader.h | 1 + .../MergeTree/MergeTreeReaderCompact.cpp | 1 + 
.../MergeTree/MergeTreeReaderStream.cpp | 72 ++++++++++++++----- .../MergeTree/MergeTreeReaderStream.h | 38 +++++++--- .../MergeTree/MergeTreeReaderWide.cpp | 30 ++++++-- .../MergeTree/MergeTreeSequentialSource.cpp | 10 +-- .../test_merge_tree_load_marks/__init__.py | 0 .../configs/config.xml | 12 ++++ .../test_merge_tree_load_marks/test.py | 62 ++++++++++++++++ 14 files changed, 206 insertions(+), 39 deletions(-) create mode 100644 tests/integration/test_merge_tree_load_marks/__init__.py create mode 100644 tests/integration/test_merge_tree_load_marks/configs/config.xml create mode 100644 tests/integration/test_merge_tree_load_marks/test.py diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index bd8546f04cc..c6e98b4e5a1 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -81,6 +81,11 @@ size_t MarkRanges::getNumberOfMarks() const return result; } +bool MarkRanges::isOneRangeForWholePart(size_t num_marks_in_part) const +{ + return size() == 1 && front().begin == 0 && front().end == num_marks_in_part; +} + void MarkRanges::serialize(WriteBuffer & out) const { writeBinaryLittleEndian(this->size(), out); diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index f31d6a2a73b..9d5eac24fab 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -36,6 +36,7 @@ struct MarkRanges : public std::deque using std::deque::deque; size_t getNumberOfMarks() const; + bool isOneRangeForWholePart(size_t num_marks_in_part) const; void serialize(WriteBuffer & out) const; String describe() const; diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index feeb1808a6f..9b5842c24c9 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -44,6 +44,8 @@ struct MergeTreeReaderSettings bool enable_multiple_prewhere_read_steps = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. 
bool adjust_read_buffer_size = true; + /// TODO: + bool always_load_marks = true; }; struct MergeTreeWriterSettings diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 6012994b46d..e7ae1fc5c13 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -31,6 +31,8 @@ std::unique_ptr makeIndexReader( load_marks_threadpool, /*num_columns_in_mark=*/ 1); + marks_loader->startAsyncLoad(); + return std::make_unique( part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, @@ -65,6 +67,7 @@ MergeTreeIndexReader::MergeTreeIndexReader( mark_cache, uncompressed_cache, std::move(settings)); + version = index_format.version; stream->adjustRightMark(getLastMark(all_mark_ranges_)); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index a7a635f3b72..6798f97e494 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -64,6 +64,10 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( , read_settings(read_settings_) , num_columns_in_mark(num_columns_in_mark_) , load_marks_threadpool(load_marks_threadpool_) +{ +} + +void MergeTreeMarksLoader::startAsyncLoad() { if (load_marks_threadpool) future = loadMarksAsync(); @@ -102,6 +106,8 @@ MergeTreeMarksGetterPtr MergeTreeMarksLoader::loadMarks() MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() { + LOG_TEST(getLogger("MergeTreeMarksLoader"), "Loading marks from path {}", mrk_path); + /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -218,7 +224,9 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() } } else + { loaded_marks = loadMarksImpl(); + } if (!loaded_marks) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 73dd462f2fa..2aa4474e1c5 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -50,6 +50,7 @@ public: ~MergeTreeMarksLoader(); + void startAsyncLoad(); MergeTreeMarksGetterPtr loadMarks(); size_t getNumColumns() const { return num_columns_in_mark; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a22bff6b8d2..8810491b62e 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -48,6 +48,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( , profile_callback(profile_callback_) , clock_type(clock_type_) { + marks_loader->startAsyncLoad(); } void MergeTreeReaderCompact::fillColumnPositions() diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 40a16176c69..15ef02440cb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int CANNOT_READ_ALL_DATA; + extern const int LOGICAL_ERROR; } MergeTreeReaderStream::MergeTreeReaderStream( @@ -41,14 +42,17 @@ MergeTreeReaderStream::MergeTreeReaderStream( { } +void MergeTreeReaderStream::loadMarks() +{ + if (!marks_getter) + marks_getter = marks_loader->loadMarks(); +} + void MergeTreeReaderStream::init() { if (initialized) return; - 
initialized = true; - marks_getter = marks_loader->loadMarks(); - /// Compute the size of the buffer. auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes(all_mark_ranges); @@ -110,11 +114,15 @@ void MergeTreeReaderStream::init() data_buffer = non_cached_buffer.get(); compressed_data_buffer = non_cached_buffer.get(); } + + initialized = true; } void MergeTreeReaderStream::seekToMarkAndColumn(size_t row_index, size_t column_position) { init(); + loadMarks(); + const auto & mark = marks_getter->getMark(row_index, column_position); try @@ -193,7 +201,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() return compressed_data_buffer; } -size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) const +size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. @@ -202,7 +210,8 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons if (marks_count == 0) return 0; - assert(right_mark <= marks_count); + chassert(right_mark <= marks_count); + loadMarks(); if (right_mark == 0) return marks_getter->getMark(right_mark, 0).offset_in_compressed_file; @@ -281,9 +290,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons return file_size; } -std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { - assert(marks_getter != nullptr); + loadMarks(); size_t max_range_bytes = 0; size_t sum_range_bytes = 0; @@ -302,7 +311,34 @@ std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy return {max_range_bytes, sum_range_bytes}; } -size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const +size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset(size_t right_mark) +{ + if (right_mark != marks_count) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one right mark: {}, got: {}", + marks_count, right_mark); + } + return file_size; +} + +std::pair MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +{ + if (!mark_ranges.isOneRangeForWholePart(marks_count)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one mark range that covers the whole part, got: {}", + mark_ranges.describe()); + } + return {file_size, file_size}; +} + +void MergeTreeReaderStreamSingleColumnWholePart::seekToMark(size_t) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks"); +} + +size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. 
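
The NOTE above is the heart of this patch: when a single mark range covers the whole file, the reader can size its buffer from the file size and never materialize marks at all. Below is a minimal standalone sketch of that deferred-loading shape; `Stream`, `Marks`, and `seekOffset` are invented stand-ins for illustration, not ClickHouse's API.

```cpp
#include <cstddef>
#include <iostream>
#include <memory>
#include <vector>

// Invented stand-ins for illustration only; these are not ClickHouse classes.
struct Marks
{
    std::vector<size_t> offsets;
};

class Stream
{
public:
    explicit Stream(size_t marks_count_) : marks_count(marks_count_) {}

    size_t seekOffset(size_t mark)
    {
        loadMarks(); // Marks are materialized only when a seek actually needs them.
        return marks->offsets.at(mark);
    }

private:
    void loadMarks()
    {
        if (marks)
            return;
        std::cout << "loading marks\n";
        marks = std::make_unique<Marks>();
        for (size_t i = 0; i < marks_count; ++i)
            marks->offsets.push_back(i * 4096); // Pretend every granule is 4 KiB.
    }

    size_t marks_count;
    std::unique_ptr<Marks> marks; // Plays the role of marks_getter above.
};

int main()
{
    Stream whole_part(8);
    // A reader that consumes the whole part front to back never calls seekOffset,
    // so this stream never prints "loading marks".

    Stream ranged(8);
    std::cout << ranged.seekOffset(3) << '\n'; // The first seek triggers the load.
}
```

Only the second stream pays for building the mark index, which is the behavior the merge-time `MergeTreeSequentialSource` path (further below, where it keeps `save_marks_in_cache = false` and reads one whole-part range) takes advantage of.
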
@@ -311,7 +347,8 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ if (marks_count == 0) return 0; - assert(right_mark_non_included <= marks_count); + chassert(right_mark_non_included <= marks_count); + loadMarks(); if (right_mark_non_included == 0) return marks_getter->getMark(right_mark_non_included, column_position).offset_in_compressed_file; @@ -347,9 +384,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ } std::pair -MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const +MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) { - assert(marks_getter != nullptr); + loadMarks(); /// As a maximal range we return the maximal size of a whole stripe. size_t max_range_bytes = 0; @@ -386,8 +423,9 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark return {max_range_bytes, sum_range_bytes}; } -MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) const +MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) { + loadMarks(); const auto & current_mark = marks_getter->getMark(row_index, column_position); if (marks_getter->getNumColumns() == 1) @@ -434,27 +472,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM return marks_getter->getMark(mark_index + 1, column_position + 1); } -size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, column_position); } -std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { return estimateMarkRangeBytesOneColumn(mark_ranges, column_position); } -size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, marks_loader->getNumColumns() - 1); } -std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { size_t max_range_bytes = 0; size_t sum_range_bytes = 0; - for (size_t i = 0; i < marks_getter->getNumColumns(); ++i) + for (size_t i = 0; i < marks_loader->getNumColumns(); ++i) { auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn(mark_ranges, i); diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index f3ca6953ceb..05341cd8acc 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -40,6 +40,7 @@ public: /// Seeks to exact mark in file. void seekToMarkAndColumn(size_t row_index, size_t column_position); + /// Seeks to the start of the file. 
void seekToStart(); /** @@ -53,11 +54,11 @@ public: private: /// Returns offset in file up to which it's needed to read file to read all rows up to @right_mark mark. - virtual size_t getRightOffset(size_t right_mark) const = 0; + virtual size_t getRightOffset(size_t right_mark) = 0; /// Returns estimated max amount of bytes to read among mark ranges (which is used as size for read buffer) /// and total amount of bytes to read in all mark ranges. - virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const = 0; + virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) = 0; const ReadBufferFromFileBase::ProfileCallback profile_callback; const clockid_t clock_type; @@ -80,6 +81,7 @@ private: protected: void init(); + void loadMarks(); const MergeTreeReaderSettings settings; const size_t marks_count; @@ -100,11 +102,25 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; +class MergeTreeReaderStreamSingleColumnWholePart : public MergeTreeReaderStream +{ +public: + template + explicit MergeTreeReaderStreamSingleColumnWholePart(Args &&... args) + : MergeTreeReaderStream{std::forward(args)...} + { + } + + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + void seekToMark(size_t row_index) override; +}; + /// Base class for reading from file that contains multiple columns. /// It is used to read from compact parts. /// See more details about data layout in MergeTreeDataPartCompact.h. 
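
The new `MergeTreeReaderStreamSingleColumnWholePart` takes the same idea one step further: it can only answer a read of the entire part, so the right offset is simply the file size and any seek is a logic error. A rough standalone model of that contract is below; the names and the exception type are illustrative (the real class throws with `ErrorCodes::LOGICAL_ERROR`).

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>

// Loose model of the whole-part stream declared above. It never consults marks:
// "read everything" needs only the file size, and seeking is forbidden.
class WholePartStream
{
public:
    WholePartStream(size_t marks_count_, size_t file_size_)
        : marks_count(marks_count_), file_size(file_size_) {}

    size_t rightOffset(size_t right_mark) const
    {
        if (right_mark != marks_count)
            throw std::logic_error("expected one right mark covering the whole part");
        return file_size; // No marks are loaded at all.
    }

    void seekToMark(size_t)
    {
        throw std::logic_error("whole-part stream cannot seek to marks");
    }

private:
    size_t marks_count;
    size_t file_size;
};

int main()
{
    WholePartStream stream(/*marks_count=*/8, /*file_size=*/65536);
    std::cout << stream.rightOffset(8) << '\n'; // 65536, without touching marks.

    try { stream.seekToMark(3); }
    catch (const std::logic_error & e) { std::cout << e.what() << '\n'; }
}
```
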
@@ -118,9 +134,9 @@ public: } protected: - size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const; - std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const; - MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position) const; + size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position); + std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position); + MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position); }; /// Class for reading a single column from file that contains multiple columns @@ -135,8 +151,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, column_position); } private: @@ -154,8 +170,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 394a22835f1..afed94a5bb4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -227,12 +227,13 @@ void MergeTreeReaderWide::addStreams( auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; + size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - data_part_info_for_read->getMarksCount(), + num_marks_in_part, data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, settings.read_settings, @@ -243,11 +244,26 @@ void MergeTreeReaderWide::addStreams( auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - streams.emplace(*stream_name, std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type)); + auto create_stream = [&]() + { + return std::make_unique( + data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + num_marks_in_part, all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type); + }; + + LOG_DEBUG(getLogger("KEK"), "settings.always_load_marks: {}, one range: {}", settings.always_load_marks, all_mark_ranges.isOneRangeForWholePart(num_marks_in_part)); + + if (!settings.always_load_marks && all_mark_ranges.isOneRangeForWholePart(num_marks_in_part)) + { + streams.emplace(*stream_name, create_stream.operator()()); + } + else + { + marks_loader->startAsyncLoad(); + streams.emplace(*stream_name, create_stream.operator()()); + } }; serialization->enumerateStreams(callback); @@ -284,7 +300,7 @@ static ReadBuffer * getStream( if (seek_to_start) stream.seekToStart(); - else if (seek_to_mark) + else if (seek_to_mark && from_mark != 0) stream.seekToMark(from_mark); return stream.getDataBuffer(); diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 81eb166b300..be7df7b4611 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -174,6 +174,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( .read_settings = read_settings, .save_marks_in_cache = false, .apply_deleted_mask = apply_deleted_mask, + .always_load_marks = false, }; if (!mark_ranges) @@ -184,12 +185,12 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage_snapshot, *mark_ranges, /*virtual_fields=*/ {}, - /*uncompressed_cache=*/{}, + /*uncompressed_cache=*/ {}, mark_cache.get(), alter_conversions, reader_settings, - {}, - {}); + /*avg_value_size_hints=*/ {}, + /*profile_callback=*/ {}); } static void fillBlockNumberColumns( @@ -219,6 +220,7 @@ try const auto & header = getPort().getHeader(); /// Part level is useful for next step for merging non-merge tree table bool add_part_level = storage.merging_params.mode != MergeTreeData::MergingParams::Ordinary; + size_t num_marks_in_part = data_part->getMarksCount(); if (!isCancelled() && current_row < data_part->rows_count) { @@ -227,7 +229,7 @@ try const auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_read = reader->readRows(current_mark, 
data_part->getMarksCount(), continue_reading, rows_to_read, columns);
+        size_t rows_read = reader->readRows(current_mark, num_marks_in_part, continue_reading, rows_to_read, columns);
 
         if (rows_read)
         {
diff --git a/tests/integration/test_merge_tree_load_marks/__init__.py b/tests/integration/test_merge_tree_load_marks/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_merge_tree_load_marks/configs/config.xml b/tests/integration/test_merge_tree_load_marks/configs/config.xml
new file mode 100644
index 00000000000..1c9ee8d698f
--- /dev/null
+++ b/tests/integration/test_merge_tree_load_marks/configs/config.xml
@@ -0,0 +1,12 @@
+<clickhouse>
+    <text_log>
+        <database>system</database>
+        <table>text_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+        <level>test</level>
+    </text_log>
+</clickhouse>
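
The integration test that follows drives this end to end: it merges a two-part table and greps `system.text_log` for the "Loading marks" message that `MergeTreeMarksLoader::loadMarksImpl` now emits. The predicate everything hinges on is small enough to restate standalone; the `MarkRange` struct below is pared down to the two fields the check reads, so this is an illustration rather than the real class.

```cpp
#include <cassert>
#include <cstddef>
#include <deque>

// Standalone restatement of MarkRanges::isOneRangeForWholePart from the
// MarkRange.cpp hunk earlier in this patch, with a simplified MarkRange.
struct MarkRange
{
    size_t begin = 0;
    size_t end = 0; // Exclusive.
};

bool isOneRangeForWholePart(const std::deque<MarkRange> & ranges, size_t num_marks_in_part)
{
    return ranges.size() == 1 && ranges.front().begin == 0 && ranges.front().end == num_marks_in_part;
}

int main()
{
    assert(isOneRangeForWholePart({{0, 16}}, 16));          // One range over the whole part: marks can be skipped.
    assert(!isOneRangeForWholePart({{0, 8}}, 16));          // A partial read still needs marks.
    assert(!isOneRangeForWholePart({{0, 8}, {8, 16}}, 16)); // Split ranges need marks even when they cover everything.
}
```
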
diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py new file mode 100644 index 00000000000..b066b2a6ec0 --- /dev/null +++ b/tests/integration/test_merge_tree_load_marks/test.py @@ -0,0 +1,62 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# This test is bad and it should be a functional test but S3 metrics +# are accounted incorrectly for merges in part_log and query_log. +# Also we have text_log with level 'trace' in functional tests +# but this test requeires text_log with level 'test'. + + +@pytest.mark.parametrize("min_bytes_for_wide_part", [0, 1000000000]) +def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): + node.query( + f""" + DROP TABLE IF EXISTS t_load_marks; + + CREATE TABLE t_load_marks (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS min_bytes_for_wide_part = {min_bytes_for_wide_part}; + + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + + OPTIMIZE TABLE t_load_marks FINAL; + SYSTEM FLUSH LOGS; + """ + ) + + uuid = node.query( + "SELECT uuid FROM system.tables WHERE table = 't_prewarm_merge'" + ).strip() + + result = node.query( + f""" + SELECT count() + FROM system.text_log + WHERE (query_id LIKE '%{uuid}::all_1_2_1%') AND (message LIKE '%Loading marks%') + """ + ).strip() + + result = int(result) + + is_wide = min_bytes_for_wide_part == 0 + not_loaded = result == 0 + + assert is_wide == not_loaded From cefdbfb7c3a3bd415b417b1588b82603265b56d2 Mon Sep 17 00:00:00 2001 From: Austin Kothig Date: Wed, 20 Mar 2024 07:05:49 -0700 Subject: [PATCH 024/150] Applying feedback. --- .../functions/date-time-functions.md | 44 +++++++++---------- src/Functions/DateTimeTransforms.h | 6 +-- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 46ec9f01152..4c419004344 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1686,17 +1686,17 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - - `nanosecond` `nanoseconds` `ns` - - `microsecond` `microseconds` `us` `u` - - `millisecond` `milliseconds` `ms` - - `second` `seconds` `ss` `s` - - `minute` `minutes` `mi` `n` - - `hour` `hours` `hh` `h` - - `day` `days` `dd` `d` - - `week` `weeks` `wk` `ww` - - `month` `months` `mm` `m` - - `quarter` `quarters` `qq` `q` - - `year` `years` `yyyy` `yy` + - `nanosecond`, `nanoseconds`, `ns` + - `microsecond`, `microseconds`, `us`, `u` + - `millisecond`, `milliseconds`, `ms` + - `second`, `seconds`, `ss`, `s` + - `minute`, `minutes`, `mi`, `n` + - `hour`, `hours`, `hh`, `h` + - `day`, `days`, `dd`, `d` + - `week`, `weeks`, `wk`, `ww` + - `month`, `months`, `mm`, `m` + - `quarter`, `quarters`, `qq`, `q` + - `year`, `years`, `yyyy`, `yy` - `startdate` — The first time value to subtract (the subtrahend). 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -1764,17 +1764,17 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - - `nanosecond` `nanoseconds` `ns` - - `microsecond` `microseconds` `us` `u` - - `millisecond` `milliseconds` `ms` - - `second` `seconds` `ss` `s` - - `minute` `minutes` `mi` `n` - - `hour` `hours` `hh` `h` - - `day` `days` `dd` `d` - - `week` `weeks` `wk` `ww` - - `month` `months` `mm` `m` - - `quarter` `quarters` `qq` `q` - - `year` `years` `yyyy` `yy` + - `nanosecond`, `nanoseconds`, `ns` + - `microsecond`, `microseconds`, `us`, `u` + - `millisecond`, `milliseconds`, `ms` + - `second`, `seconds`, `ss`, `s` + - `minute`, `minutes`, `mi`, `n` + - `hour`, `hours`, `hh`, `h` + - `day`, `days`, `dd`, `d` + - `week`, `weeks`, `wk`, `ww` + - `month`, `months`, `mm`, `m` + - `quarter`, `quarters`, `qq`, `q` + - `year`, `years`, `yyyy`, `yy` - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a92680ac124..7410d87d16c 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -21,9 +21,9 @@ namespace DB { -static constexpr auto nanosecond_multiplier = 1000000000; -static constexpr auto microsecond_multiplier = 1000000; -static constexpr auto millisecond_multiplier = 1000; +static constexpr auto millisecond_multiplier = 1'000; +static constexpr auto microsecond_multiplier = 1'000'000; +static constexpr auto nanosecond_multiplier = 1'000'000'000; static constexpr FormatSettings::DateTimeOverflowBehavior default_date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore; From 18bc53264f2e51c32b2009f00e6df717a98d6117 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 22 Mar 2024 14:34:07 +0000 Subject: [PATCH 025/150] fix reading without marks --- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 -- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 12 ++++++------ src/Storages/MergeTree/MergeTreeReaderWide.h | 1 + src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 1 - .../0_stateless/02532_send_logs_level_test.reference | 1 + .../0_stateless/02532_send_logs_level_test.sh | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 9b5842c24c9..feeb1808a6f 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -44,8 +44,6 @@ struct MergeTreeReaderSettings bool enable_multiple_prewhere_read_steps = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. 
bool adjust_read_buffer_size = true; - /// TODO: - bool always_load_marks = true; }; struct MergeTreeWriterSettings diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index afed94a5bb4..d398668d5c8 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,6 +43,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) + , read_whole_part(all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { @@ -253,9 +254,7 @@ void MergeTreeReaderWide::addStreams( std::move(marks_loader), profile_callback, clock_type); }; - LOG_DEBUG(getLogger("KEK"), "settings.always_load_marks: {}, one range: {}", settings.always_load_marks, all_mark_ranges.isOneRangeForWholePart(num_marks_in_part)); - - if (!settings.always_load_marks && all_mark_ranges.isOneRangeForWholePart(num_marks_in_part)) + if (read_whole_part) { streams.emplace(*stream_name, create_stream.operator()()); } @@ -300,7 +299,7 @@ static ReadBuffer * getStream( if (seek_to_start) stream.seekToStart(); - else if (seek_to_mark && from_mark != 0) + else if (seek_to_mark) stream.seekToMark(from_mark); return stream.getDataBuffer(); @@ -341,7 +340,8 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading; + bool seek_to_mark = !continue_reading && !read_whole_part; + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); @@ -365,7 +365,7 @@ void MergeTreeReaderWide::readData( deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading; + bool seek_to_mark = !was_prefetched && !continue_reading && !read_whole_part; return getStream( /* seek_to_start = */false, substream_path, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..7ffe565d262 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -73,6 +73,7 @@ private: std::unordered_map caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + bool read_whole_part = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index be7df7b4611..2f814c7f289 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -174,7 +174,6 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( .read_settings = read_settings, .save_marks_in_cache = false, .apply_deleted_mask = apply_deleted_mask, - .always_load_marks = false, }; if (!mark_ranges) diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index dbd49cfc0a4..7e51b888d9c 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,2 +1,3 @@ + MergeTreeMarksLoader: Loading marks from path data.cmrk3 MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: 
read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index f65d8705569..4afc6d4496b 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists data; - create table data (key Int) engine=MergeTree order by tuple(); + create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; insert into data values (1); " From 47095f63b1834075d21a56cad7ca03c9767af83b Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 22 Mar 2024 16:03:28 +0000 Subject: [PATCH 026/150] Fix writing exception message in output format in HTTP when http_wait_end_of_query is used --- src/Interpreters/executeQuery.cpp | 9 +- src/Interpreters/executeQuery.h | 2 +- src/Server/HTTPHandler.cpp | 46 ++- src/Server/HTTPHandler.h | 3 +- ...d_json_and_xml_on_http_exception.reference | 348 ++++++++++++++++++ ...41_valid_json_and_xml_on_http_exception.sh | 8 +- 6 files changed, 397 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 7dd46534fdf..985fd13e0ee 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1453,6 +1453,7 @@ void executeQuery( ASTPtr ast; BlockIO streams; OutputFormatPtr output_format; + String format_name; auto update_format_on_exception_if_needed = [&]() { @@ -1460,7 +1461,7 @@ void executeQuery( { try { - String format_name = context->getDefaultFormat(); + format_name = context->getDefaultFormat(); output_format = FormatFactory::instance().getOutputFormat(format_name, ostr, {}, context, output_format_settings); if (output_format && output_format->supportsWritingException()) { @@ -1501,7 +1502,7 @@ void executeQuery( { update_format_on_exception_if_needed(); if (output_format) - handle_exception_in_output_format(*output_format); + handle_exception_in_output_format(*output_format, format_name, context, output_format_settings); } throw; } @@ -1543,7 +1544,7 @@ void executeQuery( ); } - String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) + format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) ? 
getIdentifierName(ast_query_with_output->format) : context->getDefaultFormat(); @@ -1609,7 +1610,7 @@ void executeQuery( { update_format_on_exception_if_needed(); if (output_format) - handle_exception_in_output_format(*output_format); + handle_exception_in_output_format(*output_format, format_name, context, output_format_settings); } throw; } diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 0f599922668..c6b3e1fc34e 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -27,7 +27,7 @@ struct QueryResultDetails }; using SetResultDetailsFunc = std::function; -using HandleExceptionInOutputFormatFunc = std::function; +using HandleExceptionInOutputFormatFunc = std::function & format_settings)>; struct QueryFlags { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index c112eefec6c..e754e8b58c3 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -880,18 +880,32 @@ void HTTPHandler::processQuery( response.add("X-ClickHouse-Timezone", *details.timezone); }; - auto handle_exception_in_output_format = [&](IOutputFormat & output_format) + auto handle_exception_in_output_format = [&](IOutputFormat & current_output_format, const String & format_name, const ContextPtr & context_, const std::optional & format_settings) { - if (settings.http_write_exception_in_output_format && output_format.supportsWritingException()) + if (settings.http_write_exception_in_output_format && current_output_format.supportsWritingException()) { - bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); - - ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); - formatExceptionForClient(status.code, request, response, used_output); - - output_format.setException(getCurrentExceptionMessage(false)); - output_format.finalize(); - used_output.exception_is_written = true; + /// If wait_end_of_query=true in case of an exception all data written to output format during query execution will be + /// ignored, so we cannot write exception message in current output format as it will be also ignored. + /// Instead, we create exception_writer function that will write exception in required format + /// and will use it later in trySendExceptionToClient when all buffers will be prepared. + if (buffer_until_eof) + { + auto header = current_output_format.getPort(IOutputFormat::PortKind::Main).getHeader(); + used_output.exception_writer = [format_name, header, context_, format_settings](WriteBuffer & buf, const String & message) + { + auto output_format = FormatFactory::instance().getOutputFormat(format_name, buf, header, context_, format_settings); + output_format->setException(message); + output_format->finalize(); + }; + } + else + { + bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); + ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); + current_output_format.setException(status.message); + current_output_format.finalize(); + used_output.exception_is_written = true; + } } }; @@ -955,8 +969,16 @@ try used_output.out_holder->position() = used_output.out_holder->buffer().begin(); } - writeString(s, *used_output.out_maybe_compressed); - writeChar('\n', *used_output.out_maybe_compressed); + /// We might have special formatter for exception message. 
+ if (used_output.exception_writer) + { + used_output.exception_writer(*used_output.out_maybe_compressed, s); + } + else + { + writeString(s, *used_output.out_maybe_compressed); + writeChar('\n', *used_output.out_maybe_compressed); + } } used_output.out_maybe_compressed->next(); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 0e30b466694..ae4cf034276 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -32,7 +32,7 @@ class HTTPHandler : public HTTPRequestHandler { public: HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_); - virtual ~HTTPHandler() override; + ~HTTPHandler() override; void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; @@ -75,6 +75,7 @@ private: bool finalized = false; bool exception_is_written = false; + std::function exception_writer; inline bool hasDelayed() const { diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference index 1818ca7b5f2..2cfba492361 100644 --- a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.reference @@ -1,3 +1,4 @@ +wait_end_of_query=0 One block Parallel formatting: 0 JSON @@ -430,3 +431,350 @@ Test 2 Test 3 1 1 +wait_end_of_query=1 +One block +Parallel formatting: 0 +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +Parallel formatting: 1 +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. 
: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +Several blocks +Without parallel formatting +JSON +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONEachRow +{"exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "} +JSONCompact +{ + "meta": + [ + { + "name": "number", + "type": "UInt64" + }, + { + "name": "res", + "type": "UInt8" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +JSONCompactEachRow +["Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) "] +JSONObjectEachRow +{ + "exception": "Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) " +} +XML + + + + + + number + UInt64 + + + res + UInt8 + + + + + + 0 + Code: 395. : Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 3) :: 2) -> throwIf(greater(number, 3)) UInt8 : 1'. 
(FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) + +With parallel formatting +JSON +1 +JSONCompact +1 +JSONObjectEachRow +1 +JSONEachRow +1 +JSONCompactEachRow +1 +Formatting error +Without parallel formatting +JSON +{ + "meta": + [ + { + "name": "x", + "type": "UInt32" + }, + { + "name": "s", + "type": "String" + }, + { + "name": "y", + "type": "Enum8('a' = 1)" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONRowOutputFormat. (BAD_ARGUMENTS) " +} +JSONEachRow +{"exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONEachRowRowOutputFormat. (BAD_ARGUMENTS) "} +JSONCompact +{ + "meta": + [ + { + "name": "x", + "type": "UInt32" + }, + { + "name": "s", + "type": "String" + }, + { + "name": "y", + "type": "Enum8('a' = 1)" + } + ], + + "data": + [ + + ], + + "rows": 0, + + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONCompactRowOutputFormat. (BAD_ARGUMENTS) " +} +JSONCompactEachRow +["Code: 36. : Unexpected value 99 in enum: While executing JSONCompactEachRowRowOutputFormat. (BAD_ARGUMENTS) "] +JSONObjectEachRow +{ + "exception": "Code: 36. : Unexpected value 99 in enum: While executing JSONObjectEachRowRowOutputFormat. (BAD_ARGUMENTS) " +} +XML + + + + + + x + UInt32 + + + s + String + + + y + Enum8('a' = 1) + + + + + + 0 + Code: 36. : Unexpected value 99 in enum: While executing XMLRowOutputFormat. (BAD_ARGUMENTS) + +With parallel formatting +JSON +1 +JSONCompact +1 +JSONObjectEachRow +1 +JSONEachRow +1 +JSONCompactEachRow +1 +Test 1 +1 +1 +Test 2 +1 +1 +Test 3 +1 +1 diff --git a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh index 26b3ef64d61..c47fe5c7e94 100755 --- a/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh +++ b/tests/queries/0_stateless/02841_valid_json_and_xml_on_http_exception.sh @@ -4,7 +4,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_URL="$CLICKHOUSE_URL&http_write_exception_in_output_format=1&allow_experimental_analyzer=0" +CH_URL_BASE="$CLICKHOUSE_URL&http_write_exception_in_output_format=1&allow_experimental_analyzer=0" + +for wait_end_of_query in 0 1 +do +echo "wait_end_of_query=$wait_end_of_query" +CH_URL="$CH_URL_BASE&wait_end_of_query=$wait_end_of_query" echo "One block" for parallel in 0 1 @@ -106,3 +111,4 @@ ${CLICKHOUSE_CURL} -sS "$CH_URL" -d "select * from test_02841 format JSON settin $CLICKHOUSE_CLIENT -q "drop table test_02841" +done From 7e264839c9c5eb8fd3c24f78567aa88a4b66564a Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Mon, 25 Mar 2024 11:04:12 +0800 Subject: [PATCH 027/150] bug fix --- src/Interpreters/DDLTask.cpp | 3 ++ src/Interpreters/InterpreterDropQuery.cpp | 42 ++++------------------- src/Parsers/ASTDropQuery.cpp | 39 +++++++++++++++++++++ src/Parsers/ASTDropQuery.h | 2 ++ 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fe2baea6b4e..761848006ff 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -198,6 +199,8 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); String description; query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); + if (auto * query_drop = query->as()) + query = query_drop->getRewrittenASTWithoutMultipleTables()[0]; } void DDLTaskBase::formatRewrittenQuery(ContextPtr context) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 1fdbf2fa376..1110200e411 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -56,45 +56,15 @@ InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, ContextMut BlockIO InterpreterDropQuery::execute() { + BlockIO res; auto & drop = query_ptr->as(); - if (drop.database_and_tables) + ASTs drops = drop.getRewrittenASTWithoutMultipleTables(); + for (const auto & drop_query_ptr : drops) { - BlockIO res; - auto & database_and_tables = drop.database_and_tables->as(); - for (const auto & child : database_and_tables.children) - { - auto cloned = drop.clone(); - auto & query = cloned->as(); - query.database_and_tables = nullptr; - query.children.clear(); - - auto database_and_table = dynamic_pointer_cast(child); - if (database_and_table->name_parts.size() == 2) - { - query.database = std::make_shared(database_and_table->name_parts[0]); - query.table = std::make_shared(database_and_table->name_parts[1]); - } - else - { - query.table = std::make_shared(database_and_table->name_parts[0]); - } - - if (query.database) - query.children.push_back(query.database); - - if (query.table) - query.children.push_back(query.table); - - current_query_ptr = cloned; - res = executeSingleDropQuery(cloned); - } - return res; - } - else - { - current_query_ptr = query_ptr; - return executeSingleDropQuery(query_ptr); + current_query_ptr = drop_query_ptr; + res = executeSingleDropQuery(drop_query_ptr); } + return res; } BlockIO InterpreterDropQuery::executeSingleDropQuery(const ASTPtr & drop_query_ptr) diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index ab95845a856..6e872d8e6b8 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -105,4 +105,43 @@ void 
ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState
     settings.ostr << (settings.hilite ? hilite_keyword : "") << " SYNC" << (settings.hilite ? hilite_none : "");
 }
 
+ASTs ASTDropQuery::getRewrittenASTWithoutMultipleTables()
+{
+    ASTs res;
+    if (database_and_tables == nullptr)
+    {
+        res.push_back(shared_from_this());
+        return res;
+    }
+
+    auto & list = database_and_tables->as<ASTExpressionList &>();
+    for (const auto & child : list.children)
+    {
+        auto cloned = clone();
+        auto & query = cloned->as<ASTDropQuery &>();
+        query.database_and_tables = nullptr;
+        query.children.clear();
+
+        auto database_and_table = dynamic_pointer_cast<ASTIdentifier>(child);
+        if (database_and_table->name_parts.size() == 2)
+        {
+            query.database = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
+            query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[1]);
+        }
+        else
+        {
+            query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
+        }
+
+        if (query.database)
+            query.children.push_back(query.database);
+
+        if (query.table)
+            query.children.push_back(query.table);
+
+        res.push_back(cloned);
+    }
+    return res;
+}
+
 }
diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h
index 7aca9a883ed..00bf8cc9b78 100644
--- a/src/Parsers/ASTDropQuery.h
+++ b/src/Parsers/ASTDropQuery.h
@@ -49,6 +49,8 @@ public:
         return removeOnCluster(clone(), params.default_database);
     }
 
+    ASTs getRewrittenASTWithoutMultipleTables();
+
     QueryKind getQueryKind() const override { return QueryKind::Drop; }
 
 protected:

From 911fab7824ccabf572699a4aa05403307c4a919f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 25 Mar 2024 11:19:46 +0100
Subject: [PATCH 028/150] Fix possible connections data-race for
 distributed_foreground_insert/distributed_background_insert_batch

Previously the connection was marked as no longer in use earlier than the
RemoteInserter was destroyed, while the inserter might still be using it,
and this could lead to crashes, like in [1].

[1]: https://s3.amazonaws.com/clickhouse-test-reports/61832/d0cd8d0d7f74ff251d52e1871cba57d26a51873b/stateless_tests_flaky_check__asan_.html

I was able to reproduce the crash locally by running the test
03030_system_flush_distributed_settings in parallel.

Fixes: #45491

Signed-off-by: Azat Khuzhin
---
 src/Storages/Distributed/DistributedAsyncInsertBatch.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp
index 8d95e49de57..b45afb7e1aa 100644
--- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp
+++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp
@@ -203,11 +203,10 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in)
 
 void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_changes)
 {
+    IConnectionPool::Entry connection;
     std::unique_ptr<RemoteInserter> remote;
     bool compression_expected = false;
 
-    IConnectionPool::Entry connection;
-
     /// Since the batch is sent as a whole (in case of failure, the whole batch
     /// will be repeated), we need to mark the whole batch as failed in case of
     /// error).
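The fix relies on C++ destruction order: local objects are destroyed in
reverse order of declaration, so declaring the pool entry before the
RemoteInserter keeps the connection checked out until the inserter that
writes through it has been destroyed. A minimal standalone sketch of the
idiom (PoolEntry and Inserter are hypothetical stand-ins, not the real
ClickHouse types):

    #include <iostream>
    #include <memory>

    struct PoolEntry
    {
        // Returning the connection to the pool happens in the destructor.
        ~PoolEntry() { std::cout << "connection returned to pool\n"; }
    };

    struct Inserter
    {
        explicit Inserter(PoolEntry &) {}
        // The inserter may still talk over the connection while shutting down.
        ~Inserter() { std::cout << "inserter finished with connection\n"; }
    };

    int main()
    {
        PoolEntry connection;   // declared first, destroyed last
        auto remote = std::make_unique<Inserter>(connection);
        // ... send the batch ...
        return 0;               // remote is destroyed before connection
    }

With this declaration order the sketch prints "inserter finished with
connection" before "connection returned to pool"; with the declarations
swapped, as in the old code, the connection would be handed back to the pool
while the inserter could still be using it.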
From e22c3f7bff42d6490537daec311f6f2a1508c8b3 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 25 Mar 2024 18:13:11 +0000 Subject: [PATCH 029/150] Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats --- src/Processors/Formats/IRowInputFormat.cpp | 4 +++- ...nput_format_allow_errors_num_bad_escape_sequence.reference | 1 + ...3031_input_format_allow_errors_num_bad_escape_sequence.sql | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.reference create mode 100644 tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.sql diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 8c317a34a9d..0b6c81923db 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -29,6 +29,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_IPV4; extern const int CANNOT_PARSE_IPV6; extern const int UNKNOWN_ELEMENT_OF_ENUM; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; } @@ -50,7 +51,8 @@ bool isParseError(int code) || code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || code == ErrorCodes::CANNOT_PARSE_IPV4 || code == ErrorCodes::CANNOT_PARSE_IPV6 - || code == ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM; + || code == ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM + || code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE; } IRowInputFormat::IRowInputFormat(Block header, ReadBuffer & in_, Params params_) diff --git a/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.reference b/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.reference new file mode 100644 index 00000000000..50433fd0b12 --- /dev/null +++ b/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.reference @@ -0,0 +1 @@ +some string diff --git a/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.sql b/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.sql new file mode 100644 index 00000000000..d551a449a8a --- /dev/null +++ b/tests/queries/0_stateless/03031_input_format_allow_errors_num_bad_escape_sequence.sql @@ -0,0 +1,2 @@ +select * from format(JSONEachRow, '{"item" : "some string"}, {"item":"\\\\ \ud83d"}') settings input_format_allow_errors_num=1; + From 97d4fb01f437fca13d6c82cac4e0ae415bd9332b Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Tue, 26 Mar 2024 09:17:58 +0800 Subject: [PATCH 030/150] fix test --- src/Parsers/ASTDropQuery.cpp | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index 6e872d8e6b8..e59ce43287f 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -78,10 +78,19 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState if (it != list.children.begin()) settings.ostr << ", "; - auto identifier = dynamic_pointer_cast(*it); - settings.ostr << (identifier->name_parts.size() == 2 - ? backQuoteIfNeed(identifier->name_parts[0]) + "." 
+ backQuoteIfNeed(identifier->name_parts[1])
-            : backQuoteIfNeed(identifier->name_parts[0]));
+        auto identifier = dynamic_pointer_cast<ASTTableIdentifier>(*it);
+        if (!identifier)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected ASTIdentifier type for list of table names.");
+
+        if (auto db = identifier->getDatabase())
+        {
+            db->formatImpl(settings, state, frame);
+            settings.ostr << '.';
+        }
+
+        auto tb = identifier->getTable();
+        chassert(tb);
+        tb->formatImpl(settings, state, frame);
     }
 }
 else
@@ -122,16 +131,12 @@ ASTs ASTDropQuery::getRewrittenASTWithoutMultipleTables()
         query.database_and_tables = nullptr;
         query.children.clear();
 
-        auto database_and_table = dynamic_pointer_cast<ASTIdentifier>(child);
-        if (database_and_table->name_parts.size() == 2)
-        {
-            query.database = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
-            query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[1]);
-        }
-        else
-        {
-            query.table = std::make_shared<ASTIdentifier>(database_and_table->name_parts[0]);
-        }
+        auto database_and_table = dynamic_pointer_cast<ASTTableIdentifier>(child);
+        if (!database_and_table)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected ASTIdentifier type for list of table names.");
+
+        query.database = database_and_table->getDatabase();
+        query.table = database_and_table->getTable();
 
         if (query.database)
             query.children.push_back(query.database);

From 8af56823954fa036df25eef0f61aa3b3d4de5749 Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Tue, 26 Mar 2024 09:42:10 +0800
Subject: [PATCH 031/150] better

---
 src/Interpreters/DDLTask.cpp                  |  8 ++++++-
 src/Interpreters/InterpreterDropQuery.cpp     | 22 ++++++++++-------
 src/Parsers/ASTDropQuery.cpp                  |  6 ++---
 src/Parsers/ASTDropQuery.h                    |  2 +-
 src/Parsers/ASTIdentifier.cpp                 |  6 +++++
 src/Parsers/ASTIdentifier.h                   |  1 +
 src/Parsers/tests/gtest_dictionary_parser.cpp | 24 +++++++------------
 7 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 104e71fa2e4..37954850851 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -202,7 +202,13 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context)
     String description;
     query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth, settings.max_parser_backtracks);
     if (auto * query_drop = query->as<ASTDropQuery>())
-        query = query_drop->getRewrittenASTWithoutMultipleTables()[0];
+    {
+        ASTs drops = query_drop->getRewrittenASTsOfSingleTable();
+        if (drops.size() > 1)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Dropping multiple tables in one DDL task is not supported.");
+
+        query = drops[0];
+    }
 }
 
 void DDLTaskBase::formatRewrittenQuery(ContextPtr context)
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 578609cdcd1..87298774ed4 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -59,7 +59,7 @@ BlockIO InterpreterDropQuery::execute()
 {
     BlockIO res;
     auto & drop = query_ptr->as<ASTDropQuery &>();
-    ASTs drops = drop.getRewrittenASTWithoutMultipleTables();
+    ASTs drops = drop.getRewrittenASTsOfSingleTable();
     for (const auto & drop_query_ptr : drops)
     {
         current_query_ptr = drop_query_ptr;
@@ -460,18 +460,22 @@ void InterpreterDropQuery::extendQueryLogElemImpl(DB::QueryLogElement & elem, co
     auto & list = drop.database_and_tables->as<ASTExpressionList &>();
     for (auto & child : list.children)
     {
-        auto identifier = dynamic_pointer_cast<ASTIdentifier>(child);
-        if (identifier->name_parts.size() == 2)
+        auto identifier = dynamic_pointer_cast<ASTTableIdentifier>(child);
+        if (!identifier)
+            throw
Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected type for list of table names."); + + String query_database = identifier->getDatabaseName(); + String query_table = identifier->getTableName(); + if (!query_database.empty() && query_table.empty()) { - auto quoted_database = backQuoteIfNeed(identifier->name_parts[0]); - elem.query_databases.insert(quoted_database); - elem.query_tables.insert(quoted_database + "." + backQuoteIfNeed(identifier->name_parts[1])); + elem.query_databases.insert(backQuoteIfNeed(query_database)); } - else + else if (!query_table.empty()) { - auto quoted_database = backQuoteIfNeed(context_->getCurrentDatabase()); + auto quoted_database = query_database.empty() ? backQuoteIfNeed(context_->getCurrentDatabase()) + : backQuoteIfNeed(query_database); elem.query_databases.insert(quoted_database); - elem.query_tables.insert(quoted_database + "." + backQuoteIfNeed(identifier->name_parts[0])); + elem.query_tables.insert(quoted_database + "." + backQuoteIfNeed(query_table)); } } } diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index e59ce43287f..6324a7e99bf 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -80,7 +80,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState auto identifier = dynamic_pointer_cast(*it); if (!identifier) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected ASTIdentifier type for list of table names."); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected type for list of table names."); if (auto db = identifier->getDatabase()) { @@ -114,7 +114,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState settings.ostr << (settings.hilite ? hilite_keyword : "") << " SYNC" << (settings.hilite ? hilite_none : ""); } -ASTs ASTDropQuery::getRewrittenASTWithoutMultipleTables() +ASTs ASTDropQuery::getRewrittenASTsOfSingleTable() { ASTs res; if (database_and_tables == nullptr) @@ -133,7 +133,7 @@ ASTs ASTDropQuery::getRewrittenASTWithoutMultipleTables() auto database_and_table = dynamic_pointer_cast(child); if (!database_and_table) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected ASTIdentifier type for list of table names."); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected type for list of table names."); query.database = database_and_table->getDatabase(); query.table = database_and_table->getTable(); diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h index 00bf8cc9b78..3eae98ad43d 100644 --- a/src/Parsers/ASTDropQuery.h +++ b/src/Parsers/ASTDropQuery.h @@ -49,7 +49,7 @@ public: return removeOnCluster(clone(), params.default_database); } - ASTs getRewrittenASTWithoutMultipleTables(); + ASTs getRewrittenASTsOfSingleTable(); QueryKind getQueryKind() const override { return QueryKind::Drop; } diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 80a618170c6..1a24dac61e6 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -207,6 +207,12 @@ String ASTTableIdentifier::getDatabaseName() const else return {}; } +String ASTTableIdentifier::getTableName() const +{ + if (name_parts.size() == 2) return name_parts[1]; + else return name_parts[0]; +} + ASTPtr ASTTableIdentifier::getTable() const { if (name_parts.size() == 2) diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h index d986b9170f3..d75b5a99dbe 100644 --- a/src/Parsers/ASTIdentifier.h +++ b/src/Parsers/ASTIdentifier.h @@ -86,6 +86,7 @@ public: StorageID getTableId() const; String 
getDatabaseName() const; + String getTableName() const; ASTPtr getTable() const; ASTPtr getDatabase() const; diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index d998bc89796..f9074c0b2eb 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -300,20 +300,11 @@ TEST(ParserDictionaryDDL, ParseDropQuery) ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0, 0); ASTDropQuery * drop1 = ast1->as(); - auto get_database_and_table = [](const ASTDropQuery & drop) -> std::pair - { - auto & database_and_tables = drop.database_and_tables->as(); - auto database_and_table = dynamic_pointer_cast(database_and_tables.children[0]); - if (database_and_table->name_parts.size() == 2) - return {database_and_table->name_parts[0], database_and_table->name_parts[1]}; - else - return {"", database_and_table->name_parts[0]}; - }; - EXPECT_TRUE(drop1->is_dictionary); - auto [database1, table1] = get_database_and_table(*drop1); - EXPECT_EQ(database1, "test"); - EXPECT_EQ(table1, "dict1"); + auto & database_and_tables1 = drop1->database_and_tables->as(); + auto identifier1 = dynamic_pointer_cast(database_and_tables1.children[0]); + EXPECT_EQ(identifier1->getDatabaseName(), "test"); + EXPECT_EQ(identifier1->getTableName(), "dict1"); auto str1 = serializeAST(*drop1); EXPECT_EQ(input1, str1); @@ -323,9 +314,10 @@ TEST(ParserDictionaryDDL, ParseDropQuery) ASTDropQuery * drop2 = ast2->as(); EXPECT_TRUE(drop2->is_dictionary); - auto [database2, table2] = get_database_and_table(*drop2); - EXPECT_EQ(database2, ""); - EXPECT_EQ(table2, "dict2"); + auto & database_and_tables2 = drop2->database_and_tables->as(); + auto identifier2 = dynamic_pointer_cast(database_and_tables2.children[0]); + EXPECT_EQ(identifier2->getDatabaseName(), ""); + EXPECT_EQ(identifier2->getTableName(), "dict2"); auto str2 = serializeAST(*drop2); EXPECT_EQ(input2, str2); } From 2bf1d118c70c6ddf4c42490f93b421cec5e6c6f8 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 22 Mar 2024 17:06:03 +0100 Subject: [PATCH 032/150] Stream rows when reading from system.replicas --- src/Storages/System/StorageSystemReplicas.cpp | 82 +++++++++++++++---- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 35550de11cb..0748995e8fb 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -353,6 +353,43 @@ void StorageSystemReplicas::read( query_plan.addStep(std::move(reading)); } +class SystemReplicasSource : public ISource +{ +public: + SystemReplicasSource( + Block header_, + ColumnPtr col_database_, + ColumnPtr col_table_, + ColumnPtr col_engine_, + std::vector> futures_, + ContextPtr context_) + : ISource(header_) + , col_database(std::move(col_database_)) + , col_table(std::move(col_table_)) + , col_engine(std::move(col_engine_)) + , futures(std::move(futures_)) + , context(std::move(context_)) + { + } + + String getName() const override { return "SystemReplicas"; } + +protected: + Chunk generate() override; + +private: + /// Columns with table metadata. + ColumnPtr col_database; + ColumnPtr col_table; + ColumnPtr col_engine; + /// Futures for the status of each table. + std::vector> futures; + ContextPtr context; + /// Index (row number) of the next table to process. 
+ size_t i = 0; +}; + + void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto header = getOutputStream().header; @@ -398,8 +435,6 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, col_engine = filtered_block.getByName("engine").column; } - MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - size_t tables_size = col_database->size(); /// Use separate queues for requests with and without ZooKeeper fields. @@ -426,11 +461,33 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, /// If there are more requests, they will be scheduled by the query that needs them. get_status_requests.scheduleRequests(max_request_id, query_status); - for (size_t i = 0; i < tables_size; ++i) + pipeline.init(Pipe(std::make_shared(header, col_database, col_table, col_engine, std::move(futures), context))); +} + +Chunk SystemReplicasSource::generate() +{ + if (i == futures.size()) + return {}; + + QueryStatusPtr query_status = context ? context->getProcessListElement() : nullptr; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + bool rows_added = false; + + for (; i < futures.size(); ++i) { if (query_status) query_status->checkTimeLimit(); + /// Return current chunk if the next future is not ready yet + if (rows_added && futures[i].wait_for(std::chrono::seconds(0)) != std::future_status::ready) + break; + + res_columns[0]->insert((*col_database)[i]); + res_columns[1]->insert((*col_table)[i]); + res_columns[2]->insert((*col_engine)[i]); + const auto & status = futures[i].get(); size_t col_num = 3; res_columns[col_num++]->insert(status.is_leader); @@ -476,23 +533,12 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, } res_columns[col_num++]->insert(std::move(replica_is_active_values)); + + rows_added = true; } - Columns fin_columns; - fin_columns.reserve(res_columns.size()); - - for (auto & col : res_columns) - fin_columns.emplace_back(std::move(col)); - - fin_columns[0] = std::move(col_database); - fin_columns[1] = std::move(col_table); - fin_columns[2] = std::move(col_engine); - - UInt64 num_rows = fin_columns.at(0)->size(); - Chunk chunk(std::move(fin_columns), num_rows); - - pipeline.init(Pipe(std::make_shared(header, std::move(chunk)))); + UInt64 num_rows = res_columns.at(0)->size(); + return Chunk(std::move(res_columns), num_rows); } - } From 4e94510e3286658f7481212303840904a33ad480 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Mar 2024 15:44:33 +0100 Subject: [PATCH 033/150] Finish chunk if the size exceeds max_block_size --- src/Storages/System/StorageSystemReplicas.cpp | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0748995e8fb..7c18282beb7 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -261,6 +261,7 @@ public: Block sample_block, std::map> replicated_tables_, bool with_zk_fields_, + size_t max_block_size_, std::shared_ptr impl_) : SourceStepWithFilter( DataStream{.header = std::move(sample_block)}, @@ -270,6 +271,7 @@ public: context_) , replicated_tables(std::move(replicated_tables_)) , with_zk_fields(with_zk_fields_) + , max_block_size(max_block_size_) , impl(std::move(impl_)) { } @@ -279,6 +281,7 @@ public: private: std::map> replicated_tables; 
const bool with_zk_fields; + const size_t max_block_size; std::shared_ptr impl; const ActionsDAG::Node * predicate = nullptr; }; @@ -297,7 +300,7 @@ void StorageSystemReplicas::read( SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, + const size_t max_block_size, const size_t /*num_streams*/) { storage_snapshot->check(column_names); @@ -348,7 +351,7 @@ void StorageSystemReplicas::read( auto header = storage_snapshot->metadata->getSampleBlock(); auto reading = std::make_unique( column_names, query_info, storage_snapshot, - std::move(context), std::move(header), std::move(replicated_tables), with_zk_fields, impl); // /*std::move(this_ptr),*/ std::move(columns_mask), max_block_size); + std::move(context), std::move(header), std::move(replicated_tables), with_zk_fields, max_block_size, impl); query_plan.addStep(std::move(reading)); } @@ -358,12 +361,14 @@ class SystemReplicasSource : public ISource public: SystemReplicasSource( Block header_, + size_t max_block_size_, ColumnPtr col_database_, ColumnPtr col_table_, ColumnPtr col_engine_, std::vector> futures_, ContextPtr context_) : ISource(header_) + , max_block_size(max_block_size_) , col_database(std::move(col_database_)) , col_table(std::move(col_table_)) , col_engine(std::move(col_engine_)) @@ -378,6 +383,7 @@ protected: Chunk generate() override; private: + const size_t max_block_size; /// Columns with table metadata. ColumnPtr col_database; ColumnPtr col_table; @@ -461,7 +467,7 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, /// If there are more requests, they will be scheduled by the query that needs them. get_status_requests.scheduleRequests(max_request_id, query_status); - pipeline.init(Pipe(std::make_shared(header, col_database, col_table, col_engine, std::move(futures), context))); + pipeline.init(Pipe(std::make_shared(header, max_block_size, col_database, col_table, col_engine, std::move(futures), context))); } Chunk SystemReplicasSource::generate() @@ -480,9 +486,22 @@ Chunk SystemReplicasSource::generate() if (query_status) query_status->checkTimeLimit(); - /// Return current chunk if the next future is not ready yet - if (rows_added && futures[i].wait_for(std::chrono::seconds(0)) != std::future_status::ready) - break; + if (rows_added) + { + /// Return current chunk if the next future is not ready yet + if (futures[i].wait_for(std::chrono::seconds(0)) != std::future_status::ready) + break; + + if (max_block_size != 0) + { + size_t total_size = 0; + for (const auto & column : res_columns) + total_size += column->byteSize(); + /// If the block size exceeds the maximum, return the current block + if (total_size >= max_block_size) + break; + } + } res_columns[0]->insert((*col_database)[i]); res_columns[1]->insert((*col_table)[i]); From 277df60dc4d2ccf8e9939255e03bd74606574898 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 26 Mar 2024 12:01:17 +0000 Subject: [PATCH 034/150] Fix some crashes with analyzer and group_by_use_nulls. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 15 +++++- ...up_by_use_nulls_analyzer_crashes.reference | 52 +++++++++++++++++++ ...23_group_by_use_nulls_analyzer_crashes.sql | 5 ++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference create mode 100644 tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6719504eac3..d60ab24c63b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -777,6 +777,13 @@ struct IdentifierResolveScope std::unordered_map table_expression_node_to_data; QueryTreeNodePtrWithHashSet nullable_group_by_keys; + /// Here we count the number of nullable GROUP BY keys we met resolving expression. + /// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube` + /// both `number` and `tuple(number)` would be in nullable_group_by_keys. + /// But when we resolve `tuple(tuple(number))` we should figure out that `tuple(number)` is already a key, + /// and we should not convert `number` to nullable. + size_t found_nullable_group_by_key_in_scope = 0; + QueryTreeNodePtrWithHashMap nullable_join_columns; /// Use identifier lookup to result cache @@ -5952,6 +5959,12 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id return resolved_expression_it->second; } + bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction(); + if (is_nullable_group_by_key) + ++scope.found_nullable_group_by_key_in_scope; + + SCOPE_EXIT(scope.found_nullable_group_by_key_in_scope -= is_nullable_group_by_key); + String node_alias = node->getAlias(); ProjectionNames result_projection_names; @@ -6243,7 +6256,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); - if (scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1) { node = node->clone(); node->convertToNullable(); diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference new file mode 100644 index 00000000000..9d8381407fb --- /dev/null +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -0,0 +1,52 @@ +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((0)) +((1)) +((1)) +((2)) +((2)) +((3)) +((3)) +((4)) +((4)) +((5)) +((5)) +((6)) +((6)) +((7)) +((7)) +((8)) +((8)) +((9)) +((9)) +([]) +([]) +([]) +([]) +([]) +([]) +([]) +([]) +([]) +([]) +([]) +([0]) +([1]) +([2]) +([3]) +([4]) +([5]) +([6]) +([7]) +([8]) +([9]) diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql new file mode 100644 index 00000000000..670c4fb1284 --- /dev/null +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -0,0 +1,5 @@ +set allow_experimental_analyzer = 1, group_by_use_nulls = 1; + +SELECT tuple(tuple(number)) as x FROM numbers(10) GROUP BY (number, tuple(number)) with 
cube order by x; + +select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) WITH ROLLUP order by x; From c5f6296b434bc3a79a402b8b13f5b71afcde1fc3 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:07:15 +0100 Subject: [PATCH 035/150] Try to fix links in ru docs --- docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 25e87794147..9f2d21eb29f 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_output_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_output_format) +- [Настройка `date_time_output_format`](../../operations/settings/index.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From 837b89b8fe0cd740d724fea664b278fe8fb1d72f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:08:19 +0100 Subject: [PATCH 036/150] Fix settings names --- docs/ru/sql-reference/data-types/datetime.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 9f2d21eb29f..34cd44d4709 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -29,7 +29,7 @@ DateTime([timezone]) ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_output_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). 
 ## Примеры {#primery}
 
@@ -119,7 +119,7 @@ FROM dt
 - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md)
 - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md)
 - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md)
-- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_output_format)
+- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
 - [Настройка `date_time_output_format`](../../operations/settings/index.md#settings-date_time_output_format)
 - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
 - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
 - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)

From 98f99db6b22aed467e8eee37b8b972a199d2873a Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 26 Mar 2024 16:01:56 +0000
Subject: [PATCH 037/150] Remove unneeded change

---
 src/Server/HTTPHandler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h
index ae4cf034276..aa7b9d69cb0 100644
--- a/src/Server/HTTPHandler.h
+++ b/src/Server/HTTPHandler.h
@@ -32,7 +32,7 @@ class HTTPHandler : public HTTPRequestHandler
 {
 public:
     HTTPHandler(IServer & server_, const std::string & name, const std::optional<String> & content_type_override_);
-    ~HTTPHandler() override;
+    virtual ~HTTPHandler() override;
 
     void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
 
From 97fca6682ed413036495cc361591fe6d7495421b Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Tue, 26 Mar 2024 18:38:05 +0100
Subject: [PATCH 038/150] fix logical-error when undoing quorum insert transaction

---
 src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index b43d47bf5f4..91f85cbd87d 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -940,14 +940,25 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl::
     });
 
     bool node_exists = false;
+    bool quorum_fail_exists = false;
     /// The loop will be executed at least once
    new_retry_controller.retryLoop([&]
    {
        fiu_do_on(FailPoints::replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault, { zookeeper->forceFailureBeforeOperation(); });
        zookeeper->setKeeper(storage.getZooKeeper());
        node_exists = zookeeper->exists(fs::path(storage.replica_path) / "parts" / part->name);
+        if (isQuorumEnabled())
+            quorum_fail_exists = zookeeper->exists(fs::path(storage.zookeeper_path) / "quorum" / "failed_parts" / part->name);
    });
 
+    /// If the quorum failed_parts node exists, the restarting thread will clean up the garbage.
+    if (quorum_fail_exists)
+    {
+        LOG_INFO(log, "Part {} failed to commit and will not be retried or cleaned up here. The restarting thread will do everything.", part->name);
+        transaction.clear();
+        return CommitRetryContext::ERROR;
+    }
+
    if (node_exists)
    {
        LOG_DEBUG(log, "Insert of part {} recovered from keeper successfully.
It will be committed", part->name); From 3b6ea659dfbfe25983bf1cdbdaac51ce38f6d73b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 26 Mar 2024 19:10:34 +0000 Subject: [PATCH 039/150] Fixing 02535_analyzer_group_by_use_nulls --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 29 +++++++++++++---------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 87d943f2e24..c21a1546259 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6678,45 +6678,48 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR if (query_node_typed.isGroupByWithGroupingSets()) { + QueryTreeNodes nullable_group_by_keys; for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) { if (settings.enable_positional_arguments) replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); - resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) auto & group_by_list = grouping_sets_keys_list_node->as().getNodes(); expandTuplesInList(group_by_list); + + if (scope.group_by_use_nulls) + for (const auto & group_by_elem : group_by_list) + nullable_group_by_keys.push_back(group_by_elem->clone()); + + resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } - if (scope.group_by_use_nulls) - { - for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) - { - for (const auto & group_by_elem : grouping_set->as()->getNodes()) - scope.nullable_group_by_keys.insert(group_by_elem); - } - } + for (auto & nullable_group_by_key : nullable_group_by_keys) + scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key)); } else { if (settings.enable_positional_arguments) replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); - resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. 
 // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2)
         auto & group_by_list = query_node_typed.getGroupBy().getNodes();
         expandTuplesInList(group_by_list);
 
+        QueryTreeNodes nullable_group_by_keys;
         if (scope.group_by_use_nulls)
         {
             for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes())
-                scope.nullable_group_by_keys.insert(group_by_elem);
+                nullable_group_by_keys.push_back(group_by_elem->clone());
         }
+
+        resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
+
+        for (auto & nullable_group_by_key : nullable_group_by_keys)
+            scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key));
     }
 }

From 128c51c6fc6bd88db20b255a8183cb591b26c2ba Mon Sep 17 00:00:00 2001
From: vdimir
Date: Wed, 27 Mar 2024 11:27:30 +0000
Subject: [PATCH 040/150] Fix storage join loading order

---
 src/Disks/DiskLocal.cpp                              |  1 -
 src/Storages/StorageSet.cpp                          | 13 ++++++++++++-
 tests/integration/test_async_load_databases/test.py  |  3 +++
 tests/integration/test_join_set_family_s3/test.py    | 14 +++++++++++---
 4 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp
index f5d67d37b07..1a8d46668e0 100644
--- a/src/Disks/DiskLocal.cpp
+++ b/src/Disks/DiskLocal.cpp
@@ -153,7 +153,6 @@ public:
         return dir_path / entry->path().filename();
     }
 
-
     String name() const override { return entry->path().filename(); }
 
 private:
diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp
index 7d7f3113cdb..8561c3f3aa8 100644
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@@ -247,6 +247,8 @@ void StorageSetOrJoinBase::restore()
     static const char * file_suffix = ".bin";
     static const auto file_suffix_size = strlen(".bin");
 
+    using FilePriority = std::pair<UInt64, std::string>;
+    std::priority_queue<FilePriority, std::vector<FilePriority>, std::greater<>> backup_files;
     for (auto dir_it{disk->iterateDirectory(path)}; dir_it->isValid(); dir_it->next())
     {
         const auto & name = dir_it->name();
@@ -261,9 +263,18 @@ void StorageSetOrJoinBase::restore()
             if (file_num > increment)
                 increment = file_num;
 
-            restoreFromFile(dir_it->path());
+            backup_files.push({file_num, file_path});
         }
     }
+
+    /// Restore in the same order as the blocks were written.
+    /// This may be important for the Join storage: users expect to get the first row (unless the `join_any_take_last_row` setting is set),
+    /// but after a restart we may have a different order of blocks in memory.
+    while (!backup_files.empty())
+    {
+        restoreFromFile(backup_files.top().second);
+        backup_files.pop();
+    }
 }
diff --git a/tests/integration/test_async_load_databases/test.py b/tests/integration/test_async_load_databases/test.py
index 050b529a227..d06897b1045 100644
--- a/tests/integration/test_async_load_databases/test.py
+++ b/tests/integration/test_async_load_databases/test.py
@@ -122,6 +122,9 @@ def test_dependent_tables(started_cluster):
     )
     query("create table system.join (n int, m int) engine=Join(any, left, n)")
     query("insert into system.join values (1, 1)")
+    for i in range(2, 100):
+        query(f"insert into system.join values (1, {i})")
+
     query(
         "create table src (n int, m default joinGet('system.join', 'm', 1::int),"
         "t default dictGetOrNull('a.d', 'm', toUInt64(3)),"
diff --git a/tests/integration/test_join_set_family_s3/test.py b/tests/integration/test_join_set_family_s3/test.py
index 38b56b7b15b..f0e1480d867 100644
--- a/tests/integration/test_join_set_family_s3/test.py
+++ b/tests/integration/test_join_set_family_s3/test.py
@@ -93,10 +93,18 @@ def test_join_s3(cluster):
         "CREATE TABLE testLocalJoin(`id` UInt64, `val` String) ENGINE = Join(ANY, LEFT, id)"
     )
     node.query(
-        "CREATE TABLE testS3Join(`id` UInt64, `val` String) ENGINE = Join(ANY, LEFT, id) SETTINGS disk='s3'"
+        "CREATE TABLE testS3Join(`id` UInt64, `val` String) ENGINE = Join(ANY, LEFT, id) SETTINGS disk='s3', join_any_take_last_row = 1"
    )
 
     node.query("INSERT INTO testLocalJoin VALUES (1, 'a')")
+    for i in range(1, 10):
+        c = chr(ord("a") + i)
+        node.query(f"INSERT INTO testLocalJoin VALUES (1, '{c}')")
+
+    # because of `join_any_take_last_row = 1` we expect the last inserted row, with the 'a' value
+    for i in range(1, 10):
+        c = chr(ord("a") + i)
+        node.query(f"INSERT INTO testS3Join VALUES (1, '{c}')")
     node.query("INSERT INTO testS3Join VALUES (1, 'a')")
 
     assert (
         node.query(
 ) == "\t\na\ta\n\t\n"
     )
-    assert_objects_count(cluster, 1)
+    assert_objects_count(cluster, 10)
 
     node.query("INSERT INTO testLocalJoin VALUES (2, 'b')")
     node.query("INSERT INTO testS3Join VALUES (2, 'b')")
 
     assert (
         node.query(
 ) == "\t\na\ta\nb\tb\n"
     )
-    assert_objects_count(cluster, 2)
+    assert_objects_count(cluster, 11)
 
     node.restart_clickhouse()
     assert (

From 42a27c5c875af506bddc8c4e932acf91ec0410a9 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Wed, 27 Mar 2024 16:27:47 +0100
Subject: [PATCH 041/150] Fix WriteBufferAzureBlobStorage destructor uncaught exception

---
 .../IO/WriteBufferFromAzureBlobStorage.cpp | 42 +++++++++++++++----
 .../IO/WriteBufferFromAzureBlobStorage.h   |  5 ++-
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
index 05b93dd1fa3..9432cdf9fef 100644
--- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
+++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
@@ -65,7 +65,20 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage(
 
 WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage()
 {
-    finalize();
+    LOG_TRACE(limitedLog, "Close WriteBufferFromAzureBlobStorage. {}.", blob_path);
+
+    /// This destructor can be called with finalized=false in case of exceptions.
+    if (!finalized)
+    {
+        LOG_INFO(
+            log,
+            "WriteBufferFromAzureBlobStorage is not finalized in destructor. "
+            "The file might not be written to AzureBlobStorage. 
" + "{}.", + blob_path); + } + + task_tracker->safeWaitAll(); } void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries, size_t cost) @@ -102,9 +115,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } } -void WriteBufferFromAzureBlobStorage::finalizeImpl() +void WriteBufferFromAzureBlobStorage::preFinalize() { - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + if (is_prefinalized) + return; + + // This function should not be run again + is_prefinalized = true; /// If there is only one block and size is less than or equal to max_single_part_upload_size /// then we use single part upload instead of multi part upload @@ -113,6 +130,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() size_t data_size = size_t(position() - memory.data()); if (data_size <= max_single_part_upload_size) { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(memory.data()), data_size); execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, data_size); LOG_TRACE(log, "Committed single block for blob `{}`", blob_path); @@ -120,14 +138,22 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() } } + writePart(); +} - execWithRetry([this](){ next(); }, max_unexpected_write_error_retries); +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + LOG_TRACE(log, "finalizeImpl WriteBufferFromAzureBlobStorage {}", blob_path); - task_tracker->waitAll(); + if (!is_prefinalized) + preFinalize(); - execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); - - LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + if (!block_ids.empty()) + { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + } } void WriteBufferFromAzureBlobStorage::nextImpl() diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255..7d4081ad792 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -41,7 +41,7 @@ public: ~WriteBufferFromAzureBlobStorage() override; void nextImpl() override; - + void preFinalize() override; std::string getFileName() const override { return blob_path; } void sync() override { next(); } @@ -65,6 +65,9 @@ private: const std::string blob_path; const WriteSettings write_settings; + /// Track that prefinalize() is called only once + bool is_prefinalized = false; + AzureClientPtr blob_container_client; std::vector block_ids; From d8b06588b8387f3aa30898914341b8a94166437d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 27 Mar 2024 20:14:52 +0300 Subject: [PATCH 042/150] Aggregator parallel merge cancellation --- .../AggregateFunctionUniq.h | 14 ++++---- .../Combinators/AggregateFunctionArray.h | 9 +++-- .../Combinators/AggregateFunctionIf.h | 9 +++-- .../Combinators/AggregateFunctionMerge.h | 9 +++-- .../Combinators/AggregateFunctionNull.h | 13 +++++-- .../Combinators/AggregateFunctionState.h | 9 +++-- src/AggregateFunctions/IAggregateFunction.h | 4 +-- src/AggregateFunctions/UniqExactSet.h | 19 +++++++--- src/Interpreters/Aggregator.cpp | 35 +++++++++++-------- 
src/Interpreters/Aggregator.h | 21 +++++++---- src/Processors/IProcessor.h | 4 +-- .../AggregatingInOrderTransform.cpp | 2 +- .../Transforms/AggregatingTransform.cpp | 22 ++++++++---- ...gingAggregatedMemoryEfficientTransform.cpp | 2 +- .../Transforms/MergingAggregatedTransform.cpp | 11 +++--- src/Processors/Transforms/RollupTransform.cpp | 2 +- 16 files changed, 122 insertions(+), 63 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 891f2ac4284..c53b5e3bdb7 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -457,9 +457,9 @@ public: detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } - bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed;} + bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { if constexpr (is_parallelize_merge_prepare_needed) { @@ -469,7 +469,7 @@ public: for (size_t i = 0; i < data_vec.size(); ++i) data_vec[i] = &this->data(places[i]).set; - DataSet::parallelizeMergePrepare(data_vec, thread_pool); + DataSet::parallelizeMergePrepare(data_vec, thread_pool, is_cancelled); } else { @@ -485,10 +485,10 @@ public: bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena *) const override { if constexpr (is_able_to_parallelize_merge) - this->data(place).set.merge(this->data(rhs).set, &thread_pool); + this->data(place).set.merge(this->data(rhs).set, &thread_pool, is_cancelled); else this->data(place).set.merge(this->data(rhs).set); } @@ -579,10 +579,10 @@ public: bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena *) const override { if constexpr (is_able_to_parallelize_merge) - this->data(place).set.merge(this->data(rhs).set, &thread_pool); + this->data(place).set.merge(this->data(rhs).set, &thread_pool, is_cancelled); else this->data(place).set.merge(this->data(rhs).set); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h index 6b918926d0d..9dc5e274dab 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h @@ -144,9 +144,14 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void 
merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { - nested_func->merge(place, rhs, thread_pool, arena); + nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + { + nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h index df23398a10d..91dcfa4db0b 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h @@ -167,9 +167,14 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { - nested_func->merge(place, rhs, thread_pool, arena); + nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + { + nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } void mergeBatch( diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h index 53c24bd60c1..5bc478116e0 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h @@ -113,9 +113,14 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { - nested_func->merge(place, rhs, thread_pool, arena); + nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + { + nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 72ab3cf5acb..eef5f8bf66b 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -154,9 +154,18 @@ public: bool isAbleToParallelizeMerge() const 
override { return nested_function->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { - nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, arena); + AggregateDataPtrs nested_places(places.begin(), places.end()); + for (auto & nested_place : nested_places) + nested_place = nestedPlace(nested_place); + + nested_function->parallelizeMergePrepare(nested_places, thread_pool, is_cancelled); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + { + nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, is_cancelled, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionState.h b/src/AggregateFunctions/Combinators/AggregateFunctionState.h index b0ab6d49604..7b2933d42c9 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionState.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionState.h @@ -94,9 +94,14 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override { - nested_func->merge(place, rhs, thread_pool, arena); + nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + { + nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 499185320e6..b33d4b20a16 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -151,7 +151,7 @@ public: virtual bool isParallelizeMergePrepareNeeded() const { return false; } - virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/) const + virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic * /*is_cancelled*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName()); } @@ -168,7 +168,7 @@ public: /// Should be used only if isAbleToParallelizeMerge() returned true. 
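A note on the pattern this signature change propagates: cancellation is cooperative. A nullable pointer to an atomic flag is threaded into every parallel merge, and workers poll it between units of work, as the `thread_func` loops in `UniqExactSet` further down show. The standalone sketch below reproduces that shape; the names and the plain `std::thread` pool are illustrative stand-ins rather than ClickHouse APIs, and the flag is assumed to be `std::atomic<bool>` (its template argument is elided in this patch rendering).

```cpp
#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>

/// Illustrative sketch (not ClickHouse code): workers pull bucket indices from
/// a shared counter and bail out as soon as the optional flag is raised.
void mergeBucketsCooperatively(size_t num_buckets, size_t num_threads, const std::atomic<bool> * is_cancelled)
{
    std::atomic<size_t> next_bucket_to_merge{0};

    auto thread_func = [&]
    {
        while (true)
        {
            /// Guard the pointer: nullptr means cancellation is not in use.
            if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst))
                return;

            const size_t bucket = next_bucket_to_merge.fetch_add(1);
            if (bucket >= num_buckets)
                return;

            /// ... merge the two partial states for `bucket` here ...
        }
    };

    std::vector<std::thread> pool;
    for (size_t i = 0; i < num_threads; ++i)
        pool.emplace_back(thread_func);
    for (auto & thread : pool)
        thread.join();
}
```

A null pointer means cancellation is not in use, which keeps pre-existing call sites valid; that is why every check in the patch guards the pointer before loading it.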
virtual void - merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const + merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, std::atomic * /*is_cancelled*/, Arena * /*arena*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName()); } diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 131c59b9ed6..18b0b830cb8 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -37,7 +37,7 @@ public: /// In merge, if one of the lhs and rhs is twolevelset and the other is singlelevelset, then the singlelevelset will need to convertToTwoLevel(). /// It's not in parallel and will cost extra large time if the thread_num is large. /// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel. - static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool) + static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool, std::atomic * is_cancelled) { UInt64 single_level_set_num = 0; UInt64 all_single_hash_size = 0; @@ -63,7 +63,7 @@ public: try { auto data_vec_atomic_index = std::make_shared(0); - auto thread_func = [data_vec, data_vec_atomic_index, thread_group = CurrentThread::getGroup()]() + auto thread_func = [data_vec, data_vec_atomic_index, is_cancelled, thread_group = CurrentThread::getGroup()]() { SCOPE_EXIT_SAFE( if (thread_group) @@ -76,6 +76,9 @@ public: while (true) { + if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) + return; + const auto i = data_vec_atomic_index->fetch_add(1); if (i >= data_vec.size()) return; @@ -96,7 +99,7 @@ public: } } - auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr) + auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic * is_cancelled = nullptr) { if (isSingleLevel() && other.isTwoLevel()) convertToTwoLevel(); @@ -113,7 +116,12 @@ public: if (!thread_pool) { for (size_t i = 0; i < rhs.NUM_BUCKETS; ++i) + { + if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) + return; + lhs.impls[i].merge(rhs.impls[i]); + } } else { @@ -121,7 +129,7 @@ public: { auto next_bucket_to_merge = std::make_shared(0); - auto thread_func = [&lhs, &rhs, next_bucket_to_merge, thread_group = CurrentThread::getGroup()]() + auto thread_func = [&lhs, &rhs, next_bucket_to_merge, is_cancelled, thread_group = CurrentThread::getGroup()]() { SCOPE_EXIT_SAFE( if (thread_group) @@ -133,6 +141,9 @@ public: while (true) { + if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) + return; + const auto bucket = next_bucket_to_merge->fetch_add(1); if (bucket >= rhs.NUM_BUCKETS) return; diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 7f3b961a598..837b4e47fba 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1435,13 +1435,14 @@ void NO_INLINE Aggregator::mergeOnIntervalWithoutKey( AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data) const + const AggregateColumnsConstData & aggregate_columns_data, + std::atomic * is_cancelled) const { /// `data_variants` will destroy the states of aggregate functions in the destructor data_variants.aggregator 
= this; data_variants.init(AggregatedDataVariants::Type::without_key); - mergeWithoutKeyStreamsImpl(data_variants, row_begin, row_end, aggregate_columns_data); + mergeWithoutKeyStreamsImpl(data_variants, row_begin, row_end, aggregate_columns_data, is_cancelled); } @@ -2636,7 +2637,8 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( - ManyAggregatedDataVariants & non_empty_data) const + ManyAggregatedDataVariants & non_empty_data, + std::atomic * is_cancelled) const { ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads}; @@ -2652,7 +2654,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( for (size_t result_num = 0; result_num < size; ++result_num) data_vec.emplace_back(non_empty_data[result_num]->without_key + offsets_of_aggregate_states[i]); - aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool, is_cancelled); } } @@ -2668,6 +2670,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( res_data + offsets_of_aggregate_states[i], current_data + offsets_of_aggregate_states[i], thread_pool, + is_cancelled, res->aggregates_pool); else aggregate_functions[i]->merge( @@ -2954,17 +2957,19 @@ void NO_INLINE Aggregator::mergeStreamsImpl( void NO_INLINE Aggregator::mergeBlockWithoutKeyStreamsImpl( Block block, - AggregatedDataVariants & result) const + AggregatedDataVariants & result, + std::atomic * is_cancelled) const { AggregateColumnsConstData aggregate_columns = params.makeAggregateColumnsData(block); - mergeWithoutKeyStreamsImpl(result, 0, block.rows(), aggregate_columns); + mergeWithoutKeyStreamsImpl(result, 0, block.rows(), aggregate_columns, is_cancelled); } void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( AggregatedDataVariants & result, size_t row_begin, size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data) const + const AggregateColumnsConstData & aggregate_columns_data, + std::atomic * is_cancelled) const { using namespace CurrentMetrics; @@ -2986,12 +2991,12 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( if (aggregate_functions[i]->isParallelizeMergePrepareNeeded()) { std::vector data_vec{res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row]}; - aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool, is_cancelled); } if (aggregate_functions[i]->isAbleToParallelizeMerge()) aggregate_functions[i]->merge( - res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], thread_pool, result.aggregates_pool); + res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], thread_pool, is_cancelled, result.aggregates_pool); else aggregate_functions[i]->merge( res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], result.aggregates_pool); @@ -3000,7 +3005,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( } -bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const +bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys, std::atomic * is_cancelled) const { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; @@ -3022,7 +3027,7 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & 
result, bool } if (result.type == AggregatedDataVariants::Type::without_key || block.info.is_overflows) - mergeBlockWithoutKeyStreamsImpl(std::move(block), result); + mergeBlockWithoutKeyStreamsImpl(std::move(block), result, is_cancelled); #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ mergeStreamsImpl(std::move(block), result.aggregates_pool, *result.NAME, result.NAME->data, result.without_key, result.consecutive_keys_cache_stats, no_more_keys); @@ -3070,7 +3075,7 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool } -void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads) +void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic * is_cancelled) { if (bucket_to_blocks.empty()) return; @@ -3183,7 +3188,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari break; if (result.type == AggregatedDataVariants::Type::without_key || block.info.is_overflows) - mergeBlockWithoutKeyStreamsImpl(std::move(block), result); + mergeBlockWithoutKeyStreamsImpl(std::move(block), result, is_cancelled); #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ @@ -3202,7 +3207,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari } -Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) +Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, std::atomic * is_cancelled) { if (blocks.empty()) return {}; @@ -3264,7 +3269,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) bucket_num = -1; if (result.type == AggregatedDataVariants::Type::without_key || is_overflows) - mergeBlockWithoutKeyStreamsImpl(std::move(block), result); + mergeBlockWithoutKeyStreamsImpl(std::move(block), result, is_cancelled); #define M(NAME, IS_TWO_LEVEL) \ else if (result.type == AggregatedDataVariants::Type::NAME) \ diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index e339047063c..4bce700a099 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -266,7 +266,10 @@ public: AggregateFunctionInstruction * aggregate_instructions) const; /// Used for aggregate projection. - bool mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys) const; + bool mergeOnBlock(Block block, + AggregatedDataVariants & result, + bool & no_more_keys, + std::atomic * is_cancelled) const; void mergeOnBlockSmall( AggregatedDataVariants & result, @@ -279,7 +282,8 @@ public: AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data) const; + const AggregateColumnsConstData & aggregate_columns_data, + std::atomic * is_cancelled) const; /** Convert the aggregation data structure into a block. * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block. @@ -294,13 +298,13 @@ public: using BucketToBlocks = std::map; /// Merge partially aggregated blocks separated to buckets into one data structure. - void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads); + void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic * is_cancelled); /// Merge several partially aggregated blocks into one. 
/// Precondition: for all blocks block.info.is_overflows flag must be the same. /// (either all blocks are from overflow data or none blocks are). /// The resulting block has the same value of is_overflows flag. - Block mergeBlocks(BlocksList & blocks, bool final); + Block mergeBlocks(BlocksList & blocks, bool final, std::atomic * is_cancelled); /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used. * This is needed to simplify merging of that data with other results, that are already two-level. @@ -486,7 +490,8 @@ private: Arena * arena) const; void mergeWithoutKeyDataImpl( - ManyAggregatedDataVariants & non_empty_data) const; + ManyAggregatedDataVariants & non_empty_data, + std::atomic * is_cancelled) const; template void mergeSingleLevelDataImpl( @@ -597,13 +602,15 @@ private: void mergeBlockWithoutKeyStreamsImpl( Block block, - AggregatedDataVariants & result) const; + AggregatedDataVariants & result, + std::atomic * is_cancelled) const; void mergeWithoutKeyStreamsImpl( AggregatedDataVariants & result, size_t row_begin, size_t row_end, - const AggregateColumnsConstData & aggregate_columns_data) const; + const AggregateColumnsConstData & aggregate_columns_data, + std::atomic * is_cancelled) const; template void mergeBucketImpl( diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index c6bef186877..56b4509fe00 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -369,6 +369,8 @@ public: protected: virtual void onCancel() {} + std::atomic is_cancelled{false}; + private: /// For: /// - elapsed_us @@ -378,8 +380,6 @@ private: /// - output_wait_elapsed_us friend class ExecutingGraph; - std::atomic is_cancelled{false}; - std::string processor_description; /// For processors_profile_log diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index a39a0db1311..f959b2b01b4 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -160,7 +160,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk) if (group_by_key) params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw); else - params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data); + params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data, &is_cancelled); } else { diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index ea5c525d5f2..767448edc64 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -285,7 +285,11 @@ class ConvertingAggregatedToChunksTransform : public IProcessor { public: ConvertingAggregatedToChunksTransform(AggregatingTransformParamsPtr params_, ManyAggregatedDataVariantsPtr data_, size_t num_threads_) - : IProcessor({}, {params_->getHeader()}), params(std::move(params_)), data(std::move(data_)), num_threads(num_threads_) + : IProcessor({}, {params_->getHeader()}) + , params(std::move(params_)) + , data(std::move(data_)) + , shared_data(std::make_shared()) + , num_threads(num_threads_) { } @@ -346,8 +350,7 @@ public: for (auto & input : inputs) input.close(); - if (shared_data) - shared_data->is_cancelled.store(true); + shared_data->is_cancelled.store(true); return Status::Finished; } @@ -372,6 +375,11 
@@ public: return prepareTwoLevel(); } + void onCancel() override + { + shared_data->is_cancelled.store(true, std::memory_order_seq_cst); + } + private: IProcessor::Status preparePushToOutput() { @@ -464,7 +472,7 @@ private: if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row) { - params->aggregator.mergeWithoutKeyDataImpl(*data); + params->aggregator.mergeWithoutKeyDataImpl(*data, &shared_data->is_cancelled); auto block = params->aggregator.prepareBlockAndFillWithoutKey( *first, params->final, first->type != AggregatedDataVariants::Type::without_key); @@ -506,7 +514,7 @@ private: void createSources() { AggregatedDataVariantsPtr & first = data->at(0); - shared_data = std::make_shared(); + processors.reserve(num_threads); for (size_t thread = 0; thread < num_threads; ++thread) { @@ -684,7 +692,7 @@ void AggregatingTransform::consume(Chunk chunk) { auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns()); block = materializeBlock(block); - if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys)) + if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys, &is_cancelled)) is_consume_finished = true; } else @@ -704,7 +712,7 @@ void AggregatingTransform::initGenerate() if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set) { if (params->params.only_merge) - params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys); + params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys, &is_cancelled); else params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys); } diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index a92e2253314..3bfd7874ac7 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -363,7 +363,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->chunk_num = chunks_to_merge->chunk_num; chunk.setChunkInfo(std::move(res_info)); - auto block = params->aggregator.mergeBlocks(blocks_list, params->final); + auto block = params->aggregator.mergeBlocks(blocks_list, params->final, &is_cancelled); if (!required_sort_description.empty()) sortBlock(block, required_sort_description); diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index e4955d06859..64207093568 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -39,11 +39,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, - * then blocks will contain information about the number of the bucket. - * Then the calculations can be parallelized by buckets. - * We decompose the blocks to the bucket numbers indicated in them. - */ - + * then blocks will contain information about the number of the bucket. + * Then the calculations can be parallelized by buckets. + * We decompose the blocks to the bucket numbers indicated in them. 
+ */ auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = agg_info->is_overflows; block.info.bucket_num = agg_info->bucket_num; @@ -73,7 +72,7 @@ Chunk MergingAggregatedTransform::generate() next_block = blocks.begin(); /// TODO: this operation can be made async. Add async for IAccumulatingTransform. - params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads); + params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads, &is_cancelled); blocks = params->aggregator.convertToBlocks(data_variants, params->final, max_threads); next_block = blocks.begin(); } diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp index a5d67fb2f15..20ee91a203a 100644 --- a/src/Processors/Transforms/RollupTransform.cpp +++ b/src/Processors/Transforms/RollupTransform.cpp @@ -57,7 +57,7 @@ Chunk GroupByModifierTransform::merge(Chunks && chunks, bool is_input, bool fina for (auto & chunk : chunks) blocks.emplace_back(header.cloneWithColumns(chunk.detachColumns())); - auto current_block = is_input ? params->aggregator.mergeBlocks(blocks, final) : output_aggregator->mergeBlocks(blocks, final); + auto current_block = is_input ? params->aggregator.mergeBlocks(blocks, final, &is_cancelled) : output_aggregator->mergeBlocks(blocks, final, &is_cancelled); auto num_rows = current_block.rows(); return Chunk(current_block.getColumns(), num_rows); } From 6a7d48b932f67e4191d280e17d1e18151489318b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Mar 2024 17:26:12 +0000 Subject: [PATCH 043/150] Fix aliases. --- src/Analyzer/HashUtils.h | 16 +++++++++------- src/Analyzer/IQueryTreeNode.cpp | 4 ++-- src/Analyzer/IQueryTreeNode.h | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index 3727ea1ea14..8673361d499 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -11,35 +11,37 @@ namespace DB * Example of usage: * std::unordered_map map; */ -template +template struct QueryTreeNodeWithHash { QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT : node(std::move(node_)) - , hash(node->getTreeHash()) + , hash(node->getTreeHash({.compare_aliases = compare_aliases})) {} QueryTreeNodePtrType node = nullptr; CityHash_v1_0_2::uint128 hash; }; -template -inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { - return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node); + return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases}); } -template -inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { return !(lhs == rhs); } using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash; +using QueryTreeNodePtrWithHashWithoutAlias = QueryTreeNodeWithHash; using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodePtrWithHashSet = std::unordered_set; +using QueryTreeNodePtrWithHashWithoutAliasSet = std::unordered_set; using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set; template diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index 
d61cb0ffab1..7815b93c3ac 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -164,7 +164,7 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, CompareOptions compare_ return true; } -IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const +IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(CompareOptions compare_options) const { /** Compute tree hash with this node as root. * @@ -201,7 +201,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash() const } hash_state.update(static_cast(node_to_process->getNodeType())); - if (!node_to_process->alias.empty()) + if (compare_options.compare_aliases && !node_to_process->alias.empty()) { hash_state.update(node_to_process->alias.size()); hash_state.update(node_to_process->alias); diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index b07aa2d31b0..92e34616c4d 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -114,7 +114,7 @@ public: * Alias of query tree node is part of query tree hash. * Original AST is not part of query tree hash. */ - Hash getTreeHash() const; + Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true }) const; /// Get a deep copy of the query tree QueryTreeNodePtr clone() const; From 6127b0b89962d01b34ec4dd658afb21c7e7fd237 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Mar 2024 17:43:27 +0000 Subject: [PATCH 044/150] Revert "Fix broken only_analyze in QueryAnalysisPass" This reverts commit c88cb6be9dc6dc9b877c2c30d1dd92d67e325b5f. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 119 ++++++++++++---------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a4893373d6c..afc51d27a0b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2066,75 +2066,92 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden io.pipeline.setProgressCallback(context->getProgressCallback()); io.pipeline.setProcessListElement(context->getProcessListElement()); - Block block; - - while (block.rows() == 0 && executor.pull(block)) + if (only_analyze) { - } - - if (block.rows() == 0) - { - auto types = interpreter->getSampleBlock().getDataTypes(); - if (types.size() != 1) - types = {std::make_shared(types)}; - - auto & type = types[0]; - if (!type->isNullable()) + /// If query is only analyzed, then constants are not correct. 
+ scalar_block = interpreter->getSampleBlock(); + for (auto & column : scalar_block) { - if (!type->canBeInsideNullable()) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, - "Scalar subquery returned empty result of type {} which cannot be Nullable", - type->getName()); - - type = makeNullable(type); + if (column.column->empty()) + { + auto mut_col = column.column->cloneEmpty(); + mut_col->insertDefault(); + column.column = std::move(mut_col); + } } - - auto scalar_column = type->createColumn(); - scalar_column->insert(Null()); - scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - if (block.rows() != 1) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + Block block; - Block tmp_block; - while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + while (block.rows() == 0 && executor.pull(block)) { } - if (tmp_block.rows() != 0) - throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - - block = materializeBlock(block); - size_t columns = block.columns(); - - if (columns == 1) + if (block.rows() == 0) { - auto & column = block.getByPosition(0); - /// Here we wrap type to nullable if we can. - /// It is needed cause if subquery return no rows, it's result will be Null. - /// In case of many columns, do not check it cause tuple can't be nullable. - if (!column.type->isNullable() && column.type->canBeInsideNullable()) + auto types = interpreter->getSampleBlock().getDataTypes(); + if (types.size() != 1) + types = {std::make_shared(types)}; + + auto & type = types[0]; + if (!type->isNullable()) { - column.type = makeNullable(column.type); - column.column = makeNullable(column.column); + if (!type->canBeInsideNullable()) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, + "Scalar subquery returned empty result of type {} which cannot be Nullable", + type->getName()); + + type = makeNullable(type); } - scalar_block = block; + auto scalar_column = type->createColumn(); + scalar_column->insert(Null()); + scalar_block.insert({std::move(scalar_column), type, "null"}); } else { - /** Make unique column names for tuple. - * - * Example: SELECT (SELECT 2 AS x, x) - */ - makeUniqueColumnNamesInBlock(block); + if (block.rows() != 1) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); - scalar_block.insert({ - ColumnTuple::create(block.getColumns()), - std::make_shared(block.getDataTypes(), block.getNames()), - "tuple"}); + Block tmp_block; + while (tmp_block.rows() == 0 && executor.pull(tmp_block)) + { + } + + if (tmp_block.rows() != 0) + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); + + block = materializeBlock(block); + size_t columns = block.columns(); + + if (columns == 1) + { + auto & column = block.getByPosition(0); + /// Here we wrap type to nullable if we can. + /// It is needed cause if subquery return no rows, it's result will be Null. + /// In case of many columns, do not check it cause tuple can't be nullable. + if (!column.type->isNullable() && column.type->canBeInsideNullable()) + { + column.type = makeNullable(column.type); + column.column = makeNullable(column.column); + } + + scalar_block = block; + } + else + { + /** Make unique column names for tuple. 
+ * + * Example: SELECT (SELECT 2 AS x, x) + */ + makeUniqueColumnNamesInBlock(block); + + scalar_block.insert({ + ColumnTuple::create(block.getColumns()), + std::make_shared(block.getDataTypes(), block.getNames()), + "tuple"}); + } } } From e849c48e21b13a698552a371acac8201b48bc802 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Mar 2024 18:56:13 +0100 Subject: [PATCH 045/150] add a test --- src/Access/Common/AccessType.h | 2 +- src/Common/FailPoint.cpp | 4 +- src/Interpreters/InterpreterSystemQuery.cpp | 9 ++ src/Parsers/ASTSystemQuery.cpp | 1 + src/Parsers/ASTSystemQuery.h | 1 + src/Parsers/ParserSystemQuery.cpp | 1 + .../ReplicatedMergeTreeRestartingThread.cpp | 7 ++ .../MergeTree/ReplicatedMergeTreeSink.cpp | 5 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 + tests/integration/test_quorum_inserts/test.py | 82 +++++++++++++++++++ 10 files changed, 111 insertions(+), 3 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 6095f8ce6d3..83b50ce96c3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -205,7 +205,7 @@ enum class AccessType M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \ M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \ M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ - M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \ + M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT, SYSTEM WAIT FAILPOINT", GLOBAL, SYSTEM) \ M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \ M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \ M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \ diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 9e551c8f2cd..2434c6004ad 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -50,7 +50,9 @@ static struct InitFiu REGULAR(check_table_query_delay_for_part) \ REGULAR(dummy_failpoint) \ REGULAR(prefetched_reader_pool_failpoint) \ - PAUSEABLE_ONCE(dummy_pausable_failpoint_once) \ + PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \ + PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \ + PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \ PAUSEABLE(dummy_pausable_failpoint) \ ONCE(execute_query_calling_empty_set_result_func_on_exception) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1cbc9c49631..5f8b0ae01c8 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -755,6 +755,14 @@ BlockIO InterpreterSystemQuery::execute() FailPointInjection::disableFailPoint(query.fail_point_name); break; } + case Type::WAIT_FAILPOINT: + { + getContext()->checkAccess(AccessType::SYSTEM_FAILPOINT); + LOG_TRACE(log, "waiting for failpoint {}", query.fail_point_name); + FailPointInjection::pauseFailPoint(query.fail_point_name); + LOG_TRACE(log, "finished failpoint {}", query.fail_point_name); + break; + } case Type::RESET_COVERAGE: { getContext()->checkAccess(AccessType::SYSTEM); @@ -1454,6 +1462,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: + case Type::WAIT_FAILPOINT: case Type::DISABLE_FAILPOINT: case 
Type::RESET_COVERAGE: case Type::UNKNOWN: diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9215353e2b3..ed122b2b191 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -364,6 +364,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s } case Type::ENABLE_FAILPOINT: case Type::DISABLE_FAILPOINT: + case Type::WAIT_FAILPOINT: { settings.ostr << ' '; print_identifier(fail_point_name); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 18a804ebc45..65c3f0eb328 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -87,6 +87,7 @@ public: UNFREEZE, ENABLE_FAILPOINT, DISABLE_FAILPOINT, + WAIT_FAILPOINT, SYNC_FILESYSTEM_CACHE, STOP_PULLING_REPLICATION_LOG, START_PULLING_REPLICATION_LOG, diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 81f9332c730..b660f947290 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -263,6 +263,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::ENABLE_FAILPOINT: case Type::DISABLE_FAILPOINT: + case Type::WAIT_FAILPOINT: { ASTPtr ast; if (ParserIdentifier{}.parse(pos, ast, expected)) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index b79418da791..35f355d1d9b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,11 @@ namespace ErrorCodes extern const int REPLICA_IS_ALREADY_ACTIVE; } +namespace FailPoints +{ + extern const char finish_clean_quorum_failed_parts[]; +}; + /// Used to check whether it's us who set node `is_active`, or not. static String generateActiveNodeIdentifier() { @@ -241,6 +247,7 @@ void ReplicatedMergeTreeRestartingThread::removeFailedQuorumParts() storage.queue.removeFailedQuorumPart(part->info); } } + FailPointInjection::disableFailPoint(FailPoints::finish_clean_quorum_failed_parts); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 91f85cbd87d..7fcf6b971bb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -30,6 +30,7 @@ namespace FailPoints extern const char replicated_merge_tree_commit_zk_fail_after_op[]; extern const char replicated_merge_tree_insert_quorum_fail_0[]; extern const char replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault[]; + extern const char replicated_merge_tree_insert_retry_pause[]; } namespace ErrorCodes @@ -945,6 +946,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: new_retry_controller.retryLoop([&] { fiu_do_on(FailPoints::replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault, { zookeeper->forceFailureBeforeOperation(); }); + FailPointInjection::pauseFailPoint(FailPoints::replicated_merge_tree_insert_retry_pause); zookeeper->setKeeper(storage.getZooKeeper()); node_exists = zookeeper->exists(fs::path(storage.replica_path) / "parts" / part->name); if (isQuorumEnabled()) @@ -956,7 +958,8 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: { LOG_INFO(log, "Part {} fails to commit and will not retry or clean garbage. 
Restarting Thread will do everything.", part->name); transaction.clear(); - return CommitRetryContext::ERROR; + /// `quorum/failed_parts/part_name` exists because table is read only for a while, So we return table is read only. + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to shutdown: replica_path={}", storage.replica_path); } if (node_exists) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ce6735d9176..0ff5d4794e0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -141,6 +141,7 @@ namespace FailPoints { extern const char replicated_queue_fail_next_entry[]; extern const char replicated_queue_unfail_entries[]; + extern const char finish_set_quorum_failed_parts[]; } namespace ErrorCodes @@ -2221,6 +2222,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (code == Coordination::Error::ZOK) { LOG_DEBUG(log, "Marked quorum for part {} as failed.", entry.new_part_name); + FailPointInjection::disableFailPoint(FailPoints::finish_set_quorum_failed_parts); queue.removeFailedQuorumPart(part_info); return true; } diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 1276a6079f0..0cc53558521 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -1,7 +1,9 @@ +import concurrent import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) @@ -361,3 +363,83 @@ def test_insert_quorum_with_ttl(started_cluster): ) zero.query("DROP TABLE IF EXISTS test_insert_quorum_with_ttl ON CLUSTER cluster") + +def test_insert_quorum_with_keeper_loss_connection(): + zero.query("DROP TABLE IF EXISTS test_insert_quorum_with_keeper_fail ON CLUSTER cluster") + create_query = ( + "CREATE TABLE test_insert_quorum_with_keeper_loss" + "(a Int8, d Date) " + "Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') " + "ORDER BY a " + ) + + print("Create Replicated table with two replicas") + zero.query(create_query) + first.query(create_query) + + print("Stop fetches for test_insert_quorum_with_keeper_loss at first replica.") + first.query("SYSTEM STOP FETCHES test_insert_quorum_with_keeper_loss") + + print("Inject insert fail and retry pause for server zero") + zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_commit_zk_fail_after_op") + zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_insert_retry_pause") + + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + print("Inject zero") + insert_future = executor.submit( + lambda: zero.query( + "INSERT INTO test_insert_quorum_with_keeper_loss(a,d) VALUES(1, '2011-01-01')", + settings={"insert_quorum_timeout": 150000}, + ) + ) + + print("Inject zero keeper") + pm = PartitionManager() + pm.drop_instance_zk_connections(zero) + + print("Wait zero is not active") + retries = 0 + zk = cluster.get_kazoo_client("zoo1") + while True: + if ( + zk.exists( + "/clickhouse/tables/test_insert_quorum_with_keeper_loss/replicas/zero/is_active" + ) + is None + ): + break + print("replica is still active") + time.sleep(1) + retries += 1 + if retries == 120: + raise Exception("Can not wait cluster replica inactive") + + print("Inject first wait for quorum fail") + first.query("SYSTEM ENABLE FAILPOINT 
finish_set_quorum_failed_parts") + quorum_fail_future = executor.submit( + lambda: first.query("SYSTEM WAIT FAILPOINT finish_set_quorum_failed_parts", timeout=300) + ) + print("Start fetches at first node") + first.query("SYSTEM START FETCHES test_insert_quorum_with_keeper_loss") + + concurrent.futures.wait([quorum_fail_future]) + + assert(quorum_fail_future.exception() is None) + + print("Inject zero wait for clean quorum fail parts") + zero.query("SYSTEM ENABLE FAILPOINT finish_clean_quorum_failed_parts") + clean_quorum_fail_parts_future = executor.submit( + lambda: first.query("SYSTEM WAIT FAILPOINT finish_clean_quorum_failed_parts", timeout=300) + ) + print("Restore zero keeper") + pm.restore_instance_zk_connections(zero) + concurrent.futures.wait([clean_quorum_fail_parts_future]) + + assert(clean_quorum_fail_parts_future.exception() is None) + + print("Disable retry pause") + zero.query("SYSTEM DISABLE FAILPOINT replicated_merge_tree_insert_retry_pause") + concurrent.futures.wait([insert_future]) + assert(insert_future.exception() is not None) + assert(not zero.contains_in_log("LOGICAL_ERROR")) + assert(zero.contains_in_log("fails to commit and will not retry or clean garbage")) From 920ba121febb0cb5f823d4b4d3897e4079bf1fcf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Mar 2024 19:17:41 +0000 Subject: [PATCH 046/150] Fix scalars for CREATE AS SELECT for analyzer. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 13 +++++++------ .../03032_scalars_create_as_select.reference | 1 + .../0_stateless/03032_scalars_create_as_select.sql | 2 ++ 3 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03032_scalars_create_as_select.reference create mode 100644 tests/queries/0_stateless/03032_scalars_create_as_select.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index afc51d27a0b..b2fc74cafd4 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2059,13 +2059,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false); auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); + options.only_analyze = only_analyze; auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); - auto io = interpreter->execute(); - PullingAsyncPipelineExecutor executor(io.pipeline); - io.pipeline.setProgressCallback(context->getProgressCallback()); - io.pipeline.setProcessListElement(context->getProcessListElement()); - if (only_analyze) { /// If query is only analyzed, then constants are not correct. 
@@ -2082,6 +2078,11 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden } else { + auto io = interpreter->execute(); + PullingAsyncPipelineExecutor executor(io.pipeline); + io.pipeline.setProgressCallback(context->getProgressCallback()); + io.pipeline.setProcessListElement(context->getProcessListElement()); + Block block; while (block.rows() == 0 && executor.pull(block)) @@ -2193,7 +2194,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden auto & nearest_query_scope_query_node = nearest_query_scope->scope_node->as(); auto & mutable_context = nearest_query_scope_query_node.getMutableContext(); - auto scalar_query_hash_string = DB::toString(node_with_hash.hash); + auto scalar_query_hash_string = DB::toString(node_with_hash.hash) + (only_analyze ? "_analyze" : ""); if (mutable_context->hasQueryContext()) mutable_context->getQueryContext()->addScalar(scalar_query_hash_string, scalar_block); diff --git a/tests/queries/0_stateless/03032_scalars_create_as_select.reference b/tests/queries/0_stateless/03032_scalars_create_as_select.reference new file mode 100644 index 00000000000..2e73a0708a2 --- /dev/null +++ b/tests/queries/0_stateless/03032_scalars_create_as_select.reference @@ -0,0 +1 @@ +['a','b'] diff --git a/tests/queries/0_stateless/03032_scalars_create_as_select.sql b/tests/queries/0_stateless/03032_scalars_create_as_select.sql new file mode 100644 index 00000000000..5b20b8cf6f9 --- /dev/null +++ b/tests/queries/0_stateless/03032_scalars_create_as_select.sql @@ -0,0 +1,2 @@ +create or replace table query_run_metric_arrays engine Memory as with (with (select groupUniqArrayArray(['a', 'b']) from numbers(1)) as all_names select all_names) as all_metrics select all_metrics; +select * from query_run_metric_arrays; From 3ce122ef2061ed7b40766b1bf0211309b89fa621 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Mar 2024 19:23:16 +0000 Subject: [PATCH 047/150] Fixing tests. --- src/Analyzer/HashUtils.h | 4 ++-- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index 8673361d499..e0b96ea9e58 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -52,8 +52,8 @@ using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map -struct std::hash> +template +struct std::hash> { size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const { diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a1832b2db3f..8b687922288 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -776,7 +776,7 @@ struct IdentifierResolveScope /// Table expression node to data std::unordered_map table_expression_node_to_data; - QueryTreeNodePtrWithHashSet nullable_group_by_keys; + QueryTreeNodePtrWithHashWithoutAliasSet nullable_group_by_keys; /// Here we count the number of nullable GROUP BY keys we met resolving expression. /// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube` /// both `number` and `tuple(number)` would be in nullable_group_by_keys. From ce2e5a7d13c2e750f8b7d7fecb4a644b2568fc42 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Mar 2024 19:25:07 +0000 Subject: [PATCH 048/150] Fixing tests. 
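For context on this fix-up: the preceding commit introduced a second, alias-insensitive instantiation of the hashed-node wrapper for `nullable_group_by_keys`, so the `std::hash` specialization must be templated over the `compare_aliases` flag instead of being written for a single instantiation. A reduced model of that shape is sketched below; `NodeWithHash` and its string-based hashing are illustrative only (the real wrapper hashes whole query trees), and it needs C++20 for `unordered_set::contains`.

```cpp
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_set>

/// Reduced model (not ClickHouse code): a hashed wrapper whose hash either
/// includes or ignores the alias, selected at compile time.
template <bool compare_aliases>
struct NodeWithHash
{
    std::string name;
    std::string alias;
    size_t hash;

    NodeWithHash(std::string name_, std::string alias_)
        : name(std::move(name_))
        , alias(std::move(alias_))
        , hash(std::hash<std::string>{}(compare_aliases ? name + "#" + alias : name))
    {
    }

    bool operator==(const NodeWithHash & rhs) const { return hash == rhs.hash && name == rhs.name; }
};

/// The shape of the fix: specialize std::hash for every instantiation of the
/// wrapper, not just for one hard-coded set of template arguments.
template <bool compare_aliases>
struct std::hash<NodeWithHash<compare_aliases>>
{
    size_t operator()(const NodeWithHash<compare_aliases> & node) const { return node.hash; }
};

int main()
{
    /// Alias-insensitive set, as used for nullable GROUP BY keys.
    std::unordered_set<NodeWithHash<false>> keys;
    keys.insert(NodeWithHash<false>("number", ""));
    return keys.contains(NodeWithHash<false>("number", "n")) ? 0 : 1;
}
```

With `compare_aliases = false`, `number` and `number AS n` hash and compare equal, which is exactly what the alias-insensitive GROUP BY key set relies on.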
--- src/Analyzer/HashUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index e0b96ea9e58..eb6aac88fe9 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -55,7 +55,7 @@ using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map struct std::hash> { - size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const + size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const { return node_with_hash.hash.low64; } From 343b51ccfa13c25648a6004954b9442f8de3f86b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Mar 2024 21:27:07 +0100 Subject: [PATCH 049/150] fix style --- tests/integration/test_quorum_inserts/test.py | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 0cc53558521..b842a54741e 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -364,8 +364,11 @@ def test_insert_quorum_with_ttl(started_cluster): zero.query("DROP TABLE IF EXISTS test_insert_quorum_with_ttl ON CLUSTER cluster") + def test_insert_quorum_with_keeper_loss_connection(): - zero.query("DROP TABLE IF EXISTS test_insert_quorum_with_keeper_fail ON CLUSTER cluster") + zero.query( + "DROP TABLE IF EXISTS test_insert_quorum_with_keeper_fail ON CLUSTER cluster" + ) create_query = ( "CREATE TABLE test_insert_quorum_with_keeper_loss" "(a Int8, d Date) " @@ -373,19 +376,15 @@ def test_insert_quorum_with_keeper_loss_connection(): "ORDER BY a " ) - print("Create Replicated table with two replicas") zero.query(create_query) first.query(create_query) - print("Stop fetches for test_insert_quorum_with_keeper_loss at first replica.") first.query("SYSTEM STOP FETCHES test_insert_quorum_with_keeper_loss") - print("Inject insert fail and retry pause for server zero") zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_commit_zk_fail_after_op") zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_insert_retry_pause") with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - print("Inject zero") insert_future = executor.submit( lambda: zero.query( "INSERT INTO test_insert_quorum_with_keeper_loss(a,d) VALUES(1, '2011-01-01')", @@ -393,11 +392,9 @@ def test_insert_quorum_with_keeper_loss_connection(): ) ) - print("Inject zero keeper") pm = PartitionManager() pm.drop_instance_zk_connections(zero) - print("Wait zero is not active") retries = 0 zk = cluster.get_kazoo_client("zoo1") while True: @@ -414,32 +411,33 @@ def test_insert_quorum_with_keeper_loss_connection(): if retries == 120: raise Exception("Can not wait cluster replica inactive") - print("Inject first wait for quorum fail") first.query("SYSTEM ENABLE FAILPOINT finish_set_quorum_failed_parts") quorum_fail_future = executor.submit( - lambda: first.query("SYSTEM WAIT FAILPOINT finish_set_quorum_failed_parts", timeout=300) + lambda: first.query( + "SYSTEM WAIT FAILPOINT finish_set_quorum_failed_parts", timeout=300 + ) ) - print("Start fetches at first node") first.query("SYSTEM START FETCHES test_insert_quorum_with_keeper_loss") concurrent.futures.wait([quorum_fail_future]) - assert(quorum_fail_future.exception() is None) + assert quorum_fail_future.exception() is None - print("Inject zero wait for clean quorum fail parts") zero.query("SYSTEM ENABLE FAILPOINT finish_clean_quorum_failed_parts") clean_quorum_fail_parts_future = executor.submit( - 
lambda: first.query("SYSTEM WAIT FAILPOINT finish_clean_quorum_failed_parts", timeout=300) + lambda: first.query( + "SYSTEM WAIT FAILPOINT finish_clean_quorum_failed_parts", timeout=300 + ) ) - print("Restore zero keeper") pm.restore_instance_zk_connections(zero) concurrent.futures.wait([clean_quorum_fail_parts_future]) - assert(clean_quorum_fail_parts_future.exception() is None) + assert clean_quorum_fail_parts_future.exception() is None - print("Disable retry pause") zero.query("SYSTEM DISABLE FAILPOINT replicated_merge_tree_insert_retry_pause") concurrent.futures.wait([insert_future]) - assert(insert_future.exception() is not None) - assert(not zero.contains_in_log("LOGICAL_ERROR")) - assert(zero.contains_in_log("fails to commit and will not retry or clean garbage")) + assert insert_future.exception() is not None + assert not zero.contains_in_log("LOGICAL_ERROR") + assert zero.contains_in_log( + "fails to commit and will not retry or clean garbage" + ) From 15566f68c11c938407ef8c8982cd312573bf0a73 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 27 Mar 2024 22:58:48 +0100 Subject: [PATCH 050/150] fix test --- tests/queries/0_stateless/01271_show_privileges.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index b18ae8a99be..b1237189cb3 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -155,7 +155,7 @@ SYSTEM FLUSH ASYNC INSERT QUEUE ['FLUSH ASYNC INSERT QUEUE'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM -SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM +SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT','SYSTEM WAIT FAILPOINT'] GLOBAL SYSTEM SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM SYSTEM JEMALLOC ['SYSTEM JEMALLOC PURGE','SYSTEM JEMALLOC ENABLE PROFILE','SYSTEM JEMALLOC DISABLE PROFILE','SYSTEM JEMALLOC FLUSH PROFILE'] GLOBAL SYSTEM SYSTEM [] \N ALL From c39cd75dff6b31c05dc179636a7d6d06e1903bf4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:15:29 +0000 Subject: [PATCH 051/150] Support Variant in JSONExtract functions --- src/Functions/FunctionsJSON.h | 44 ++++++++++++++++++- .../03034_json_extract_variant.reference | 5 +++ .../03034_json_extract_variant.sql | 6 +++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03034_json_extract_variant.reference create mode 100644 tests/queries/0_stateless/03034_json_extract_variant.sql diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 8a193785f87..af7cdeccba4 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -10,7 +10,7 @@ #include #include -#include +//#include #include #include @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -1234,6 +1237,35 @@ struct JSONExtractTree std::unique_ptr value; }; + class VariantNode : public Node + { + public: + VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } + + bool insertResultToColumn(IColumn & 
dest, const Element & element) override + { + auto & column_variant = assert_cast(dest); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]->insertResultToColumn(variant, element)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + return false; + } + + private: + std::vector> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. + std::vector order; + }; + static std::unique_ptr build(const char * function_name, const DataTypePtr & type) { switch (type->getTypeId()) @@ -1310,6 +1342,16 @@ struct JSONExtractTree const auto & value_type = map_type.getValueType(); return std::make_unique(build(function_name, key_type), build(function_name, value_type)); } + case TypeIndex::Variant: + { + const auto & variant_type = static_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(build(function_name, variant)); + return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } default: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} doesn't support the return type schema: {}", diff --git a/tests/queries/0_stateless/03034_json_extract_variant.reference b/tests/queries/0_stateless/03034_json_extract_variant.reference new file mode 100644 index 00000000000..8d78d639fd2 --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.reference @@ -0,0 +1,5 @@ +42 UInt32 +Hello String +[1,2,3] Array(UInt32) +{'a':42,'b':'Hello','c':[1,2,3]} +[('a',42),('b','Hello'),('c',[1,2,3])] Array(Tuple(String, Variant(Array(UInt32), String, UInt32))) diff --git a/tests/queries/0_stateless/03034_json_extract_variant.sql b/tests/queries/0_stateless/03034_json_extract_variant.sql new file mode 100644 index 00000000000..54d5bed9582 --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.sql @@ -0,0 +1,6 @@ +select JSONExtract('{"a" : 42}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : "Hello"}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(String, Array(UInt32))') as v, variantType(v); +select JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))'); +select JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') as v, toTypeName(v); + From 0772536a4c6addf790fca729611feeb430a0d63a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:26:44 +0000 Subject: [PATCH 052/150] Add examples in docs --- docs/en/sql-reference/data-types/variant.md | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e97..668a1b260f4 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -395,3 +395,37 @@ SELECT v, variantType(v) FROM test ORDER by v; │ 100 │ UInt32 │ └─────┴────────────────┘ ``` + +## JSONExtract functions with Variant + +All `JSONExtract*` functions support `Variant` type: + +```sql +SELECT 
JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(UInt32, String, Array(UInt32))') AS variant, variantType(variant) AS variant_type; +``` + +```text +┌─variant─┬─variant_type──┐ +│ [1,2,3] │ Array(UInt32) │ +└─────────┴───────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) AS map_of_variant_types +``` + +```text +┌─map_of_variants──────────────────┬─map_of_variant_types────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS variants, arrayMap(x -> (x.1, variantType(x.2)), variants) AS variant_types +``` + +```text +┌─variants───────────────────────────────┬─variant_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` From 75be74874e15edf718b205f5ee2836ce7c5e54b6 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:27:27 +0000 Subject: [PATCH 053/150] Fix include --- src/Functions/FunctionsJSON.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index af7cdeccba4..33dd40aed90 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -10,7 +10,7 @@ #include #include -//#include +#include #include #include From 72f3871bd364a24d4a0b58d8a8078ebb069dd0be Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 28 Mar 2024 01:58:17 +0100 Subject: [PATCH 054/150] Fix clickhouse-test --- tests/clickhouse-test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 10851d23481..2f1a59dceb6 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -451,8 +451,7 @@ def get_processlist_size(args): """ SELECT count() - FROM - FROM system.processes + FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) WHERE query NOT LIKE '%system.processes%' """, ).strip() From 54348e5e8c0636ae731b434a641cf7d1fc0f80e0 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 28 Mar 2024 06:27:48 +0100 Subject: [PATCH 055/150] Fix logs saving in DatabaseReplicated tests --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index bac9d8df7a9..c6b3a9a99f0 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -257,10 +257,10 @@ do echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) + err=$( { clickhouse-client -q --port 19000 "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && 
failed_to_save_logs=1 - err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) + err=$( { clickhouse-client -q --port 29000 "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi From dae3a9e3896c0da5df4331776efdff6346c390f4 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 28 Mar 2024 06:31:46 +0100 Subject: [PATCH 056/150] Fix --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c6b3a9a99f0..b9ed0561a48 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -257,10 +257,10 @@ do echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - err=$( { clickhouse-client -q --port 19000 "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) + err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 - err=$( { clickhouse-client -q --port 29000 "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) + err=$( { clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst; } 2>&1 ) echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi From afbb3f8a506b29ef085b67d936cb2dc1321fa20f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 28 Mar 2024 09:26:00 +0100 Subject: [PATCH 057/150] Added wait before commit blocks --- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 9432cdf9fef..fe64415191c 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -150,6 +150,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { + task_tracker->waitAll(); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); From 241e68c44b234785aa783dd0888d15a578a3a5ed Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 11:22:28 +0100 Subject: [PATCH 058/150] Add documentation for hasToken functions --- .../functions/other-functions.md | 48 +++++- .../functions/string-search-functions.md | 151 ++++++++++++++++++ 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e7fca31483a..2638a5a650b 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -543,12 +543,54 @@ You can get similar result by using the [ternary operator](../../sql-reference/f Returns 1 if the Float32 and Float64 argument is NaN, 
otherwise this function returns 0.
 
-## hasColumnInTable(\[‘hostname’\[, ‘username’\[, ‘password’\]\],\] ‘database’, ‘table’, ‘column’)
+## hasColumnInTable
+
+Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0.
+
+**Syntax**
+
+```sql
+hasColumnInTable(['hostname'[, 'username'[, 'password']],] 'database', 'table', 'column')
+```
+
+**Parameters**
+
+- `database` : name of the database. [String literal](../syntax#syntax-string-literal)
+- `table` : name of the table. [String literal](../syntax#syntax-string-literal)
+- `column` : name of the column. [String literal](../syntax#syntax-string-literal)
+- `hostname` : remote server name to perform the check on. Optional. [String literal](../syntax#syntax-string-literal)
+- `username` : username for the remote server. Optional. [String literal](../syntax#syntax-string-literal)
+- `password` : password for the remote server. Optional. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- `1` if the given column exists.
+- `0`, otherwise.
+
+**Implementation details**
 
If parameter `hostname` is given, the check is performed on a remote server.
-If the table does not exist, an exception is thrown.
 
For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0.
 
+**Example**
+
+Query:
+
+```sql
+SELECT hasColumnInTable('system','metrics','metric')
+```
+
+```response
+1
+```
+
+```sql
+SELECT hasColumnInTable('system','metrics','non-existing_column')
+```
+
+```response
+0
+```
+
 ## bar

 Builds a bar chart.
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 22f879c62ae..818626fc4fc 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -685,3 +685,154 @@ Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are U
 ## hasSubsequenceCaseInsensitiveUTF8

 Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
+
+## hasToken
+
+Returns 1 if a given token is present in a haystack, or 0 otherwise.
+
+**Syntax**
+
+```sql
+hasToken(haystack, token)
+```
+
+**Parameters**
+
+- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
+
+**Returned value**
+
+- 1, if the token is present in the haystack.
+- 0, if the token is not present.
+
+**Implementation details**
+
+The token must be a constant string. It is supported by the `tokenbf_v1` index specialization.
+
+**Example**
+
+Query:
+
+```sql
+SELECT hasToken('Hello World','Hello');
+```
+
+```response
+1
+```
+
+## hasTokenOrNull
+
+Returns 1 if a given token is present, 0 if not present, and null if the token is ill-formed.
+
+**Syntax**
+
+```sql
+hasTokenOrNull(haystack, token)
+```
+
+**Parameters**
+
+- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
+
+**Returned value**
+
+- 1, if the token is present in the haystack.
+- 0, if the token is not present in the haystack.
+- null, if the token is ill-formed.
+
+**Implementation details**
+
+The token must be a constant string. It is supported by the `tokenbf_v1` index specialization.
+
+**Example**
+
+Where `hasToken` would throw an error for an ill-formed token, `hasTokenOrNull` returns `null` for an ill-formed token.
+
+Query:
+
+```sql
+SELECT hasTokenOrNull('Hello World','Hello,World');
+```
+
+```response
+null
+```
+
+## hasTokenCaseInsensitive
+
+Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case.
+
+**Syntax**
+
+```sql
+hasTokenCaseInsensitive(haystack, token)
+```
+
+**Parameters**
+
+- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
+
+**Returned value**
+
+- 1, if the token is present in the haystack.
+- 0, otherwise.
+
+**Implementation details**
+
+The token must be a constant string. It is supported by the `tokenbf_v1` index specialization.
+
+**Example**
+
+Query:
+
+```sql
+SELECT hasTokenCaseInsensitive('Hello World','hello');
+```
+
+```response
+1
+```
+
+## hasTokenCaseInsensitiveOrNull
+
+Returns 1 if a given token is present in a haystack, 0 otherwise. Ignores case and returns null if the token is ill-formed.
+
+**Syntax**
+
+```sql
+hasTokenCaseInsensitiveOrNull(haystack, token)
+```
+
+**Parameters**
+
+- `haystack`: String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `token`: Maximal length substring between two non-alphanumeric ASCII characters (or boundaries of haystack).
+
+**Returned value**
+
+- 1, if the token is present in the haystack.
+- 0, if the token is not present.
+- null, if the token is ill-formed.
+
+**Implementation details**
+
+The token must be a constant string. It is supported by the `tokenbf_v1` index specialization.
+
+**Example**
+
+Where `hasTokenCaseInsensitive` would throw an error for an ill-formed token, `hasTokenCaseInsensitiveOrNull` returns `null` for an ill-formed token.
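+
+For contrast, a quick sketch with a well-formed token (expected output shown): the result is `0` or `1`, never `null`:
+
+```sql
+SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello');
+```
+
+```response
+1
+```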
+ +Query: + +```sql +SELECT hasTokenCaseInsensitiveOrNull('Hello World','hello,world'); +``` + +```response +null +``` \ No newline at end of file From f5437d3055835b06b3f5a800944581d9b9cdeb2c Mon Sep 17 00:00:00 2001 From: KrJin <33444112+jincong8973@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:08:38 +0800 Subject: [PATCH 059/150] Update string-search-functions.md Improve zh-cn doc of string-search-functions --- .../functions/string-search-functions.md | 698 ++++++++++++++++-- 1 file changed, 636 insertions(+), 62 deletions(-) diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index e4167127424..14c40fa1243 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -1,128 +1,702 @@ --- slug: /zh/sql-reference/functions/string-search-functions --- -# 字符串搜索函数 {#zi-fu-chuan-sou-suo-han-shu} -下列所有函数在默认的情况下区分大小写。对于不区分大小写的搜索,存在单独的变体。 +# 字符串搜索函数 -## 位置(大海捞针),定位(大海捞针) {#positionhaystack-needle-locatehaystack-needle} +本节中的所有函数默认情况下都区分大小写进行搜索。不区分大小写的搜索通常由单独的函数变体提供。 +请注意,不区分大小写的搜索,遵循英语的小写-大写规则。 +例如。英语中大写的`i`是`I`,而在土耳其语中则是`İ`, 对于英语以外的语言,结果可能会不符合预期。 -在字符串`haystack`中搜索子串`needle`。 -返回子串的位置(以字节为单位),从1开始,如果未找到子串,则返回0。 +本节中的函数还假设搜索字符串和被搜索字符串是单字节编码文本(例如ASCII)。如果违反此假设,不会抛出异常且结果为undefined。 +UTF-8 编码字符串的搜索通常由单独的函数变体提供。同样,如果使用 UTF-8 函数变体但输入字符串不是 UTF-8 编码文本,不会抛出异常且结果为undefined。 +需要注意,函数不会执行自动 Unicode 规范化,您可以使用[normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) 函数来执行此操作。 +在[字符串函数](string-functions.md) 和 [字符串替换函数](string-replace-functions.md) 会分别说明. -对于不区分大小写的搜索,请使用函数`positionCaseInsensitive`。 +## position -## positionUTF8(大海捞针) {#positionutf8haystack-needle} +返回字符串`haystack`中子字符串`needle`的位置(以字节为单位,从 1 开始)。 -与`position`相同,但位置以Unicode字符返回。此函数工作在UTF-8编码的文本字符集中。如非此编码的字符集,则返回一些非预期结果(他不会抛出异常)。 +**语法** -对于不区分大小写的搜索,请使用函数`positionCaseInsensitiveUTF8`。 +``` sql +position(haystack, needle[, start_pos]) +``` -## 多搜索分配(干草堆,\[针1,针2, …, needlen\]) {#multisearchallpositionshaystack-needle1-needle2-needlen} +别名: +- `position(needle IN haystack)` -与`position`相同,但函数返回一个数组,其中包含所有匹配needle的位置。 +**参数** -对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchAllPositionsCaseInsensitive,multiSearchAllPositionsUTF8,multiSearchAllPositionsCaseInsensitiveUTF8`。 +- `haystack` — 被检索查询字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — 子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` – 在字符串`haystack` 中开始检索的位置(从1开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选 -## multiSearchFirstPosition(大海捞针,\[针1,针2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} +**返回值** -与`position`相同,但返回在`haystack`中与needles字符串匹配的最左偏移。 +- 若子字符串存在,返回位置(以字节为单位,从 1 开始)。 +- 如果不存在子字符串,返回0。 -对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchFirstPositionCaseInsensitive,multiSearchFirstPositionUTF8,multiSearchFirstPositionCaseInsensitiveUTF8`。 +如果子字符串 `needle` 为空,则: +- 如果未指定 `start_pos`,返回 `1` +- 如果 `start_pos` 为 0,则返回 `1` +- 如果 `start_pos >= 1` 且 `start_pos <= length(haystack) + 1`,则返回 `start_pos` +- 否则返回 `0` -## multiSearchFirstIndex(大海捞针,\[针1,针2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} +以上规则同样在这些函数中生效: [locate](#locate), [positionCaseInsensitive](#positionCaseInsensitive), [positionUTF8](#positionUTF8), [positionCaseInsensitiveUTF8](#positionCaseInsensitiveUTF8) -返回在字符串`haystack`中最先查找到的needle的索引`i`(从1开始),没有找到任何匹配项则返回0。 +数据类型: `Integer`. 
-对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchFirstIndexCaseInsensitive,multiSearchFirstIndexUTF8,multiSearchFirstIndexCaseInsensitiveUTF8`。
+**示例**

``` sql
SELECT position('Hello, world!', '!');
```

结果:

``` text
┌─position('Hello, world!', '!')─┐
│                             13 │
└────────────────────────────────┘
```

示例,使用参数 `start_pos`:

``` sql
SELECT
    position('Hello, world!', 'o', 1),
    position('Hello, world!', 'o', 7)
```
结果:
``` text
┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐
│                                 5 │                                 9 │
└───────────────────────────────────┴───────────────────────────────────┘
```

示例,语法别名 `needle IN haystack`:

```sql
SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s);
```

结果:

```text
┌─equals(6, position(s, '/'))─┐
│                           1 │
└─────────────────────────────┘
```

示例,子字符串 `needle` 为空:

``` sql
SELECT
    position('abc', ''),
    position('abc', '', 0),
    position('abc', '', 1),
    position('abc', '', 2),
    position('abc', '', 3),
    position('abc', '', 4),
    position('abc', '', 5)
```
结果:
``` text
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
│                   1 │                      1 │                      1 │                      2 │                      3 │                      4 │                      0 │
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
```

## locate

类似于 [position](#position),但交换了参数 `haystack` 和 `needle` 的位置。

此函数的行为取决于 ClickHouse 版本:
- 在 v24.3 以下的版本中,`locate` 是函数`position`的别名,参数为 `(haystack, needle[, start_pos])`。
- 在 v24.3 及以上的版本中,`locate` 是独立的函数(以更好地兼容 MySQL),参数为 `(needle, haystack[, start_pos])`。之前的行为
  可以在设置中恢复 [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order);

**语法**

``` sql
locate(needle, haystack[, start_pos])
```

## positionCaseInsensitive

类似于 [position](#position) 但是不区分大小写。

## positionUTF8

类似于 [position](#position) 但是假定 `haystack` 和 `needle` 是 UTF-8 编码的字符串。

**示例**

函数 `positionUTF8` 可以正确地将字符 `ö` 计为单个 Unicode 代码点(`ö` 在 UTF-8 中由两个字节表示):

``` sql
SELECT positionUTF8('Motörhead', 'r');
```

结果:

``` text
┌─position('Motörhead', 'r')─┐
│                          5 │
└────────────────────────────┘
```

## positionCaseInsensitiveUTF8

类似于 [positionUTF8](#positionutf8) 但是不区分大小写。

## multiSearchAllPositions

类似于 [position](#position),但返回字符串 `haystack` 中多个 `needle` 子字符串位置的数组(以字节为单位,从 1 开始)。

:::note
所有以 `multiSearch*()` 开头的函数最多支持 28 个 `needle`。
:::

**语法**

``` sql
multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
```

**参数**

- `haystack` — 被检索查询字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — 子字符串数组, 类型为[Array](../../sql-reference/data-types/array.md)

**返回值**

- 位置数组,数组中的每个元素对应于 `needle` 数组中的一个元素。如果在 `haystack` 中找到子字符串,则返回的数组中的元素为子字符串的位置(以字节为单位,从 1 开始);如果未找到子字符串,则返回的数组中的元素为 0。

**示例**

``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```

结果:

``` text
┌─multiSearchAllPositions('Hello, World!', ['hello', '!', 'world'])─┐
│ [0,13,0]                                                          │
└───────────────────────────────────────────────────────────────────┘
```

## multiSearchAllPositionsUTF8

类似于 [multiSearchAllPositions](#multiSearchAllPositions),但假定 `haystack` 和 `needle`-s 是 UTF-8 编码的字符串。

## multiSearchFirstPosition

类似于 `position`,在字符串 `haystack` 中匹配多个 `needle` 子字符串,返回最左侧任一匹配子串的位置。

函数 `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` 和 `multiSearchFirstPositionCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

**语法**

```sql
multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN])
```

## multiSearchFirstIndex

在字符串`haystack`中匹配多个`needle`子字符串,从左开始任一匹配的子串,返回其索引 `i` (从1开始),如无法匹配则返回0。

函数 `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` 和 `multiSearchFirstIndexCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

**语法**

```sql
multiSearchFirstIndex(haystack, [needle1, needle2, …, needleN])
```

## multiSearchAny {#multisearchany}

当至少有一个子字符串 `needle` 匹配 `haystack` 时返回1,否则返回0。

函数 `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` 和 `multiSearchAnyCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

**语法**

```sql
multiSearchAny(haystack, [needle1, needle2, …, needleN])
```

## match {#match}

返回字符串 `haystack` 是否匹配正则表达式 `pattern`([re2正则语法参考](https://github.com/google/re2/wiki/Syntax))。

匹配基于 UTF-8,例如`.` 匹配 Unicode 代码点 `¥`,它使用两个字节以 UTF-8 表示。正则表达式不得包含空字节。如果 `haystack` 或`pattern`不是有效的 UTF-8,则此行为为undefined。

与 re2 的默认行为不同,`.` 会匹配换行符。要禁用此功能,请在模式前面添加`(?-s)`。

如果仅希望搜索子字符串,可以使用函数 [like](#like)或 [position](#position) 来替代,这些函数的性能比此函数更高。

**语法**

```sql
match(haystack, pattern)
```

别名: `haystack REGEXP pattern` (operator)

## multiMatchAny

类似于 `match`,如果至少有一个表达式 `patterni` 匹配字符串 `haystack`,则返回1,否则返回0。

:::note
`multi[Fuzzy]Match*()` 函数家族使用了 [Vectorscan](https://github.com/VectorCamp/vectorscan) 库。
因此,只有当 ClickHouse 编译时支持矢量扫描时,它们才会启用。

要关闭所有使用矢量扫描(hyperscan)的功能,请使用设置 `SET allow_hyperscan = 0;`。

由于Vectorscan的限制,`haystack` 字符串的长度必须小于 2^32 字节。

Hyperscan 通常容易受到正则表达式拒绝服务 (ReDoS) 攻击。有关更多信息,请参见
[https://www.usenix.org/conference/usenixsecurity22/presentation/turonova](https://www.usenix.org/conference/usenixsecurity22/presentation/turonova)
[https://doi.org/10.1007/s10664-021-10033-1](https://doi.org/10.1007/s10664-021-10033-1)
[https://doi.org/10.1145/3236024.3236027](https://doi.org/10.1145/3236024.3236027)
建议用户谨慎检查提供的表达式。

:::

如果仅希望搜索子字符串,可以使用函数 [multiSearchAny](#multisearchany) 来替代,这些函数的性能比此函数更高。

**语法**

```sql
multiMatchAny(haystack, [pattern1, pattern2, …, patternN])
```

## multiMatchAnyIndex

类似于 `multiMatchAny`,但返回匹配 `haystack` 的任一模式的索引。

**语法**

```sql
multiMatchAnyIndex(haystack, [pattern1, pattern2, …, patternN])
```

## multiMatchAllIndices

类似于 `multiMatchAny`,返回一个数组,包含所有匹配 `haystack` 的模式的索引。

**语法**

```sql
multiMatchAllIndices(haystack, [pattern1, pattern2, …, patternN])
```

## multiFuzzyMatchAny

类似于 `multiMatchAny`,如果任一 `pattern` 在常量[编辑距离](https://en.wikipedia.org/wiki/Edit_distance)内匹配 `haystack`,则返回1。该功能依赖于实验特征 [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) 库,并且对于某些边缘场景可能会很慢。性能取决于编辑距离`distance`的值和使用的`pattern`,但与非模糊搜索相比,它的开销总是更高的。

:::note
由于 hyperscan 的限制,`multiFuzzyMatch*()` 函数族不支持 UTF-8 正则表达式(hyperscan以一串字节来处理)。
:::

**语法**

```sql
multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, …, patternN])
```

## multiFuzzyMatchAnyIndex

类似于 `multiFuzzyMatchAny`,但返回在编辑距离内匹配 `haystack` 的任一模式的索引。

**语法**

```sql
multiFuzzyMatchAnyIndex(haystack, distance, [pattern1, pattern2, …, patternN])
```

## multiFuzzyMatchAllIndices

类似于 `multiFuzzyMatchAny`,但返回在编辑距离内匹配 `haystack` 的所有模式索引的数组。

**语法**

```sql
multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, …, patternN])
```

## extract

使用正则表达式提取字符串。如果字符串 `haystack` 不匹配正则表达式 `pattern`,则返回空字符串。

对于没有子模式的正则表达式,该函数使用与整个正则表达式匹配的片段。否则,它使用与第一个子模式匹配的片段。

**语法**

```sql
extract(haystack, pattern)
```

## extractAll

使用正则表达式提取字符串的所有片段。如果字符串 `haystack` 不匹配正则表达式 `pattern`,则返回空字符串。

返回所有匹配项组成的字符串数组。

子模式的行为与函数`extract`中的行为相同。

**语法**

```sql
extractAll(haystack, pattern)
```

## extractAllGroupsHorizontal

使用`pattern`正则表达式匹配`haystack`字符串的所有组。

返回一个元素为数组的数组,其中第一个数组包含与第一组匹配的所有片段,第二个数组包含与第二组匹配的所有片段,依此类推。

这个函数比 [extractAllGroupsVertical](#extractallgroups-vertical) 更慢。

**语法**

``` sql
extractAllGroupsHorizontal(haystack, pattern)
```

**参数**

- `haystack` — 输入的字符串,数据类型为[String](../../sql-reference/data-types/string.md).
+- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax)),必须包含group,每个group用括号括起来。如果 `pattern` 不包含group则会抛出异常。数据类型为[String](../../sql-reference/data-types/string.md).

**返回值**

- 数据类型: [Array](../../sql-reference/data-types/array.md).

如果`haystack`不匹配`pattern`正则表达式,则返回一个空数组的数组。

**示例**

``` sql
SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```

结果:

``` text
┌─extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','def','ghi'],['111','222','333']]                                                │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```

## extractAllGroupsVertical

使用正则表达式 `pattern`匹配字符串`haystack`中的所有group。返回一个数组,其中每个数组包含每个group的匹配片段。片段按照在`haystack`中出现的顺序进行分组。

**语法**

``` sql
extractAllGroupsVertical(haystack, pattern)
```

**参数**

- `haystack` — 输入的字符串,数据类型为[String](../../sql-reference/data-types/string.md).
- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax)),必须包含group,每个group用括号括起来。如果 `pattern` 不包含group则会抛出异常。数据类型为[String](../../sql-reference/data-types/string.md).

**返回值**

- 数据类型: [Array](../../sql-reference/data-types/array.md).

如果`haystack`不匹配`pattern`正则表达式,则返回一个空数组。

**示例**

``` sql
SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```

结果:

``` text
┌─extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','111'],['def','222'],['ghi','333']]                                            │
└────────────────────────────────────────────────────────────────────────────────────────┘
```

## like {#like}

返回字符串 `haystack` 是否匹配 LIKE 表达式 `pattern`。

一个 LIKE 表达式可以包含普通字符和以下元字符:

- `%` 表示任意数量的任意字符(包括零个字符)。
- `_` 表示单个任意字符。
- `\` 用于转义文字 `%`, `_` 和 `\`。

匹配基于 UTF-8,例如 `_` 匹配 Unicode 代码点 `¥`,它使用两个字节以 UTF-8 表示。

如果 `haystack` 或 `LIKE` 表达式不是有效的 UTF-8,则行为是未定义的。

不会自动执行 Unicode 规范化,您可以使用[normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) 函数来执行此操作。

匹配字面上的 `%`, `_` 和 `\`(这些是 LIKE 元字符),请在其前面加上反斜杠:`\%`, `\_` 和 `\\`。
如果在反斜杠前使用非 `%`, `_` 或 `\` 字符,则反斜杠将失去其特殊含义(即被解释为字面值)。
请注意,ClickHouse 要求字符串中使用反斜杠 [也需要被转义](../syntax.md#string), 因此您实际上需要编写 `\\%`、`\\_` 和 `\\\\`。

对于形式为 `%needle%` 的 LIKE 表达式,函数的性能与 `position` 函数相同。
所有其他 LIKE 表达式都会被内部转换为正则表达式,并以与函数 `match` 相似的性能执行。

**语法**

```sql
like(haystack, pattern)
```

别名: `haystack LIKE pattern` (operator)

## notLike {#notlike}

类似于 `like` 但是返回相反的结果。

别名: `haystack NOT LIKE pattern` (operator)

## ilike

类似于 `like` 但是不区分大小写。

别名: `haystack ILIKE pattern` (operator)

## notILike

类似于 `ilike` 但是返回相反的结果。

别名: `haystack NOT ILIKE pattern` (operator)

## ngramDistance

计算字符串`haystack` 和子字符串`needle`的4-gram距离。 为此,它计算两个 4-gram 多重集之间的对称差异,并通过它们的基数之和对其进行标准化。返回0-1之间的Float32。返回值越小,代表字符串越相似。如果常量 `needle` 或 `haystack` 参数的大小超过 32KB,函数将抛出异常。如果非常量的 `haystack` 或 `needle` 参数的大小超过 32KB,则距离始终为 1。
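
下面是一个简单的示意查询(输出值仅作示意:完全相同的字符串距离为 0,差异越大返回值越接近 1):

``` sql
SELECT
    ngramDistance('ClickHouse', 'ClickHouse') AS identical,
    ngramDistance('ClickHouse', 'ClickHome')  AS similar,
    ngramDistance('ClickHouse', 'MySQL')      AS different;
```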
+
+函数 `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

**语法**

```sql
ngramDistance(haystack, needle)
```

## ngramSearch

类似于`ngramDistance`,但计算的是 `needle` 与 `haystack` 之间的非对称差异:即 `needle` 的 n-gram 数量减去二者公共的 n-gram 数量,再按 `needle` 的 n-gram 数量归一化。返回 0 到 1 之间的 Float32。结果越大,`needle` 越有可能出现在 `haystack` 中。该函数对于模糊字符串搜索很有用。另请参阅函数 `soundex`。

函数 `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

:::note
UTF-8变体使用了3-gram距离。这些并不是完全公平的n-gram距离。我们使用2字节的哈希函数来哈希n-gram,然后计算这些哈希表之间的(非)对称差异——可能会发生冲突。在使用UTF-8大小写不敏感格式时,我们并不使用公平的tolower函数——我们将每个码点字节的第5位(从零开始)和如果字节超过一个的零字节的第一位置零——这对拉丁字母和大部分西里尔字母都有效。
:::

**语法**

```sql
ngramSearch(haystack, needle)
```

## countSubstrings

返回字符串 `haystack` 中子字符串 `needle` 出现的次数。

函数 `countSubstringsCaseInsensitive` 和 `countSubstringsCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

**语法**

``` sql
countSubstrings(haystack, needle[, start_pos])
```

**参数**

- `haystack` — 被检索查询字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — 子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` – 在字符串`haystack` 中开始检索的位置(从1开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选

**返回值**

- 子字符串出现的次数。

数据类型: [UInt64](../../sql-reference/data-types/int-uint.md).

**示例**

``` sql
SELECT countSubstrings('aaaa', 'aa');
```

结果:

``` text
┌─countSubstrings('aaaa', 'aa')─┐
│                             2 │
└───────────────────────────────┘
```

示例,使用参数 `start_pos`:

```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```

结果:

``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│                                      1 │
└────────────────────────────────────────┘
```

## countMatches

返回正则表达式成功匹配的次数。

**语法**

``` sql
countMatches(haystack, pattern)
```

**参数**

- `haystack` — 输入的字符串,数据类型为[String](../../sql-reference/data-types/string.md).
+- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax)),数据类型为[String](../../sql-reference/data-types/string.md).

**返回值**

- 匹配次数。

数据类型: [UInt64](../../sql-reference/data-types/int-uint.md).

**示例**

``` sql
SELECT countMatches('foobar.com', 'o+');
```

结果:

``` text
┌─countMatches('foobar.com', 'o+')─┐
│                                2 │
└──────────────────────────────────┘
```

``` sql
SELECT countMatches('aaaa', 'aa');
```

结果:

``` text
┌─countMatches('aaaa', 'aa')────┐
│                             2 │
└───────────────────────────────┘
```

## countMatchesCaseInsensitive

类似于 `countMatches(haystack, pattern)` 但是不区分大小写。

## regexpExtract

按照正则表达式组索引 `index`,提取字符串 `haystack` 中第一个匹配正则表达式 `pattern` 的片段。

**语法**

``` sql
regexpExtract(haystack, pattern[, index])
```

别名: `REGEXP_EXTRACT(haystack, pattern[, index])`.

**参数**

- `haystack` — 被匹配字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — 正则表达式,必须是常量。 [String](../../sql-reference/syntax.md#syntax-string-literal).
- `index` – 一个大于等于0的整数,默认为1,它代表要提取哪个正则表达式组。 [UInt or Int](../../sql-reference/data-types/int-uint.md) 可选。

**返回值**

`pattern`可以包含多个正则组, `index` 代表要提取哪个正则表达式组。如果 `index` 为0,则返回整个匹配的字符串。

数据类型: `String`.

**示例**

``` sql
SELECT
    regexpExtract('100-200', '(\\d+)-(\\d+)', 1),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 2),
    regexpExtract('100-200', '(\\d+)-(\\d+)', 0),
    regexpExtract('100-200', '(\\d+)-(\\d+)');
```

结果:

``` text
┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐
│ 100                                          │ 200                                          │ 100-200                                      │ 100                                       │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```

## hasSubsequence

如果`needle`是`haystack`的子序列,返回1,否则返回0。
子序列是从给定字符串中删除零个或多个元素而不改变剩余元素的顺序得到的序列。

**语法**

``` sql
hasSubsequence(haystack, needle)
```

**参数**

- `haystack` — 被搜索的字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — 搜索子序列,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).

**返回值**

- 1, 如果`needle`是`haystack`的子序列
- 0, 如果`needle`不是`haystack`的子序列

数据类型: `UInt8`.
+ +**示例** + +``` sql +SELECT hasSubsequence('garbage', 'arg') ; +``` + +结果: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + +## hasSubsequenceCaseInsensitive +类似于[hasSubsequence](#hasSubsequence)但是不区分大小写。 + +## hasSubsequenceUTF8 + +类似于 [hasSubsequence](#hasSubsequence) 但是假定 `haystack` 和 `needle` 是 UTF-8 编码的字符串。 + +## hasSubsequenceCaseInsensitiveUTF8 + +类似于 [hasSubsequenceUTF8](#hasSubsequenceUTF8) 但是不区分大小写。 From 6f51b3c64dd5eb7c2f493f668ddadc62a28b5b48 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Mar 2024 12:08:58 +0100 Subject: [PATCH 060/150] Update 03032_scalars_create_as_select.sql --- tests/queries/0_stateless/03032_scalars_create_as_select.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03032_scalars_create_as_select.sql b/tests/queries/0_stateless/03032_scalars_create_as_select.sql index 5b20b8cf6f9..ae75a30ad8b 100644 --- a/tests/queries/0_stateless/03032_scalars_create_as_select.sql +++ b/tests/queries/0_stateless/03032_scalars_create_as_select.sql @@ -1,2 +1,2 @@ -create or replace table query_run_metric_arrays engine Memory as with (with (select groupUniqArrayArray(['a', 'b']) from numbers(1)) as all_names select all_names) as all_metrics select all_metrics; +create table query_run_metric_arrays engine Memory as with (with (select groupUniqArrayArray(['a', 'b']) from numbers(1)) as all_names select all_names) as all_metrics select all_metrics; select * from query_run_metric_arrays; From a376a7354fc863151aecf774df5cd82a73bd3b94 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 28 Mar 2024 15:04:14 +0300 Subject: [PATCH 061/150] Fixed code review issues --- .../AggregateFunctionUniq.h | 10 ++++---- .../Combinators/AggregateFunctionArray.h | 4 ++-- .../Combinators/AggregateFunctionIf.h | 4 ++-- .../Combinators/AggregateFunctionMerge.h | 4 ++-- .../Combinators/AggregateFunctionNull.h | 4 ++-- .../Combinators/AggregateFunctionState.h | 4 ++-- src/AggregateFunctions/IAggregateFunction.h | 4 ++-- src/AggregateFunctions/UniqExactSet.h | 11 ++++----- src/Interpreters/Aggregator.cpp | 24 +++++++++---------- src/Interpreters/Aggregator.h | 18 +++++++------- .../AggregatingInOrderTransform.cpp | 2 +- .../Transforms/AggregatingTransform.cpp | 10 ++++---- ...gingAggregatedMemoryEfficientTransform.cpp | 2 +- .../Transforms/MergingAggregatedTransform.cpp | 2 +- src/Processors/Transforms/RollupTransform.cpp | 3 ++- 15 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index c53b5e3bdb7..cef23f766c7 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -459,7 +459,7 @@ public: bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { if constexpr (is_parallelize_merge_prepare_needed) { @@ -485,10 +485,10 @@ public: bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * 
is_cancelled, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena *) const override { if constexpr (is_able_to_parallelize_merge) - this->data(place).set.merge(this->data(rhs).set, &thread_pool, is_cancelled); + this->data(place).set.merge(this->data(rhs).set, &thread_pool, &is_cancelled); else this->data(place).set.merge(this->data(rhs).set); } @@ -579,10 +579,10 @@ public: bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena *) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena *) const override { if constexpr (is_able_to_parallelize_merge) - this->data(place).set.merge(this->data(rhs).set, &thread_pool, is_cancelled); + this->data(place).set.merge(this->data(rhs).set, &thread_pool, &is_cancelled); else this->data(place).set.merge(this->data(rhs).set); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h index 9dc5e274dab..1940985f8e3 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h @@ -144,12 +144,12 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h index 91dcfa4db0b..a893fc91780 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h @@ -167,12 +167,12 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + void merge(AggregateDataPtr __restrict 
place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h index 5bc478116e0..4a39ec0ab87 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h @@ -113,12 +113,12 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index eef5f8bf66b..306e293cae7 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -154,7 +154,7 @@ public: bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { AggregateDataPtrs nested_places(places.begin(), places.end()); for (auto & nested_place : nested_places) @@ -163,7 +163,7 @@ public: nested_function->parallelizeMergePrepare(nested_places, thread_pool, is_cancelled); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { nested_function->merge(nestedPlace(place), nestedPlace(rhs), thread_pool, is_cancelled, arena); } diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionState.h b/src/AggregateFunctions/Combinators/AggregateFunctionState.h index 7b2933d42c9..5f2eb647c92 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionState.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionState.h @@ -94,12 +94,12 @@ public: bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } - void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic * is_cancelled) const override + void 
parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic & is_cancelled) const override { nested_func->parallelizeMergePrepare(places, thread_pool, is_cancelled); } - void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic * is_cancelled, Arena * arena) const override + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, std::atomic & is_cancelled, Arena * arena) const override { nested_func->merge(place, rhs, thread_pool, is_cancelled, arena); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index b33d4b20a16..97e0e89aee9 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -151,7 +151,7 @@ public: virtual bool isParallelizeMergePrepareNeeded() const { return false; } - virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic * /*is_cancelled*/) const + virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic & /*is_cancelled*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName()); } @@ -168,7 +168,7 @@ public: /// Should be used only if isAbleToParallelizeMerge() returned true. virtual void - merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, std::atomic * /*is_cancelled*/, Arena * /*arena*/) const + merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, std::atomic & /*is_cancelled*/, Arena * /*arena*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "merge() with thread pool parameter isn't implemented for {} ", getName()); } diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 18b0b830cb8..2ae8c3a8386 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -37,7 +37,7 @@ public: /// In merge, if one of the lhs and rhs is twolevelset and the other is singlelevelset, then the singlelevelset will need to convertToTwoLevel(). /// It's not in parallel and will cost extra large time if the thread_num is large. /// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel. 
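    /// Illustrative sketch of the rule above (comment only; names mirror the code below):
    ///   count the single-level sets in data_vec; if some but not all sets are single-level,
    ///   schedule convertToTwoLevel() for each single-level set on `thread_pool`, so the
    ///   later per-bucket merge never has to convert a set serially on one thread.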
- static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool, std::atomic * is_cancelled) + static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool, std::atomic & is_cancelled) { UInt64 single_level_set_num = 0; UInt64 all_single_hash_size = 0; @@ -63,7 +63,7 @@ public: try { auto data_vec_atomic_index = std::make_shared(0); - auto thread_func = [data_vec, data_vec_atomic_index, is_cancelled, thread_group = CurrentThread::getGroup()]() + auto thread_func = [data_vec, data_vec_atomic_index, &is_cancelled, thread_group = CurrentThread::getGroup()]() { SCOPE_EXIT_SAFE( if (thread_group) @@ -76,7 +76,7 @@ public: while (true) { - if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) + if (is_cancelled.load(std::memory_order_seq_cst)) return; const auto i = data_vec_atomic_index->fetch_add(1); @@ -117,9 +117,6 @@ public: { for (size_t i = 0; i < rhs.NUM_BUCKETS; ++i) { - if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) - return; - lhs.impls[i].merge(rhs.impls[i]); } } @@ -141,7 +138,7 @@ public: while (true) { - if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) + if (is_cancelled->load(std::memory_order_seq_cst)) return; const auto bucket = next_bucket_to_merge->fetch_add(1); diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 837b4e47fba..ab8cec864ae 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1436,7 +1436,7 @@ void NO_INLINE Aggregator::mergeOnIntervalWithoutKey( size_t row_begin, size_t row_end, const AggregateColumnsConstData & aggregate_columns_data, - std::atomic * is_cancelled) const + std::atomic & is_cancelled) const { /// `data_variants` will destroy the states of aggregate functions in the destructor data_variants.aggregator = this; @@ -1752,7 +1752,7 @@ Block Aggregator::mergeAndConvertOneBucketToBlock( Arena * arena, bool final, Int32 bucket, - std::atomic * is_cancelled) const + std::atomic & is_cancelled) const { auto & merged_data = *variants[0]; auto method = merged_data.type; @@ -1762,8 +1762,8 @@ Block Aggregator::mergeAndConvertOneBucketToBlock( #define M(NAME) \ else if (method == AggregatedDataVariants::Type::NAME) \ { \ - mergeBucketImpl(variants, bucket, arena); \ - if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) \ + mergeBucketImpl(variants, bucket, arena, is_cancelled); \ + if (is_cancelled.load(std::memory_order_seq_cst)) \ return {}; \ block = convertOneBucketToBlock(merged_data, *merged_data.NAME, arena, final, bucket); \ } @@ -2638,7 +2638,7 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( ManyAggregatedDataVariants & non_empty_data, - std::atomic * is_cancelled) const + std::atomic & is_cancelled) const { ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads}; @@ -2746,7 +2746,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( template void NO_INLINE Aggregator::mergeBucketImpl( - ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic * is_cancelled) const + ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic & is_cancelled) const { /// We merge all aggregation results to the first. 
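    /// Sketch of the loop below: data[0] accumulates bucket `bucket` from data[1..N-1],
    /// and is_cancelled is re-checked before each source so a cancelled query stops
    /// between inputs rather than after the whole bucket.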
     AggregatedDataVariantsPtr & res = data[0];
@@ -2756,7 +2756,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
 
     for (size_t result_num = 1, size = data.size(); result_num < size; ++result_num)
     {
-        if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst))
+        if (is_cancelled.load(std::memory_order_seq_cst))
             return;
 
         AggregatedDataVariants & current = *data[result_num];
@@ -2958,7 +2958,7 @@ void NO_INLINE Aggregator::mergeStreamsImpl(
 void NO_INLINE Aggregator::mergeBlockWithoutKeyStreamsImpl(
     Block block,
     AggregatedDataVariants & result,
-    std::atomic<bool> * is_cancelled) const
+    std::atomic<bool> & is_cancelled) const
 {
     AggregateColumnsConstData aggregate_columns = params.makeAggregateColumnsData(block);
     mergeWithoutKeyStreamsImpl(result, 0, block.rows(), aggregate_columns, is_cancelled);
@@ -2969,7 +2969,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl(
     size_t row_begin,
     size_t row_end,
     const AggregateColumnsConstData & aggregate_columns_data,
-    std::atomic<bool> * is_cancelled) const
+    std::atomic<bool> & is_cancelled) const
 {
     using namespace CurrentMetrics;
 
@@ -3005,7 +3005,7 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl(
 }
 
 
-bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys, std::atomic<bool> * is_cancelled) const
+bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool & no_more_keys, std::atomic<bool> & is_cancelled) const
 {
     /// `result` will destroy the states of aggregate functions in the destructor
     result.aggregator = this;
@@ -3075,7 +3075,7 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool
 }
 
 
-void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic<bool> * is_cancelled)
+void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic<bool> & is_cancelled)
 {
     if (bucket_to_blocks.empty())
         return;
@@ -3207,7 +3207,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari
 }
 
 
-Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, std::atomic<bool> * is_cancelled)
+Block Aggregator::mergeBlocks(BlocksList & blocks, bool final, std::atomic<bool> & is_cancelled)
 {
     if (blocks.empty())
         return {};
diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h
index 4bce700a099..406d28597cf 100644
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@@ -269,7 +269,7 @@ public:
     bool mergeOnBlock(Block block,
         AggregatedDataVariants & result,
        bool & no_more_keys,
-        std::atomic<bool> * is_cancelled) const;
+        std::atomic<bool> & is_cancelled) const;
 
     void mergeOnBlockSmall(
         AggregatedDataVariants & result,
@@ -283,7 +283,7 @@ public:
         size_t row_begin,
         size_t row_end,
         const AggregateColumnsConstData & aggregate_columns_data,
-        std::atomic<bool> * is_cancelled) const;
+        std::atomic<bool> & is_cancelled) const;
 
     /** Convert the aggregation data structure into a block.
       * If overflow_row = true, then aggregates for rows that are not included in max_rows_to_group_by are put in the first block.
@@ -298,13 +298,13 @@ public:
     using BucketToBlocks = std::map<Int32, BlocksList>;
     /// Merge partially aggregated blocks separated to buckets into one data structure.
-    void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic<bool> * is_cancelled);
+    void mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVariants & result, size_t max_threads, std::atomic<bool> & is_cancelled);
 
     /// Merge several partially aggregated blocks into one.
     /// Precondition: for all blocks block.info.is_overflows flag must be the same.
     /// (either all blocks are from overflow data or none blocks are).
     /// The resulting block has the same value of is_overflows flag.
-    Block mergeBlocks(BlocksList & blocks, bool final, std::atomic<bool> * is_cancelled);
+    Block mergeBlocks(BlocksList & blocks, bool final, std::atomic<bool> & is_cancelled);
 
     /** Split block with partially-aggregated data to many blocks, as if two-level method of aggregation was used.
       * This is needed to simplify merging of that data with other results, that are already two-level.
@@ -491,7 +491,7 @@ private:
 
     void mergeWithoutKeyDataImpl(
         ManyAggregatedDataVariants & non_empty_data,
-        std::atomic<bool> * is_cancelled) const;
+        std::atomic<bool> & is_cancelled) const;
 
     template <typename Method>
     void mergeSingleLevelDataImpl(
@@ -546,7 +546,7 @@ private:
         Arena * arena,
         bool final,
         Int32 bucket,
-        std::atomic<bool> * is_cancelled = nullptr) const;
+        std::atomic<bool> & is_cancelled) const;
 
     Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
     BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, ThreadPool * thread_pool) const;
@@ -603,18 +603,18 @@ private:
     void mergeBlockWithoutKeyStreamsImpl(
         Block block,
         AggregatedDataVariants & result,
-        std::atomic<bool> * is_cancelled) const;
+        std::atomic<bool> & is_cancelled) const;
 
     void mergeWithoutKeyStreamsImpl(
         AggregatedDataVariants & result,
         size_t row_begin,
         size_t row_end,
         const AggregateColumnsConstData & aggregate_columns_data,
-        std::atomic<bool> * is_cancelled) const;
+        std::atomic<bool> & is_cancelled) const;
 
     template <typename Method>
     void mergeBucketImpl(
-        ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> * is_cancelled = nullptr) const;
+        ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> & is_cancelled) const;
 
     template <typename Method>
     void convertBlockToTwoLevelImpl(
diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
index f959b2b01b4..9ffe15d0f85 100644
--- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp
+++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
@@ -160,7 +160,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk)
             if (group_by_key)
                 params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw);
             else
-                params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data, &is_cancelled);
+                params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data, is_cancelled);
         }
         else
         {
diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index 767448edc64..b48d435720a 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -132,7 +132,7 @@ protected:
             return {};
         }
 
-        Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, &shared_data->is_cancelled);
+        Block block = params->aggregator.mergeAndConvertOneBucketToBlock(*data, arena, params->final, bucket_num, shared_data->is_cancelled);
         Chunk chunk = convertToChunk(block);
 
         shared_data->is_bucket_processed[bucket_num] = true;
@@ -350,7 +350,7 @@ public:
             for (auto & input : inputs)
                 input.close();
 
-            shared_data->is_cancelled.store(true);
+            shared_data->is_cancelled.store(true, std::memory_order_seq_cst);
 
             return Status::Finished;
         }
@@ -472,7 +472,7 @@ private:
 
         if (first->type == AggregatedDataVariants::Type::without_key || params->params.overflow_row)
         {
-            params->aggregator.mergeWithoutKeyDataImpl(*data, &shared_data->is_cancelled);
+            params->aggregator.mergeWithoutKeyDataImpl(*data, shared_data->is_cancelled);
             auto block = params->aggregator.prepareBlockAndFillWithoutKey(
                 *first, params->final, first->type != AggregatedDataVariants::Type::without_key);
 
@@ -692,7 +692,7 @@ void AggregatingTransform::consume(Chunk chunk)
     {
         auto block = getInputs().front().getHeader().cloneWithColumns(chunk.detachColumns());
         block = materializeBlock(block);
-        if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys, &is_cancelled))
+        if (!params->aggregator.mergeOnBlock(block, variants, no_more_keys, is_cancelled))
             is_consume_finished = true;
     }
     else
@@ -712,7 +712,7 @@ void AggregatingTransform::initGenerate()
     if (variants.empty() && params->params.keys_size == 0 && !params->params.empty_result_for_aggregation_by_empty_set)
     {
         if (params->params.only_merge)
-            params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys, &is_cancelled);
+            params->aggregator.mergeOnBlock(getInputs().front().getHeader(), variants, no_more_keys, is_cancelled);
         else
             params->aggregator.executeOnBlock(getInputs().front().getHeader(), variants, key_columns, aggregate_columns, no_more_keys);
     }
diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
index 3bfd7874ac7..fc40c6894bb 100644
--- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
+++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp
@@ -363,7 +363,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk)
         res_info->chunk_num = chunks_to_merge->chunk_num;
         chunk.setChunkInfo(std::move(res_info));
 
-        auto block = params->aggregator.mergeBlocks(blocks_list, params->final, &is_cancelled);
+        auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled);
 
         if (!required_sort_description.empty())
             sortBlock(block, required_sort_description);
diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp
index 64207093568..ad723da7527 100644
--- a/src/Processors/Transforms/MergingAggregatedTransform.cpp
+++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp
@@ -72,7 +72,7 @@ Chunk MergingAggregatedTransform::generate()
         next_block = blocks.begin();
 
         /// TODO: this operation can be made async. Add async for IAccumulatingTransform.
-        params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads, &is_cancelled);
+        params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads, is_cancelled);
         blocks = params->aggregator.convertToBlocks(data_variants, params->final, max_threads);
         next_block = blocks.begin();
     }
diff --git a/src/Processors/Transforms/RollupTransform.cpp b/src/Processors/Transforms/RollupTransform.cpp
index 20ee91a203a..bc5a7aeb7bf 100644
--- a/src/Processors/Transforms/RollupTransform.cpp
+++ b/src/Processors/Transforms/RollupTransform.cpp
@@ -57,7 +57,8 @@ Chunk GroupByModifierTransform::merge(Chunks && chunks, bool is_input, bool fina
     for (auto & chunk : chunks)
         blocks.emplace_back(header.cloneWithColumns(chunk.detachColumns()));
 
-    auto current_block = is_input ?
params->aggregator.mergeBlocks(blocks, final, &is_cancelled) : output_aggregator->mergeBlocks(blocks, final, &is_cancelled); + auto & aggregator = is_input ? params->aggregator : *output_aggregator; + auto current_block = aggregator.mergeBlocks(blocks, final, is_cancelled); auto num_rows = current_block.rows(); return Chunk(current_block.getColumns(), num_rows); } From f4c5be3bb4bc999c65eb040ced98be88982d12f0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 28 Mar 2024 14:04:31 +0100 Subject: [PATCH 062/150] Fix CREATE TABLE w/o columns definition for ReplicatedMergeTree Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 4 ++-- .../03032_rmt_create_columns_from_replica.reference | 7 +++++++ .../0_stateless/03032_rmt_create_columns_from_replica.sql | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03032_rmt_create_columns_from_replica.reference create mode 100644 tests/queries/0_stateless/03032_rmt_create_columns_from_replica.sql diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index e89547952d0..9a3c17923d8 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -588,7 +588,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) FunctionNameNormalizer().visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( - args.columns, partition_key, minmax_columns, primary_key_asts, context)); + columns, partition_key, minmax_columns, primary_key_asts, context)); if (args.storage_def->sample_by) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, context); @@ -697,7 +697,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) FunctionNameNormalizer().visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( - args.columns, partition_key, minmax_columns, primary_key_asts, context)); + columns, partition_key, minmax_columns, primary_key_asts, context)); const auto * ast = engine_args[arg_num]->as(); if (ast && ast->value.getType() == Field::Types::UInt64) diff --git a/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.reference b/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.reference new file mode 100644 index 00000000000..6dda968659d --- /dev/null +++ b/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.reference @@ -0,0 +1,7 @@ +CREATE TABLE default.data_r2 +( + `key` Int32 +) +ENGINE = ReplicatedMergeTree('/tables/default', 'r2') +ORDER BY tuple() +SETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.sql b/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.sql new file mode 100644 index 00000000000..e08db0bdacf --- /dev/null +++ b/tests/queries/0_stateless/03032_rmt_create_columns_from_replica.sql @@ -0,0 +1,5 @@ +drop table if exists data_r1; +drop table if exists data_r2; +create table data_r1 (key Int) engine=ReplicatedMergeTree('/tables/{database}', 'r1') order by tuple(); +create table data_r2 engine=ReplicatedMergeTree('/tables/{database}', 'r2') order by tuple(); +show create data_r2 format 
LineAsString; From 1a75d3ed46d0a2fe6a596afbc27ae20ff97427fb Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Mar 2024 13:58:58 +0000 Subject: [PATCH 063/150] Updating test. --- .../02802_with_cube_with_totals.reference | 30 +++++++++++++++++++ .../02802_with_cube_with_totals.sql | 1 + 2 files changed, 31 insertions(+) diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.reference b/tests/queries/0_stateless/02802_with_cube_with_totals.reference index c7b7b570456..206c32e562b 100644 --- a/tests/queries/0_stateless/02802_with_cube_with_totals.reference +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.reference @@ -1,5 +1,35 @@ ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 \N diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.sql 
b/tests/queries/0_stateless/02802_with_cube_with_totals.sql
index 77adb68eb4b..168e4d61b68 100644
--- a/tests/queries/0_stateless/02802_with_cube_with_totals.sql
+++ b/tests/queries/0_stateless/02802_with_cube_with_totals.sql
@@ -1,2 +1,3 @@
+set allow_experimental_analyzer=1;
 SELECT tuple((2147483648, (-0., 1.1754943508222875e-38, 2147483646, '-9223372036854775808', NULL))), toInt128(0.0001) GROUP BY ((256, toInt64(1.1754943508222875e-38), NULL), NULL, -0., ((65535, '-92233720368547758.07'), 0.9999), tuple(((1., 3.4028234663852886e38, '1', 0.5), NULL, tuple('0.1')))) WITH CUBE WITH TOTALS;
 SELECT NULL GROUP BY toUUID(NULL, '0', NULL, '0.0000065535'), 1 WITH CUBE WITH TOTALS;

From 863f479ff025532ea8a601c7081b65931197b98e Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Thu, 28 Mar 2024 14:00:04 +0000
Subject: [PATCH 064/150] CI: fix for timeout env variable

---
 tests/ci/ci.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 17552377971..0f57d3ba991 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -1761,9 +1761,10 @@ def _run_test(job_name: str, run_command: str) -> int:
         run_command or CI_CONFIG.get_job_config(job_name).run_command
     ), "Run command must be provided as input argument or be configured in job config"
 
+    if CI_CONFIG.get_job_config(job_name).timeout:
+        os.environ["KILL_TIMEOUT"] = str(CI_CONFIG.get_job_config(job_name).timeout)
+
     if not run_command:
-        if CI_CONFIG.get_job_config(job_name).timeout:
-            os.environ["KILL_TIMEOUT"] = str(CI_CONFIG.get_job_config(job_name).timeout)
         run_command = "/".join(
             (os.path.dirname(__file__), CI_CONFIG.get_job_config(job_name).run_command)
         )

From 4b990e732cd0e0cb30e03b4b930ce3a85ef448c4 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 28 Mar 2024 15:22:26 +0100
Subject: [PATCH 065/150] Fix style

---
 src/Storages/StorageSet.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp
index 8561c3f3aa8..54218351cf1 100644
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@@ -248,7 +248,7 @@ void StorageSetOrJoinBase::restore()
     static const auto file_suffix_size = strlen(".bin");
 
     using FilePriority = std::pair<UInt64, String>;
-    std::priority_queue<FilePriority, std::vector<FilePriority>, std::greater<FilePriority>> backup_files;
+    std::priority_queue<FilePriority, std::vector<FilePriority>, std::greater<>> backup_files;
     for (auto dir_it{disk->iterateDirectory(path)}; dir_it->isValid(); dir_it->next())
     {
         const auto & name = dir_it->name();

From fb235c0840d380fdf2b7107dc242c18a7f5721a5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 28 Mar 2024 14:44:45 +0000
Subject: [PATCH 066/150] Fix optimize_arithmetic_operations_in_aggregate_functions optimization which could change result type.
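
The pass rewrites `sum(column + literal)` into `sum(column) + literal * count(column)`
(and similarly for minus), and the rewritten plus/minus expression may have a
different result type than the original aggregate (for example, a different
Decimal scale); when the types differ, the result is now cast back to the
original type. The query below (the one added to the regression test in this
commit) illustrates the case:

    select (sum(toDecimal64(2.11, 15) - number), 1) FROM numbers(2);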
---
 .../Passes/RewriteSumFunctionWithSumAndCountPass.cpp    | 10 ++++++++--
 ..._functions_arithmetic_operations_pass_fix.reference |  1 +
 ...regate_functions_arithmetic_operations_pass_fix.sql |  2 ++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp
index 3c93bf9e1bf..917256bf4b1 100644
--- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp
+++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include

namespace DB
@@ -83,7 +84,7 @@ public:
         rhs->getArguments().getNodes().push_back(rhs_count);
         resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName());
 
-        const auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
+        auto new_node = std::make_shared<FunctionNode>(Poco::toLower(func_plus_minus_node->getFunctionName()));
         if (column_id == 0)
             new_node->getArguments().getNodes() = {lhs, rhs};
         else if (column_id == 1)
@@ -93,7 +94,12 @@ public:
         if (!new_node)
             return;
 
-        node = new_node;
+        QueryTreeNodePtr res = std::move(new_node);
+
+        if (!res->getResultType()->equals(*function_node->getResultType()))
+            res = createCastFunction(res, function_node->getResultType(), getContext());
+
+        node = std::move(res);
     }


diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference
index 43282d09bab..5028adae32e 100644
--- a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference
+++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference
@@ -1,2 +1,3 @@
 4 2
 100 99
+(3.22,1)
diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql
index 5eba14ea528..8491018eb72 100644
--- a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql
+++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql
@@ -14,3 +14,5 @@ INSERT INTO test_table VALUES (1, 1);
 SELECT sum((2 * id) as func), func FROM test_table GROUP BY id;
 
 SELECT max(100-number), min(100-number) FROM numbers(2);
+
+select (sum(toDecimal64(2.11, 15) - number), 1) FROM numbers(2);

From 56e95c84c17c9201c35af71408f0684683d74f32 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 28 Mar 2024 16:45:02 +0100
Subject: [PATCH 067/150] Update DatabaseOnDisk.cpp

---
 src/Databases/DatabaseOnDisk.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 550f1a756cb..d8acfb5fa01 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -109,7 +109,7 @@ std::pair<ASTPtr, StoragePtr> createTableFromAST(
     }
     else
     {
-        columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, LoadingStrictnessLevel::ATTACH);
+        columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context, mode);
         constraints = InterpreterCreateQuery::getConstraintsDescription(ast_create_query.columns_list->constraints);
     }
 }

From 0b85d8570bf58e193f1df41a38f2b82be5ac96a6 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 28
Mar 2024 16:46:44 +0100 Subject: [PATCH 068/150] Resubmit 'Update invalidate_query_response on dictionary startup' --- src/Interpreters/ExternalLoader.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index bd56a540128..73dacfacf79 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -996,6 +996,14 @@ private: if (!new_object && !new_exception) throw Exception(ErrorCodes::LOGICAL_ERROR, "No object created and no exception raised for {}", type_name); + if (!info->object && new_object) + { + /// If we loaded the object for the first time then we should set `invalidate_query_response` to the current value. + /// Otherwise we will immediately try to reload the object again despite the fact that it was just loaded. + bool is_modified = new_object->isModified(); + LOG_TRACE(log, "Object '{}' was{} modified", name, (is_modified ? "" : " not")); + } + /// Saving the result of the loading. { LoadingGuardForAsyncLoad lock(async, mutex); From c834a11b653e9a67abe990d9db24dfa998aa9aa5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Mar 2024 16:32:12 +0000 Subject: [PATCH 069/150] Ignore IfChainToMultiIfPass if returned type changed. --- src/Analyzer/Passes/IfChainToMultiIfPass.cpp | 6 ++++++ .../0_stateless/01388_multi_if_optimization.reference | 2 ++ tests/queries/0_stateless/01388_multi_if_optimization.sql | 3 +++ 3 files changed, 11 insertions(+) diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 70b717f3108..beb2247607e 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -65,6 +65,12 @@ public: auto multi_if_function = std::make_shared("multiIf"); multi_if_function->getArguments().getNodes() = std::move(multi_if_arguments); multi_if_function->resolveAsFunction(multi_if_function_ptr->build(multi_if_function->getArgumentColumns())); + + /// Ignore if returned type changed. + /// Example : SELECT now64(if(Null, NULL, if(Null, nan, toFloat64(number))), Null) FROM numbers(2) + if (!multi_if_function->getResultType()->equals(*function_node->getResultType())) + return; + node = std::move(multi_if_function); } diff --git a/tests/queries/0_stateless/01388_multi_if_optimization.reference b/tests/queries/0_stateless/01388_multi_if_optimization.reference index 6dbe0f0d96f..9c2990274fe 100644 --- a/tests/queries/0_stateless/01388_multi_if_optimization.reference +++ b/tests/queries/0_stateless/01388_multi_if_optimization.reference @@ -2,3 +2,5 @@ SELECT if(number = 1, \'hello\', if(number = 2, \'world\', \'xyz\')) FROM numbers(10) SELECT multiIf(number = 1, \'hello\', number = 2, \'world\', \'xyz\') FROM numbers(10) +\N +\N diff --git a/tests/queries/0_stateless/01388_multi_if_optimization.sql b/tests/queries/0_stateless/01388_multi_if_optimization.sql index 345fcfb6fcc..0ad6df9fdfa 100644 --- a/tests/queries/0_stateless/01388_multi_if_optimization.sql +++ b/tests/queries/0_stateless/01388_multi_if_optimization.sql @@ -3,3 +3,6 @@ SET optimize_if_chain_to_multiif = 0; EXPLAIN SYNTAX SELECT number = 1 ? 'hello' : (number = 2 ? 'world' : 'xyz') FROM numbers(10); SET optimize_if_chain_to_multiif = 1; EXPLAIN SYNTAX SELECT number = 1 ? 'hello' : (number = 2 ? 
'world' : 'xyz') FROM numbers(10);
+
+-- fuzzed
+SELECT now64(if(Null, NULL, if(Null, nan, toFloat64(number))), Null) FROM numbers(2);

From be3c498833566f20e47afe1dc65fdf58af9d7ee1 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Thu, 28 Mar 2024 18:22:26 +0100
Subject: [PATCH 070/150] Support more than 255 replicas in system table

---
 src/Storages/System/StorageSystemReplicas.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp
index 35550de11cb..e4b8d78a3a6 100644
--- a/src/Storages/System/StorageSystemReplicas.cpp
+++ b/src/Storages/System/StorageSystemReplicas.cpp
@@ -235,8 +235,8 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_)
         "If log_pointer is much smaller than log_max_index, something is wrong."},
     { "last_queue_update", std::make_shared<DataTypeDateTime>(), "When the queue was updated last time."},
     { "absolute_delay", std::make_shared<DataTypeUInt64>(), "How big lag in seconds the current replica has."},
-    { "total_replicas", std::make_shared<DataTypeUInt8>(), "The total number of known replicas of this table."},
-    { "active_replicas", std::make_shared<DataTypeUInt8>(), "The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas)."},
+    { "total_replicas", std::make_shared<DataTypeUInt32>(), "The total number of known replicas of this table."},
+    { "active_replicas", std::make_shared<DataTypeUInt32>(), "The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas)."},
     { "lost_part_count", std::make_shared<DataTypeUInt64>(), "The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase."},
     { "last_queue_update_exception", std::make_shared<DataTypeString>(), "When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions."},
     { "zookeeper_exception", std::make_shared<DataTypeString>(), "The last exception message, got if the error happened when fetching the info from ClickHouse Keeper."},

From a94143ee194176ead64603f8be9d54c8548fc6d8 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 28 Mar 2024 18:26:03 +0100
Subject: [PATCH 071/150] Fix stress tests for analyzer due to experimental WINDOW VIEW (by disabling it)

CI found [1]:

    2024.03.28 13:16:00.797344 [ 99740 ] {} <Error> Application: Caught exception while loading metadata: Code: 695. DB::Exception: Load job 'load table 01069_window_view_proc_tumble_watch.wv' failed: Code: 1. DB::Exception: Experimental WINDOW VIEW feature is not supported with new infrastructure for query analysis (the setting 'allow_experimental_analyzer'): Cannot attach table `01069_window_view_proc_tumble_watch`.`wv` from metadata file /var/lib/clickhouse/store/dd8/dd82a4f5-5485-4747-9172-8976d1194c54/wv.sql from query ATTACH WINDOW VIEW `01069_window_view_proc_tumble_watch`.wv UUID '35cc7d27-df3b-4569-9cb9-2ccf6cb1ff4c' (`count` UInt64) ENGINE = Memory AS SELECT count(a) AS count FROM `01069_window_view_proc_tumble_watch`.mt GROUP BY tumble(timestamp, toIntervalSecond('1'), 'US/Samoa') AS wid.
(UNSUPPORTED_METHOD), Stack trace (when copying this message, always include the lines below): [1]: https://s3.amazonaws.com/clickhouse-test-reports/61997/ced095394b6fb6d50ed8b6834bd5911ad4702c6e/stateless_tests__tsan__s3_storage__[5_5].html Signed-off-by: Azat Khuzhin --- docker/test/stress/run.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index cd7c1243a4a..6c6caf872e9 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -87,6 +87,25 @@ if [ "$cache_policy" = "SLRU" ]; then mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml fi +# Disable experimental WINDOW VIEW tests for stress tests, since they may be +# created with old analyzer and then, after server restart it will refuse to +# start. +# FIXME: remove once the support for WINDOW VIEW will be implemented in analyzer. +sudo cat /etc/clickhouse-server/users.d/stress_tests_overrides.xml < + + + false + + + + + + + + +EOL + start_server clickhouse-client --query "SHOW TABLES FROM datasets" From 54d7a6041e36d6d8eaf793f0a931e34b6bd8e103 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Mar 2024 17:35:38 +0000 Subject: [PATCH 072/150] Fix type for ConvertInToEqualPass --- src/Analyzer/Passes/ConvertInToEqualPass.cpp | 5 ++++- tests/queries/0_stateless/03013_optimize_in_to_equal.sql | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/ConvertInToEqualPass.cpp b/src/Analyzer/Passes/ConvertInToEqualPass.cpp index 66a37fea5bd..6980983486a 100644 --- a/src/Analyzer/Passes/ConvertInToEqualPass.cpp +++ b/src/Analyzer/Passes/ConvertInToEqualPass.cpp @@ -40,7 +40,10 @@ public: return ; auto result_func_name = MAPPING.at(func_node->getFunctionName()); auto equal = std::make_shared(result_func_name); - QueryTreeNodes arguments{column_node->clone(), constant_node->clone()}; + QueryTreeNodes arguments{ + column_node->clone(), + std::make_shared(constant_node->getValue(), removeNullable(constant_node->getResultType())) + }; equal->getArguments().getNodes() = std::move(arguments); FunctionOverloadResolverPtr resolver; bool decimal_check_overflow = getContext()->getSettingsRef().decimal_check_overflow; diff --git a/tests/queries/0_stateless/03013_optimize_in_to_equal.sql b/tests/queries/0_stateless/03013_optimize_in_to_equal.sql index ba6eb5d4f5f..e0eaa84cb8d 100644 --- a/tests/queries/0_stateless/03013_optimize_in_to_equal.sql +++ b/tests/queries/0_stateless/03013_optimize_in_to_equal.sql @@ -27,3 +27,6 @@ select '-------------------'; explain query tree select * from test where x not in (NULL); select '-------------------'; explain query tree select * from test where x in (NULL); +select '-------------------'; +--- fuzzed +SELECT number FROM numbers(2) WHERE arrayExists(_ -> (_ IN toNullable(4294967290)), [number]); From 8758d7f3a8ebbdb4fa0bb071e4f23695fc53215e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 28 Mar 2024 18:19:40 +0000 Subject: [PATCH 073/150] Fixing test. 
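
Follow-up to the previous commit: besides stripping Nullable from the
constant's type, the new ConstantNode now also keeps the source expression of
the original constant, and the reference file gains the trailing separator
printed by the queries that the previous commit appended to
03013_optimize_in_to_equal.sql:

    select '-------------------';
    --- fuzzed
    SELECT number FROM numbers(2) WHERE arrayExists(_ -> (_ IN toNullable(4294967290)), [number]);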
---
 src/Analyzer/Passes/ConvertInToEqualPass.cpp          | 7 +++----
 .../0_stateless/03013_optimize_in_to_equal.reference  | 1 +
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Analyzer/Passes/ConvertInToEqualPass.cpp b/src/Analyzer/Passes/ConvertInToEqualPass.cpp
index 6980983486a..b204d2fb922 100644
--- a/src/Analyzer/Passes/ConvertInToEqualPass.cpp
+++ b/src/Analyzer/Passes/ConvertInToEqualPass.cpp
@@ -40,10 +40,9 @@ public:
             return ;
         auto result_func_name = MAPPING.at(func_node->getFunctionName());
         auto equal = std::make_shared<FunctionNode>(result_func_name);
-        QueryTreeNodes arguments{
-            column_node->clone(),
-            std::make_shared<ConstantNode>(constant_node->getValue(), removeNullable(constant_node->getResultType()))
-        };
+        auto new_const = std::make_shared<ConstantNode>(constant_node->getValue(), removeNullable(constant_node->getResultType()));
+        new_const->getSourceExpression() = constant_node->getSourceExpression();
+        QueryTreeNodes arguments{column_node->clone(), new_const};
         equal->getArguments().getNodes() = std::move(arguments);
         FunctionOverloadResolverPtr resolver;
         bool decimal_check_overflow = getContext()->getSettingsRef().decimal_check_overflow;
diff --git a/tests/queries/0_stateless/03013_optimize_in_to_equal.reference b/tests/queries/0_stateless/03013_optimize_in_to_equal.reference
index 93ac91bd957..7d1118b7730 100644
--- a/tests/queries/0_stateless/03013_optimize_in_to_equal.reference
+++ b/tests/queries/0_stateless/03013_optimize_in_to_equal.reference
@@ -186,3 +186,4 @@ QUERY id: 0
       LIST id: 6, nodes: 2
         COLUMN id: 7, column_name: x, result_type: String, source_id: 3
         CONSTANT id: 8, constant_value: NULL, constant_value_type: Nullable(Nothing)
+-------------------

From c834a11b653e9a67abe990d9db24dfa998aa9aa5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 28 Mar 2024 18:26:15 +0000
Subject: [PATCH 074/150] Fixing another case with grouping set

---
 .../Passes/GroupingFunctionsResolvePass.cpp   |  2 +-
 src/Processors/QueryPlan/AggregatingStep.cpp  | 26 +++++++++++--------
 ...up_by_use_nulls_analyzer_crashes.reference | 10 +++++++
 ...23_group_by_use_nulls_analyzer_crashes.sql |  2 ++
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
index cf0fb824b4d..085519c7220 100644
--- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
+++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
@@ -146,7 +146,7 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
     if (query_node_typed.hasGroupBy())
     {
         /// It is expected by execution layer that if there are only 1 grouping set it will be removed
-        if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.getGroupBy().getNodes().size() == 1)
+        if (query_node_typed.isGroupByWithGroupingSets() && query_node_typed.getGroupBy().getNodes().size() == 1 && !context->getSettingsRef().group_by_use_nulls)
         {
             auto grouping_set_list_node = query_node_typed.getGroupBy().getNodes().front();
             auto & grouping_set_list_node_typed = grouping_set_list_node->as<ListNode &>();
diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp
index a76bacdd97b..74f293e5682 100644
--- a/src/Processors/QueryPlan/AggregatingStep.cpp
+++ b/src/Processors/QueryPlan/AggregatingStep.cpp
@@ -191,20 +191,24 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
     const size_t streams = pipeline.getNumStreams();
 
     auto input_header = pipeline.getHeader();
-
pipeline.transform([&](OutputPortRawPtrs ports) + + if (grouping_sets_size > 1) { - Processors copiers; - copiers.reserve(ports.size()); - - for (auto * port : ports) + pipeline.transform([&](OutputPortRawPtrs ports) { - auto copier = std::make_shared(input_header, grouping_sets_size); - connect(*port, copier->getInputPort()); - copiers.push_back(copier); - } + Processors copiers; + copiers.reserve(ports.size()); - return copiers; - }); + for (auto * port : ports) + { + auto copier = std::make_shared(input_header, grouping_sets_size); + connect(*port, copier->getInputPort()); + copiers.push_back(copier); + } + + return copiers; + }); + } pipeline.transform([&](OutputPortRawPtrs ports) { diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 9d8381407fb..4081b82a8f5 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -50,3 +50,13 @@ ([7]) ([8]) ([9]) +(0) +(1) +(2) +(3) +(4) +(5) +(6) +(7) +(8) +(9) diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 670c4fb1284..012da547581 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -3,3 +3,5 @@ set allow_experimental_analyzer = 1, group_by_use_nulls = 1; SELECT tuple(tuple(number)) as x FROM numbers(10) GROUP BY (number, tuple(number)) with cube order by x; select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) WITH ROLLUP order by x; + +SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order by x; From 1ea058a702b68b9d3342ed01a700341b584c5979 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 28 Mar 2024 19:33:23 +0100 Subject: [PATCH 075/150] ReadWriteBufferFromHTTP set right header host when redireced --- src/Common/HTTPConnectionPool.cpp | 12 +++++++++--- src/IO/ReadWriteBufferFromHTTP.cpp | 29 ++++++++++++++++------------- src/IO/ReadWriteBufferFromHTTP.h | 1 - 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index f729b8ea8d0..cd2505df7f3 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -295,8 +295,13 @@ private: String getTarget() const { if (!Session::getProxyConfig().host.empty()) - return fmt::format("{} over proxy {}", Session::getHost(), Session::getProxyConfig().host); - return Session::getHost(); + return fmt::format("{}:{} over proxy {}", + Session::getHost(), + Session::getPort(), + Session::getProxyConfig().host); + return fmt::format("{}:{}", + Session::getHost(), + Session::getPort()); } void flushRequest() override @@ -472,7 +477,8 @@ public: String getTarget() const { if (!proxy_configuration.isEmpty()) - return fmt::format("{} over proxy {}", host, proxy_configuration.host); + return fmt::format("{} over proxy {}", + host, proxy_configuration.host); return host; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 61c40c5a8fc..63a91026701 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -97,7 +97,7 @@ size_t ReadWriteBufferFromHTTP::getOffset() const void 
ReadWriteBufferFromHTTP::prepareRequest(Poco::Net::HTTPRequest & request, std::optional range) const { - request.setHost(initial_uri.getHost()); // use original, not resolved host name in header + request.setHost(current_uri.getHost()); if (out_stream_callback) request.setChunkedTransferEncoding(true); @@ -237,15 +237,15 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( } ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callImpl( - Poco::Net::HTTPResponse & response, const Poco::URI & uri_, const std::string & method_, const std::optional & range, bool allow_redirects) const + Poco::Net::HTTPResponse & response, const std::string & method_, const std::optional & range, bool allow_redirects) const { if (remote_host_filter) - remote_host_filter->checkURL(uri_); + remote_host_filter->checkURL(current_uri); - Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + Poco::Net::HTTPRequest request(method_, current_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); prepareRequest(request, range); - auto session = makeHTTPSession(connection_group, uri_, timeouts, proxy_config); + auto session = makeHTTPSession(connection_group, current_uri, timeouts, proxy_config); ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPRequestsSent); @@ -263,7 +263,7 @@ ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callImpl( ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callWithRedirects( Poco::Net::HTTPResponse & response, const String & method_, const std::optional & range) { - auto result = callImpl(response, current_uri, method_, range, true); + auto result = callImpl(response, method_, range, true); while (isRedirect(response.getStatus())) { @@ -279,8 +279,7 @@ ReadWriteBufferFromHTTP::CallResult ReadWriteBufferFromHTTP::callWithRedirects( initial_uri.toString(), max_redirects ? "increase the allowed maximum number of" : "allow"); current_uri = uri_redirect; - - result = callImpl(response, uri_redirect, method_, range, true); + result = callImpl(response, method_, range, true); } return result; @@ -347,9 +346,11 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, { if (!mute_logging) LOG_ERROR(log, - "Failed to make request to '{}'. Error: '{}'. " + "Failed to make request to `{}`{}. " + "Error: '{}'. " "Failed at try {}/{}.", - initial_uri.toString(), error_message, + initial_uri.toString(), current_uri == initial_uri ? String() : fmt::format(" redirect to `{}`", current_uri.toString()), + error_message, attempt, read_settings.http_max_tries); std::rethrow_exception(exception); @@ -361,10 +362,12 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, if (!mute_logging) LOG_INFO(log, - "Failed to make request to `{}`. Error: {}. " + "Failed to make request to `{}`{}. " + "Error: {}. " "Failed at try {}/{}. " "Will retry with current backoff wait is {}/{} ms.", - initial_uri.toString(), error_message, + initial_uri.toString(), current_uri == initial_uri ? 
String() : fmt::format(" redirect to `{}`", current_uri.toString()), + error_message, attempt + 1, read_settings.http_max_tries, milliseconds_to_wait, read_settings.http_retry_max_backoff_ms); @@ -512,7 +515,7 @@ size_t ReadWriteBufferFromHTTP::readBigAt(char * to, size_t n, size_t offset, co auto range = HTTPRange{offset, offset + n - 1}; Poco::Net::HTTPResponse response; - auto result = callImpl(response, current_uri, method, range, false); + auto result = callImpl(response, method, range, false); if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT && (offset != 0 || offset + n < *file_info->file_size)) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 224aac809a4..f496fe3ddcd 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -107,7 +107,6 @@ private: CallResult callImpl( Poco::Net::HTTPResponse & response, - const Poco::URI & uri_, const std::string & method_, const std::optional & range, bool allow_redirects) const; From d967a9280c9ef2b182be441c459a9f8151141ed2 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 28 Mar 2024 19:41:30 +0100 Subject: [PATCH 076/150] Update test --- .../0_stateless/02117_show_create_table_system.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index c9638e62655..bdd0da7d166 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -881,8 +881,8 @@ CREATE TABLE system.replicas `log_pointer` UInt64, `last_queue_update` DateTime, `absolute_delay` UInt64, - `total_replicas` UInt8, - `active_replicas` UInt8, + `total_replicas` UInt32, + `active_replicas` UInt32, `lost_part_count` UInt64, `last_queue_update_exception` String, `zookeeper_exception` String, From 1fde8942d6a4d13a39ac31952bf096054ab7cacc Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Mar 2024 16:45:43 +0100 Subject: [PATCH 077/150] Read system.replicas with different max_block_size --- .../02908_many_requests_to_system_replicas.reference | 4 ++++ .../0_stateless/02908_many_requests_to_system_replicas.sh | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index 17c94686470..0a4008cc35e 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,4 +1,8 @@ Creating 300 tables +900 2700 2700 +900 2700 2700 +900 2700 2700 +900 2700 2700 Making 200 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index 17e1d87963a..18c55159281 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -45,6 +45,13 @@ done wait; +# Check results with different max_block_size +$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase()' +$CLICKHOUSE_CLIENT -q 'SELECT count(), 
sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1'
+$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77'
+$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111'
+
+
 echo "Making $CONCURRENCY requests to system.replicas"
 
 for i in $(seq 1 $CONCURRENCY)

From 79b486b8bfa9d9e540d1ab95281e4d4ca554f492 Mon Sep 17 00:00:00 2001
From: zhongyuankai <872237106@qq.com>
Date: Fri, 29 Mar 2024 09:49:24 +0800
Subject: [PATCH 078/150] better

---
 src/Interpreters/InterpreterDropQuery.cpp     | 2 +-
 src/Parsers/ASTIdentifier.cpp                 | 6 ------
 src/Parsers/ASTIdentifier.h                   | 1 -
 src/Parsers/tests/gtest_dictionary_parser.cpp | 4 ++--
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 7c5d0de081a..e29e59ee4c3 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -465,7 +465,7 @@ void InterpreterDropQuery::extendQueryLogElemImpl(DB::QueryLogElement & elem, co
             throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected type for list of table names.");
 
         String query_database = identifier->getDatabaseName();
-        String query_table = identifier->getTableName();
+        String query_table = identifier->shortName();
         if (!query_database.empty() && query_table.empty())
         {
             elem.query_databases.insert(backQuoteIfNeed(query_database));
diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp
index 1a24dac61e6..80a618170c6 100644
--- a/src/Parsers/ASTIdentifier.cpp
+++ b/src/Parsers/ASTIdentifier.cpp
@@ -207,12 +207,6 @@ String ASTTableIdentifier::getDatabaseName() const
     else return {};
 }
 
-String ASTTableIdentifier::getTableName() const
-{
-    if (name_parts.size() == 2) return name_parts[1];
-    else return name_parts[0];
-}
-
 ASTPtr ASTTableIdentifier::getTable() const
 {
     if (name_parts.size() == 2)
diff --git a/src/Parsers/ASTIdentifier.h b/src/Parsers/ASTIdentifier.h
index d75b5a99dbe..d986b9170f3 100644
--- a/src/Parsers/ASTIdentifier.h
+++ b/src/Parsers/ASTIdentifier.h
@@ -86,7 +86,6 @@ public:
 
     StorageID getTableId() const;
     String getDatabaseName() const;
-    String getTableName() const;
 
     ASTPtr getTable() const;
     ASTPtr getDatabase() const;
diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp
index f9074c0b2eb..44205975cdc 100644
--- a/src/Parsers/tests/gtest_dictionary_parser.cpp
+++ b/src/Parsers/tests/gtest_dictionary_parser.cpp
@@ -304,7 +304,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery)
     auto & database_and_tables1 = drop1->database_and_tables->as<ASTExpressionList &>();
     auto identifier1 = dynamic_pointer_cast<ASTTableIdentifier>(database_and_tables1.children[0]);
     EXPECT_EQ(identifier1->getDatabaseName(), "test");
-    EXPECT_EQ(identifier1->getTableName(), "dict1");
+    EXPECT_EQ(identifier1->shortName(), "dict1");
 
     auto str1 = serializeAST(*drop1);
     EXPECT_EQ(input1, str1);
@@ -317,7 +317,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery)
     auto & database_and_tables2 = drop2->database_and_tables->as<ASTExpressionList &>();
     auto identifier2 = dynamic_pointer_cast<ASTTableIdentifier>(database_and_tables2.children[0]);
     EXPECT_EQ(identifier2->getDatabaseName(), "");
-    EXPECT_EQ(identifier2->getTableName(), "dict2");
+    EXPECT_EQ(identifier2->shortName(), "dict2");
     auto str2 = serializeAST(*drop2);
     EXPECT_EQ(input2, str2);
 }

From
ba62b01f4d15cc798e19a67a77dcc29daf7a7b8d Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 29 Mar 2024 02:23:53 +0000 Subject: [PATCH 079/150] remove dead code --- src/Storages/MergeTree/MergeTreeSelectProcessor.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 01bb3851e04..6b663e0fd36 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -66,15 +66,6 @@ public: void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; } private: - /// This struct allow to return block with no columns but with non-zero number of rows similar to Chunk - struct BlockAndProgress - { - Block block; - size_t row_count = 0; - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - }; - /// Sets up range readers corresponding to data readers void initializeRangeReaders(); From 9da4ce29dbfa694190ae05e27e502784c046282c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 29 Mar 2024 16:16:15 +0100 Subject: [PATCH 080/150] Revert output Pretty in tty --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 767a9b2b9f9..7a3192d1d9c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -753,7 +753,7 @@ void ClientBase::setDefaultFormatsFromConfiguration() else default_output_format = "TSV"; } - else if (is_interactive || stdout_is_a_tty) + else if (is_interactive) { default_output_format = "PrettyCompact"; } From 3c07d411521284fbc5535d6d36780b1a8a227406 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 29 Mar 2024 17:24:54 +0100 Subject: [PATCH 081/150] Add minimal documentation for hasThreadFuzzer --- docs/en/sql-reference/functions/other-functions.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2638a5a650b..9307729beed 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -591,6 +591,16 @@ SELECT hasColumnInTable('system','metrics','non-existing_column') 0 ``` +## hasThreadFuzzer + +Returns whether Thread Fuzzer is effective. It can be used in tests to prevent too long runs. + +**Syntax** + +```sql +hasThreadFuzzer(); +``` + ## bar Builds a bar chart. From c946a3a89c623797895fc2abe0cc0a22fc7a6114 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Fri, 29 Mar 2024 17:59:47 +0100 Subject: [PATCH 082/150] Update storing-data.md --- docs/en/operations/storing-data.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index fd81bc197d1..9ffbb64c1ed 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -520,13 +520,13 @@ Example of configuration for versions later or equal to 22.8: - +
cache
-
+ ``` @@ -546,13 +546,13 @@ Example of configuration for versions earlier than 22.8: - +
s3
-
+ ``` From 9182789ed8bd784f0987d60219626afffdb72a2f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 30 Mar 2024 02:36:03 +0100 Subject: [PATCH 083/150] Update test --- tests/queries/0_stateless/03021_output_format_tty.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03021_output_format_tty.sh b/tests/queries/0_stateless/03021_output_format_tty.sh index dfc37c3b30a..55dedeb6773 100755 --- a/tests/queries/0_stateless/03021_output_format_tty.sh +++ b/tests/queries/0_stateless/03021_output_format_tty.sh @@ -21,12 +21,6 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --query 'SELECT 1'" -expect "│ 1 │" -expect "└───┘" -expect eof - -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --query 'SELECT 2'" -expect "│ 2 │" -expect "└───┘" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --query 'SELECT 1, 2'" +expect "1\t2" expect eof From 49c33039b9f53a2f280b8ca74aeea1d119ce9a2d Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 30 Mar 2024 14:13:30 +0100 Subject: [PATCH 084/150] Updates to hasSubsequence functions --- .../functions/string-search-functions.md | 106 +++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 818626fc4fc..f373353d3f6 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -662,8 +662,10 @@ Type: `UInt8`. **Examples** +Query: + ``` sql -SELECT hasSubsequence('garbage', 'arg') ; +SELECT hasSubsequence('garbage', 'arg'); ``` Result: @@ -678,14 +680,116 @@ Result: Like [hasSubsequence](#hasSubsequence) but searches case-insensitively. +**Syntax** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Arguments** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). + +**Returned values** + +- 1, if needle is a subsequence of haystack. +- 0, otherwise. + +Type: `UInt8`. + +**Examples** + +Query: + +``` sql +SELECT hasSubsequenceCaseInsensitive('garbage', 'ARG'); +``` + +Result: + +``` text +┌─hasSubsequenceCaseInsensitive('garbage', 'ARG')─┐ +│ 1 │ +└─────────────────────────────────────────────────┘ +``` + ## hasSubsequenceUTF8 Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. +**Syntax** + +``` sql +hasSubsequenceUTF8(haystack, needle) +``` + +**Arguments** + +- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). + +**Returned values** + +- 1, if needle is a subsequence of haystack. +- 0, otherwise. + +Type: `UInt8`. 
+
+
+**Examples**
+
+Query:
+
+``` sql
+select hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система');
+```
+
+Result:
+
+``` text
+┌─hasSubsequenceUTF8('ClickHouse - столбцовая система управления базами данных', 'система')─┐
+│                                                                                          1 │
+└────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
 ## hasSubsequenceCaseInsensitiveUTF8
 
 Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
 
+**Syntax**
+
+``` sql
+hasSubsequenceCaseInsensitiveUTF8(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+Query:
+
+``` sql
+select hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА');
+```
+
+Result:
+
+``` text
+┌─hasSubsequenceCaseInsensitiveUTF8('ClickHouse - столбцовая система управления базами данных', 'СИСТЕМА')─┐
+│                                                                                                         1 │
+└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
 ## hasToken
 
 Returns 1 if a given token is present in a haystack, or 0 otherwise.

From 33136b9a1a0ffb10b81cee954a875f8f233867e2 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Sat, 30 Mar 2024 15:16:01 +0100
Subject: [PATCH 085/150] Minor detail fixes

---
 docs/en/sql-reference/functions/other-functions.md          | 2 +-
 docs/en/sql-reference/functions/string-search-functions.md | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 9307729beed..feead3c3b5e 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -593,7 +593,7 @@ SELECT hasColumnInTable('system','metrics','non-existing_column')
 
 ## hasThreadFuzzer
 
-Returns whether Thread Fuzzer is effective. It can be used in tests to prevent too long runs.
+Returns whether Thread Fuzzer is effective. It can be used in tests to prevent runs from being too long.
 
 **Syntax**
 
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 818626fc4fc..d613a100a27 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -683,7 +683,7 @@ Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
 **Syntax**
 
 ``` sql
-hasSubsequence(haystack, needle)
+hasSubsequenceCaseInsensitive(haystack, needle)
 ```
 
 **Arguments**
@@ -908,7 +908,7 @@ Returns 1 if a given token is present in a haystack, 0 otherwise.
Ignores case a **Syntax** ```sql -hasTokenCaseInsensitive(haystack, token) +hasTokenCaseInsensitiveOrNull(haystack, token) ``` **Parameters** From 2f50c76f3d40324cd5ea201b2acf6d5272f2bb5e Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 30 Mar 2024 16:30:55 +0100 Subject: [PATCH 086/150] Update ngramDistance functions --- .../functions/string-search-functions.md | 136 +++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 22f879c62ae..145228ae727 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -465,9 +465,9 @@ Alias: `haystack NOT ILIKE pattern` (operator) ## ngramDistance -Calculates the 4-gram distance between a `haystack` string and a `needle` string. For that, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result is, the more strings are similar to each other. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1. +Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other. -Functions `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** @@ -475,6 +475,138 @@ Functions `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseIns ngramDistance(haystack, needle) ``` +**Parameters** + +- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) +- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) + +**Returned value** + +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) + +**Implementation details** + +This function will throw an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any non-constant `haystack` or `needle` arguments are more than 32Kb in size, then the distance is always 1. + +**Examples** + +The more similar two strings are to each other, the closer the result will be to 0 (identical). + +Query: + +```sql +SELECT ngramDistance('ClickHouse','ClickHouse!'); +``` + +Result: + +```response +0.06666667 +``` + +The less similar two strings are to each, the larger the result will be. + + +Query: + +```sql +SELECT ngramDistance('ClickHouse','House'); +``` + +Result: + +```response +0.5555556 +``` + +## ngramDistanceCaseInsensitive + +Provides a case-insensitve variant of [ngramDistance](#ngramdistance). 
+
+**Syntax**
+
+```sql
+ngramDistanceCaseInsensitive(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+**Examples**
+
+With [ngramDistance](#ngramdistance), differences in case drive up the distance value (i.e. lower the similarity):
+
+Query:
+
+```sql
+SELECT ngramDistance('ClickHouse','clickhouse');
+```
+
+Result:
+
+```response
+0.71428573
+```
+
+With [ngramDistanceCaseInsensitive](#ngramdistancecaseinsensitive) case is ignored, so two strings differing only in case now read as identical:
+
+Query:
+
+```sql
+SELECT ngramDistanceCaseInsensitive('ClickHouse','clickhouse');
+```
+
+Result:
+
+```response
+0
+```
+
+## ngramDistanceUTF8
+
+Provides a UTF-8 variant of [ngramDistance](#ngramdistance). Assumes that `needle` and `haystack` are UTF-8 encoded strings.
+
+**Syntax**
+
+```sql
+ngramDistanceUTF8(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+## ngramDistanceCaseInsensitiveUTF8
+
+Provides a case-insensitive variant of [ngramDistanceUTF8](#ngramdistanceutf8).
+
+**Syntax**
+
+```sql
+ngramDistanceCaseInsensitiveUTF8(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+
 ## ngramSearch

 Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.

From 380788ea30ff2aba639afc5f3568bbe599d4d835 Mon Sep 17 00:00:00 2001
From: Blargian
Date: Sat, 30 Mar 2024 17:22:31 +0100
Subject: [PATCH 087/150] Update ngramSearch functions

---
 .../functions/string-search-functions.md      | 142 +++++++++++++++++-
 1 file changed, 136 insertions(+), 6 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 145228ae727..82226caeba3 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -609,13 +609,9 @@

 ## ngramSearch

-Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.
+Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex).

-Functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
-
-:::note
-The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.
-:::
+Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.

 **Syntax**

@@ -623,6 +619,140 @@
 ngramSearch(haystack, needle)
 ```

+**Parameters**
+
+- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+**Implementation details**
+
+:::note
+The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.
+:::
+
+**Example**
+
+Query:
+
+```sql
+SELECT ngramSearch('Hello World','World Hello');
+```
+
+Result:
+
+```response
+0.5
+```
+
+## ngramSearchCaseInsensitive
+
+Provides a case-insensitive variant of [ngramSearch](#ngramsearch).
+
+**Syntax**
+
+```sql
+ngramSearchCaseInsensitive(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+The bigger the result is, the more likely `needle` is in the `haystack`.
+
+**Example**
+
+Query:
+
+```sql
+SELECT ngramSearchCaseInsensitive('Hello World','hello');
+```
+
+Result:
+
+```response
+1
+```
+
+## ngramSearchUTF8
+
+Provides a UTF-8 variant of [ngramSearch](#ngramsearch) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.
+
+**Syntax**
+
+```sql
+ngramSearchUTF8(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+The bigger the result is, the more likely `needle` is in the `haystack`.
+
+**Example**
+
+Query:
+
+```sql
+SELECT ngramSearchUTF8('абвгдеёжз', 'гдеёзд');
+```
+
+Result:
+
+```response
+0.5
+```
+
+## ngramSearchCaseInsensitiveUTF8
+
+Provides a case-insensitive variant of [ngramSearchUTF8](#ngramsearchutf8) in which `needle` and `haystack` are assumed to be UTF-8 encoded strings.
+
+**Syntax**
+
+```sql
+ngramSearchCaseInsensitiveUTF8(haystack, needle)
+```
+
+**Parameters**
+
+- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal)
+- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal)
+
+**Returned value**
+
+- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+
+The bigger the result is, the more likely `needle` is in the `haystack`.
+
+**Example**
+
+Query:
+
+```sql
+SELECT ngramSearchCaseInsensitiveUTF8('абвГДЕёжз', 'АбвгдЕЁжз');
+```
+
+Result:
+
+```response
+0.57142854
+```
+
 ## countSubstrings

 Returns how often substring `needle` occurs in string `haystack`.
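A note on the semantics documented in the patch above: unlike `ngramDistance`, the `ngramSearch` family is non-symmetric, so swapping `haystack` and `needle` generally changes the result. A minimal sketch (the exact scores are illustrative and depend on the 4-gram hashing described in the implementation details):

```sql
-- ngramSearch(haystack, needle) scores how many of the needle's n-grams
-- the haystack covers, so the argument order matters:
SELECT
    ngramSearch('ClickHouse', 'House') AS short_needle, -- close to 1: 'House' is well covered
    ngramSearch('House', 'ClickHouse') AS long_needle;  -- smaller: most n-grams of 'ClickHouse' are absent

-- ngramDistance uses the symmetric difference, so the argument order does not matter:
SELECT ngramDistance('ClickHouse', 'House') = ngramDistance('House', 'ClickHouse'); -- returns 1
```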
From 6b9b597051636661784cfb1a283a1364104443a7 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Sat, 30 Mar 2024 21:34:01 +0300 Subject: [PATCH 088/150] Crash in Engine Merge if Row Policy does not have expression (#61971) --- src/Storages/StorageMerge.cpp | 2 +- .../02763_row_policy_storage_merge.reference | 8 +++++++ .../02763_row_policy_storage_merge.sql.j2 | 23 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index caec03c95b3..1b5e4860464 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -583,7 +583,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ database_name, table_name, RowPolicyFilterType::SELECT_FILTER); - if (row_policy_filter_ptr) + if (row_policy_filter_ptr && !row_policy_filter_ptr->empty()) { row_policy_data_opt = RowPolicyData(row_policy_filter_ptr, storage, modified_context); row_policy_data_opt->extendNames(real_column_names); diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference index 9fa5612e7cd..6510a3c933e 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.reference +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.reference @@ -166,6 +166,10 @@ SELECT x, y from merge(currentDatabase(), 02763_merge 4 14 4 14 4 14 +USING 0 +USING 1 +zzz +==== SETTINGS optimize_move_to_prewhere= 1 SELECT * FROM 02763_merge_log_1 3 13 @@ -312,3 +316,7 @@ SELECT x, y from merge(currentDatabase(), 02763_merge 4 14 4 14 4 14 +USING 0 +USING 1 +zzz +==== diff --git a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 index 0263e1a974f..eabbde9e9dd 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge.sql.j2 @@ -129,6 +129,27 @@ SELECT x, lc, cnst from merge(currentDatabase(), '02763_merge_fancycols') ORDER SELECT 'SELECT x, y from merge(currentDatabase(), 02763_merge'; SELECT x, y from merge(currentDatabase(), '02763_merge') ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; + + +CREATE TABLE 02763_t1 ( b String ) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO 02763_t1 VALUES('zzz'); +CREATE TABLE 02763_t2 AS 02763_t1 ENGINE = Merge(currentDatabase(), '02763_t1'); + +SELECT 'USING 0'; +CREATE ROW POLICY OR REPLACE 02763_filter_t1 ON 02763_t1 USING 0 TO ALL; +SELECT * FROM 02763_t2 SETTINGS optimize_move_to_prewhere= {{prew}}; + +SELECT 'USING 1'; +CREATE ROW POLICY OR REPLACE 02763_filter_t1 ON 02763_t1 USING 1 TO ALL; +SELECT * FROM 02763_t2 SETTINGS optimize_move_to_prewhere= {{prew}}; + + +DROP TABLE 02763_t1; +DROP TABLE 02763_t2; +SELECT '===='; + + + DROP TABLE 02763_merge_fancycols; DROP ROW POLICY 02763_filter_1 ON 02763_merge_log_1; @@ -140,4 +161,6 @@ DROP ROW POLICY 02763_filter_4 ON 02763_merge_merge_1; DROP ROW POLICY 02763_filter_5 ON 02763_merge_fancycols; DROP ROW POLICY 02763_filter_6 ON 02763_merge_fancycols; +DROP ROW POLICY 02763_filter_t1 ON 02763_t1; + {% endfor %} From c65261b7376a09c3765267824ada902d1fac446d Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:02:54 +0100 Subject: [PATCH 089/150] Update aspell-dict.txt add `hasThreadFuzzer` to aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 57a8e0d5840..1a5f5f219b0 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1673,6 +1673,7 @@ hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr hasToken +hasThreadFuzzer hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull From 8f94261e0eb402c109057354ef8073858e7f7b5b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:04:51 +0100 Subject: [PATCH 090/150] Fix spelling mistake insensitve -> insensitive --- docs/en/sql-reference/functions/string-search-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 82226caeba3..e93971879a2 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -521,7 +521,7 @@ Result: ## ngramDistanceCaseInsensitive -Provides a case-insensitve variant of [ngramDistance](#ngramdistance). +Provides a case-insensitive variant of [ngramDistance](#ngramdistance). **Syntax** From b5425c03501ff1cd2bd01d2aaff8a8a3622fe062 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:07:46 +0100 Subject: [PATCH 091/150] Add ngram variants to aspell-dict.txt added the following variants to aspell-dict.txt : ngramDistanceCaseInsensitive ngramDistanceCaseInsensitiveUTF ngramDistanceUTF ngramSearchCaseInsensitive ngramSearchCaseInsensitiveUTF ngramSearchUTF --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 57a8e0d5840..dd13d8a3a6e 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1977,6 +1977,9 @@ nestjs netloc ngram ngramDistance +ngramDistanceCaseInsensitive +ngramDistanceCaseInsensitiveUTF +ngramDistanceUTF ngramMinHash ngramMinHashArg ngramMinHashArgCaseInsensitive @@ -1986,6 +1989,9 @@ ngramMinHashCaseInsensitive ngramMinHashCaseInsensitiveUTF ngramMinHashUTF ngramSearch +ngramSearchCaseInsensitive +ngramSearchCaseInsensitiveUTF +ngramSearchUTF ngramSimHash ngramSimHashCaseInsensitive ngramSimHashCaseInsensitiveUTF From e8953a379bc01ef90708d379ce6d95572c04e549 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 31 Mar 2024 07:54:58 +0000 Subject: [PATCH 092/150] fix external table with Bool Signed-off-by: Duc Canh Le --- src/Core/ExternalTable.cpp | 6 ++++-- .../02935_external_table_enum_type.reference | 6 ++++++ .../0_stateless/02935_external_table_enum_type.sh | 10 ++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index e043a2f9492..f8bbd16d038 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -81,8 +81,10 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg for (auto & child : columns_list_raw->children) { auto * column = child->as(); + /// We use `formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. 
+    /// `getColumnNameWithoutAlias` will return the name of the function with `(arguments)` even if the argument list is empty.
         if (column)
-            structure.emplace_back(column->name, column->type->getColumnNameWithoutAlias());
+            structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true));
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage());
     }
@@ -99,7 +101,7 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error);

     for (size_t i = 0; i < type_list_raw->children.size(); ++i)
-        structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->getColumnNameWithoutAlias());
+        structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true));
 }

 void BaseExternalTable::initSampleBlock()
diff --git a/tests/queries/0_stateless/02935_external_table_enum_type.reference b/tests/queries/0_stateless/02935_external_table_enum_type.reference
index ed152e608b9..d13bb1e990c 100644
--- a/tests/queries/0_stateless/02935_external_table_enum_type.reference
+++ b/tests/queries/0_stateless/02935_external_table_enum_type.reference
@@ -2,3 +2,9 @@ foo 1
 bar 2
 foo 1
 bar 2
+true
+true
+true
+[1]
+('foo',1)
+(1,1)
diff --git a/tests/queries/0_stateless/02935_external_table_enum_type.sh b/tests/queries/0_stateless/02935_external_table_enum_type.sh
index 61d5508e9f9..bbbf79fce4a 100755
--- a/tests/queries/0_stateless/02935_external_table_enum_type.sh
+++ b/tests/queries/0_stateless/02935_external_table_enum_type.sh
@@ -10,3 +10,13 @@ curl -s "${http_url}temp_structure=x+Enum8('foo'%3D1,'bar'%3D2),y+Int" -F "$(pri
 curl -s "${http_url}temp_types=Enum8('foo'%3D1,'bar'%3D2),Int" -F "$(printf 'temp='"bar"'\t2');filename=data1" -F "query=SELECT * FROM temp"
 echo -ne 'foo\t1' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Enum8('foo'=1,'bar'=2),y Int"
 echo -ne 'bar\t2' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --types="Enum8('foo'=1,'bar'=2),Int"
+
+# https://github.com/ClickHouse/ClickHouse/issues/62108
+echo -ne 'true' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Bool"
+echo -ne 'true' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --types="Bool"
+
+# Test for some complex and custom types
+echo -ne 'true' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Nullable(FixedString(4))"
+echo -ne '[1]' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Array(UInt8)"
+echo -ne '('"'"'foo'"'"',1)' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Tuple(String, UInt8)"
+echo -ne '(1,1)' | ${CLICKHOUSE_CLIENT} --query="select * from tmp" --external --file=- --name=tmp --structure="x Point"

From 47e865334f4ee0fba5d14fb8938fbcf653cd9c32 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov
Date: Sun, 31 Mar 2024 13:19:22 +0200
Subject: [PATCH 093/150] Fix test with DatabaseReplicated

---
 .../02908_many_requests_to_system_replicas.reference      | 8 ++++----
 .../0_stateless/02908_many_requests_to_system_replicas.sh | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git
a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference index 0a4008cc35e..f376bb87044 100644 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.reference @@ -1,8 +1,8 @@ Creating 300 tables -900 2700 2700 -900 2700 2700 -900 2700 2700 -900 2700 2700 +900 1 1 +900 1 1 +900 1 1 +900 1 1 Making 200 requests to system.replicas Query system.replicas while waiting for other concurrent requests to finish 0 diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index 18c55159281..144831a2cdc 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -46,10 +46,10 @@ wait; # Check results with different max_block_size -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase()' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' -$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas), sum(active_replicas) FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' +$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase()' +$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=1' +$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=77' +$CLICKHOUSE_CLIENT -q 'SELECT count(), sum(total_replicas) >= 2700, sum(active_replicas) >= 2700 FROM system.replicas WHERE database=currentDatabase() SETTINGS max_block_size=11111' echo "Making $CONCURRENCY requests to system.replicas" From 9a9f0161bb94c2800ecf4247a32b3ca2e0c8afa2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 1 Apr 2024 01:19:00 +0200 Subject: [PATCH 094/150] add test --- src/IO/ReadWriteBufferFromHTTP.cpp | 8 +-- .../http_headers_echo_server.py | 37 ++++++++-- .../redirect_server.py | 46 ++++++++++++ .../test_storage_url_http_headers/test.py | 71 ++++++++++++++++--- 4 files changed, 141 insertions(+), 21 deletions(-) create mode 100644 tests/integration/test_storage_url_http_headers/redirect_server.py diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 63a91026701..c99b08d0c9d 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -346,10 +346,10 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, { if (!mute_logging) LOG_ERROR(log, - "Failed to make request to `{}`{}. " + "Failed to make request to '{}'{}. " "Error: '{}'. " "Failed at try {}/{}.", - initial_uri.toString(), current_uri == initial_uri ? String() : fmt::format(" redirect to `{}`", current_uri.toString()), + initial_uri.toString(), current_uri == initial_uri ? 
String() : fmt::format(" redirect to '{}'", current_uri.toString()), error_message, attempt, read_settings.http_max_tries); @@ -362,11 +362,11 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, if (!mute_logging) LOG_INFO(log, - "Failed to make request to `{}`{}. " + "Failed to make request to '{}'{}. " "Error: {}. " "Failed at try {}/{}. " "Will retry with current backoff wait is {}/{} ms.", - initial_uri.toString(), current_uri == initial_uri ? String() : fmt::format(" redirect to `{}`", current_uri.toString()), + initial_uri.toString(), current_uri == initial_uri ? String() : fmt::format(" redirect to '{}'", current_uri.toString()), error_message, attempt + 1, read_settings.http_max_tries, milliseconds_to_wait, read_settings.http_retry_max_backoff_ms); diff --git a/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py index b1a3f6777b1..8fb2f8d0e2d 100644 --- a/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py +++ b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py @@ -1,6 +1,8 @@ import http.server +import sys +import json -RESULT_PATH = "/headers.txt" +RESULT_PATH = "/echo_server_headers.txt" class RequestHandler(http.server.BaseHTTPRequestHandler): @@ -8,6 +10,28 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): with open(RESULT_PATH, "w") as f: f.write(self.headers.as_string()) + def do_GET(self): + if self.path == "/": + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.end_headers() + self.wfile.write(b'{"status":"ok"}') + if self.path == "/sample-data": + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.end_headers() + sample_data = [ + { + "title": "ClickHouse Newsletter June 2022: Materialized, but still real-time", + "theme": "Newsletter", + }, + { + "title": "ClickHouse Over the Years with Benchmarks", + "theme": "ClickHouse Journey", + } + ] + self.wfile.write(bytes(json.dumps(sample_data), "UTF-8")) + def do_POST(self): self.rfile.read1() self.send_response(200) @@ -16,15 +40,16 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if __name__ == "__main__": - with open(RESULT_PATH, "w") as f: - f.write("") - httpd = http.server.HTTPServer( + host = sys.argv[1] + port = int(sys.argv[2]) + httpd = http.server.ThreadingHTTPServer( ( - "localhost", - 8000, + host, + port, ), RequestHandler, ) + try: httpd.serve_forever() finally: diff --git a/tests/integration/test_storage_url_http_headers/redirect_server.py b/tests/integration/test_storage_url_http_headers/redirect_server.py new file mode 100644 index 00000000000..b1d92d0cd4e --- /dev/null +++ b/tests/integration/test_storage_url_http_headers/redirect_server.py @@ -0,0 +1,46 @@ +import http.server +import sys + +REDIRECT_HOST = "" +REDIRECT_PORT = 0 + +RESULT_PATH = "/redirect_server_headers.txt" + + +class RequestHandler(http.server.BaseHTTPRequestHandler): + def log_message(self, *args): + with open(RESULT_PATH, "w") as f: + f.write(self.headers.as_string()) + + def do_GET(self): + if self.path == "/": + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.end_headers() + self.wfile.write(b'{"status":"ok"}') + else: + global REDIRECT_HOST, REDIRECT_PORT + self.send_response(302) + target_location = f"http://{REDIRECT_HOST}:{REDIRECT_PORT}{self.path}" + self.send_header("Location", target_location) + self.end_headers() + 
self.wfile.write(b'{"status":"redirected"}') + + +if __name__ == "__main__": + host = sys.argv[1] + port = int(sys.argv[2]) + REDIRECT_HOST = sys.argv[3] + REDIRECT_PORT = int(sys.argv[4]) + httpd = http.server.ThreadingHTTPServer( + ( + host, + port, + ), + RequestHandler, + ) + + try: + httpd.serve_forever() + finally: + httpd.server_close() diff --git a/tests/integration/test_storage_url_http_headers/test.py b/tests/integration/test_storage_url_http_headers/test.py index 3bbf5ec81c9..8fc08ec5c9d 100644 --- a/tests/integration/test_storage_url_http_headers/test.py +++ b/tests/integration/test_storage_url_http_headers/test.py @@ -1,8 +1,7 @@ import pytest import os -import time - from . import http_headers_echo_server +from . import redirect_server from helpers.cluster import ClickHouseCluster @@ -10,31 +9,37 @@ cluster = ClickHouseCluster(__file__) server = cluster.add_instance("node") -def run_echo_server(): +def run_server(container_id, file_name, hostname, port, *args): script_dir = os.path.dirname(os.path.realpath(__file__)) - server.copy_file_to_container( - os.path.join(script_dir, "http_headers_echo_server.py"), - "/http_headers_echo_server.py", + cluster.copy_file_to_container( + container_id, + os.path.join(script_dir, file_name), + f"/{file_name}", ) - server.exec_in_container( + cmd_args = [hostname, port] + list(args) + cmd_args_val = " ".join([str(x) for x in cmd_args]) + + cluster.exec_in_container( + container_id, [ "bash", "-c", - "python3 /http_headers_echo_server.py > /http_headers_echo.server.log 2>&1", + f"python3 /{file_name} {cmd_args_val} > {file_name}.log 2>&1", ], detach=True, user="root", ) for _ in range(0, 10): - ping_response = server.exec_in_container( - ["curl", "-s", f"http://localhost:8000/"], + ping_response = cluster.exec_in_container( + container_id, + ["curl", "-s", f"http://{hostname}:{port}/"], nothrow=True, ) - if "html" in ping_response: + if '{"status":"ok"}' in ping_response: return print(ping_response) @@ -42,11 +47,23 @@ def run_echo_server(): raise Exception("Echo server is not responding") +def run_echo_server(): + container_id = cluster.get_container_id("node") + run_server(container_id, "http_headers_echo_server.py", "localhost", 8000) + + +def run_redirect_server(): + container_id = cluster.get_container_id("node") + run_server(container_id, "redirect_server.py", "localhost", 8080, "localhost", 8000) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + run_redirect_server() run_echo_server() + yield cluster finally: cluster.shutdown() @@ -64,3 +81,35 @@ def test_storage_url_http_headers(started_cluster): print(result) assert "X-My-Custom-Header: test-header" in result + + +def test_storage_url_redirected_headers(started_cluster): + query = """ + SELECT + title::String as title, + theme::String as theme + FROM + url('http://127.0.0.1:8080/sample-data', 'JSONEachRow', 'title String, theme String') + SETTINGS http_max_tries=2, max_http_get_redirects=2 + """ + + result = server.query(query) + assert 2 == len(result.strip().split("\n")) + + result_redirect = server.exec_in_container( + ["cat", redirect_server.RESULT_PATH], user="root" + ) + + print(result_redirect) + + assert "Host: 127.0.0.1" in result_redirect + assert "Host: localhost" not in result_redirect + + result = server.exec_in_container( + ["cat", http_headers_echo_server.RESULT_PATH], user="root" + ) + + print(result) + + assert "Host: 127.0.0.1" not in result + assert "Host: localhost" in result From 267ebba3c0a654da36aac5146012dd4b162a34bc Mon Sep 17 
00:00:00 2001 From: robot-clickhouse Date: Sun, 31 Mar 2024 23:43:28 +0000 Subject: [PATCH 095/150] Automatic style fix --- .../test_storage_url_http_headers/http_headers_echo_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py index 8fb2f8d0e2d..3c62112a7d3 100644 --- a/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py +++ b/tests/integration/test_storage_url_http_headers/http_headers_echo_server.py @@ -28,8 +28,8 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): { "title": "ClickHouse Over the Years with Benchmarks", "theme": "ClickHouse Journey", - } - ] + }, + ] self.wfile.write(bytes(json.dumps(sample_data), "UTF-8")) def do_POST(self): From b2e764f63326e22d3cb306bd0009108b1a955934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 1 Apr 2024 13:51:56 +0200 Subject: [PATCH 096/150] Revert "Merge pull request #61564 from liuneng1994/optimize_in_single_value" This reverts commit 20a45b40735e4b033bdcd0b5ea6fe4585e4f840c, reversing changes made to a642f4d3ec3f28edb2def71bbf7ac2bf211b5bac. --- src/Analyzer/Passes/ConvertInToEqualPass.cpp | 75 ------- src/Analyzer/Passes/ConvertInToEqualPass.h | 27 --- src/Analyzer/QueryTreePassManager.cpp | 2 - src/Functions/CMakeLists.txt | 2 - src/Functions/equals.cpp | 5 - src/Functions/equals.h | 11 - src/Functions/notEquals.cpp | 5 - src/Functions/notEquals.h | 11 - ..._transform_query_for_external_database.cpp | 6 +- tests/performance/function_in.xml | 28 --- .../03013_optimize_in_to_equal.reference | 189 ------------------ .../03013_optimize_in_to_equal.sql | 32 --- 12 files changed, 2 insertions(+), 391 deletions(-) delete mode 100644 src/Analyzer/Passes/ConvertInToEqualPass.cpp delete mode 100644 src/Analyzer/Passes/ConvertInToEqualPass.h delete mode 100644 src/Functions/equals.h delete mode 100644 src/Functions/notEquals.h delete mode 100644 tests/performance/function_in.xml delete mode 100644 tests/queries/0_stateless/03013_optimize_in_to_equal.reference delete mode 100644 tests/queries/0_stateless/03013_optimize_in_to_equal.sql diff --git a/src/Analyzer/Passes/ConvertInToEqualPass.cpp b/src/Analyzer/Passes/ConvertInToEqualPass.cpp deleted file mode 100644 index b204d2fb922..00000000000 --- a/src/Analyzer/Passes/ConvertInToEqualPass.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class ConvertInToEqualPassVisitor : public InDepthQueryTreeVisitorWithContext -{ -public: - using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; - - void enterImpl(QueryTreeNodePtr & node) - { - static const std::unordered_map MAPPING = { - {"in", "equals"}, - {"notIn", "notEquals"} - }; - auto * func_node = node->as(); - if (!func_node - || !MAPPING.contains(func_node->getFunctionName()) - || func_node->getArguments().getNodes().size() != 2) - return ; - auto args = func_node->getArguments().getNodes(); - auto * column_node = args[0]->as(); - auto * constant_node = args[1]->as(); - if (!column_node || !constant_node) - return ; - // IN multiple values is not supported - if (constant_node->getValue().getType() == Field::Types::Which::Tuple - || constant_node->getValue().getType() == Field::Types::Which::Array) - return ; - // x IN null not equivalent to x = null - if (constant_node->getValue().isNull()) - return ; - auto 
result_func_name = MAPPING.at(func_node->getFunctionName()); - auto equal = std::make_shared(result_func_name); - auto new_const = std::make_shared(constant_node->getValue(), removeNullable(constant_node->getResultType())); - new_const->getSourceExpression() = constant_node->getSourceExpression(); - QueryTreeNodes arguments{column_node->clone(), new_const}; - equal->getArguments().getNodes() = std::move(arguments); - FunctionOverloadResolverPtr resolver; - bool decimal_check_overflow = getContext()->getSettingsRef().decimal_check_overflow; - if (result_func_name == "equals") - { - resolver = createInternalFunctionEqualOverloadResolver(decimal_check_overflow); - } - else - { - resolver = createInternalFunctionNotEqualOverloadResolver(decimal_check_overflow); - } - try - { - equal->resolveAsFunction(resolver); - } - catch (...) - { - // When function resolver fails, we should not replace the function node - return; - } - node = equal; - } -}; - -void ConvertInToEqualPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) -{ - ConvertInToEqualPassVisitor visitor(std::move(context)); - visitor.visit(query_tree_node); -} -} diff --git a/src/Analyzer/Passes/ConvertInToEqualPass.h b/src/Analyzer/Passes/ConvertInToEqualPass.h deleted file mode 100644 index bd4f8607c88..00000000000 --- a/src/Analyzer/Passes/ConvertInToEqualPass.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -namespace DB -{ -/** Optimize `in` to `equals` if possible. - * 1. convert in single value to equal - * Example: SELECT * from test where x IN (1); - * Result: SELECT * from test where x = 1; - * - * 2. convert not in single value to notEqual - * Example: SELECT * from test where x NOT IN (1); - * Result: SELECT * from test where x != 1; - * - * If value is null or tuple, do not convert. 
- */ -class ConvertInToEqualPass final : public IQueryTreePass -{ -public: - String getName() override { return "ConvertInToEqualPass"; } - - String getDescription() override { return "Convert in to equal"; } - - void run(QueryTreeNodePtr & query_tree_node, ContextPtr context) override; -}; -} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 14eb179680c..9c07884a464 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -264,7 +263,6 @@ void addQueryTreePasses(QueryTreePassManager & manager, bool only_analyze) manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); /// should before AggregateFunctionsArithmericOperationsPass manager.addPass(std::make_unique()); diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index d5eb12f3dee..733ae25274e 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -14,8 +14,6 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources multiMatchAny.cpp checkHyperscanRegexp.cpp array/has.cpp - equals.cpp - notEquals.cpp CastOverloadResolver.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers diff --git a/src/Functions/equals.cpp b/src/Functions/equals.cpp index 512abaa6fc7..5c59daf0537 100644 --- a/src/Functions/equals.cpp +++ b/src/Functions/equals.cpp @@ -13,11 +13,6 @@ REGISTER_FUNCTION(Equals) factory.registerFunction(); } -FunctionOverloadResolverPtr createInternalFunctionEqualOverloadResolver(bool decimal_check_overflow) -{ - return std::make_unique(std::make_shared(decimal_check_overflow)); -} - template <> ColumnPtr FunctionComparison::executeTupleImpl( const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count) const diff --git a/src/Functions/equals.h b/src/Functions/equals.h deleted file mode 100644 index 855cba4db3e..00000000000 --- a/src/Functions/equals.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class IFunctionOverloadResolver; -using FunctionOverloadResolverPtr = std::shared_ptr; - -FunctionOverloadResolverPtr createInternalFunctionEqualOverloadResolver(bool decimal_check_overflow); -} diff --git a/src/Functions/notEquals.cpp b/src/Functions/notEquals.cpp index 744a0997d95..3a63db46711 100644 --- a/src/Functions/notEquals.cpp +++ b/src/Functions/notEquals.cpp @@ -12,11 +12,6 @@ REGISTER_FUNCTION(NotEquals) factory.registerFunction(); } -FunctionOverloadResolverPtr createInternalFunctionNotEqualOverloadResolver(bool decimal_check_overflow) -{ - return std::make_unique(std::make_shared(decimal_check_overflow)); -} - template <> ColumnPtr FunctionComparison::executeTupleImpl( const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count) const diff --git a/src/Functions/notEquals.h b/src/Functions/notEquals.h deleted file mode 100644 index 961889d68d7..00000000000 --- a/src/Functions/notEquals.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class IFunctionOverloadResolver; -using FunctionOverloadResolverPtr = std::shared_ptr; - -FunctionOverloadResolverPtr createInternalFunctionNotEqualOverloadResolver(bool decimal_check_overflow); -} diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp 
b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 6490498d717..7e2d393c3d1 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -306,8 +306,7 @@ TEST(TransformQueryForExternalDatabase, Aliases) check(state, 1, {"field"}, "SELECT field AS value, field AS display FROM table WHERE field NOT IN ('') AND display LIKE '%test%'", - R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))", - R"(SELECT "field" FROM "test"."table" WHERE ("field" != '') AND ("field" LIKE '%test%'))"); + R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))"); } TEST(TransformQueryForExternalDatabase, ForeignColumnInWhere) @@ -409,6 +408,5 @@ TEST(TransformQueryForExternalDatabase, Analyzer) check(state, 1, {"column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo"}, "SELECT * FROM table WHERE (column) IN (1)", - R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" IN (1))", - R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" = 1)"); + R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" IN (1))"); } diff --git a/tests/performance/function_in.xml b/tests/performance/function_in.xml deleted file mode 100644 index af4f8737ba7..00000000000 --- a/tests/performance/function_in.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 8 - 1 - - - - CREATE TABLE t_nullable - ( - key_string1 Nullable(String), - key_string2 Nullable(String), - key_string3 Nullable(String), - key_int64_1 Nullable(Int64), - key_int64_2 Nullable(Int64), - key_int64_3 Nullable(Int64), - key_int64_4 Nullable(Int64), - key_int64_5 Nullable(Int64), - m1 Int64, - m2 Int64 - ) - ENGINE = Memory - - insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(30000000) - select * from t_nullable where key_string1 in ('aaaaaa') format Null SETTINGS allow_experimental_analyzer=1 - select * from t_nullable where key_string2 in ('3') format Null SETTINGS allow_experimental_analyzer=1 - drop table if exists t_nullable - - diff --git a/tests/queries/0_stateless/03013_optimize_in_to_equal.reference b/tests/queries/0_stateless/03013_optimize_in_to_equal.reference deleted file mode 100644 index 7d1118b7730..00000000000 --- a/tests/queries/0_stateless/03013_optimize_in_to_equal.reference +++ /dev/null @@ -1,189 +0,0 @@ -a 1 -------------------- -0 -0 -0 -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: \'a\', constant_value_type: String -------------------- -QUERY 
id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: equals, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: \'A\', constant_value_type: String - EXPRESSION - FUNCTION id: 9, function_name: upper, function_type: ordinary, result_type: String - ARGUMENTS - LIST id: 10, nodes: 1 - CONSTANT id: 11, constant_value: \'a\', constant_value_type: String -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(\'a\', \'b\'), constant_value_type: Tuple(String, String) -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: Array_[\'a\', \'b\'], constant_value_type: Array(String) -------------------- -b 2 -c 3 -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: notEquals, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: \'a\', constant_value_type: String -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: notEquals, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: \'A\', constant_value_type: String - EXPRESSION - FUNCTION id: 9, function_name: upper, function_type: ordinary, result_type: String - ARGUMENTS - LIST id: 10, nodes: 1 - CONSTANT id: 11, constant_value: \'a\', constant_value_type: String -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, 
result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: notIn, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: Tuple_(\'a\', \'b\'), constant_value_type: Tuple(String, String) -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: notIn, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: Array_[\'a\', \'b\'], constant_value_type: Array(String) -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: notIn, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: NULL, constant_value_type: Nullable(Nothing) -------------------- -QUERY id: 0 - PROJECTION COLUMNS - x String - y Int32 - PROJECTION - LIST id: 1, nodes: 2 - COLUMN id: 2, column_name: x, result_type: String, source_id: 3 - COLUMN id: 4, column_name: y, result_type: Int32, source_id: 3 - JOIN TREE - TABLE id: 3, alias: __table1, table_name: default.test - WHERE - FUNCTION id: 5, function_name: in, function_type: ordinary, result_type: UInt8 - ARGUMENTS - LIST id: 6, nodes: 2 - COLUMN id: 7, column_name: x, result_type: String, source_id: 3 - CONSTANT id: 8, constant_value: NULL, constant_value_type: Nullable(Nothing) -------------------- diff --git a/tests/queries/0_stateless/03013_optimize_in_to_equal.sql b/tests/queries/0_stateless/03013_optimize_in_to_equal.sql deleted file mode 100644 index e0eaa84cb8d..00000000000 --- a/tests/queries/0_stateless/03013_optimize_in_to_equal.sql +++ /dev/null @@ -1,32 +0,0 @@ -DROP TABLE IF EXISTS test; -CREATE TABLE test (x String, y Int32) ENGINE = MergeTree() ORDER BY x; -SET allow_experimental_analyzer = 1; -INSERT INTO test VALUES ('a', 1), ('b', 2), ('c', 3); -select * from test where x in ('a'); -select '-------------------'; -select x in Null from test; -select '-------------------'; -explain query tree select * from test where x in ('a'); -select '-------------------'; -explain query tree select * from test where x in (upper('a')); -select '-------------------'; -explain query tree select * from test where x in ('a','b'); -select '-------------------'; -explain query tree select * from test where x in ['a','b']; -select '-------------------'; -select * from test where x not in ('a'); -select '-------------------'; -explain query tree select * from test where x not in ('a'); -select '-------------------'; -explain query tree select * from test where x not in (upper('a')); -select '-------------------'; -explain query tree select * from test where x not in ('a','b'); -select '-------------------'; -explain query tree select * from test where x not 
in ['a','b']; -select '-------------------'; -explain query tree select * from test where x not in (NULL); -select '-------------------'; -explain query tree select * from test where x in (NULL); -select '-------------------'; ---- fuzzed -SELECT number FROM numbers(2) WHERE arrayExists(_ -> (_ IN toNullable(4294967290)), [number]); From a7f48cf936a8f50388ef9a2ca49827246499e62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 1 Apr 2024 14:12:36 +0200 Subject: [PATCH 097/150] Fix upgrade check --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 170836cb980..768b6aa6cbd 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,8 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, From 5e9436611aa0358629e50cf6ef8749936149978a Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Mon, 1 Apr 2024 10:21:23 -0300 Subject: [PATCH 098/150] fix incorrect ci err message --- tests/clickhouse-test | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 10851d23481..0661380e061 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -69,6 +69,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" +TEST_MAX_RUN_TIME_IN_SECONDS = 120 class SharedEngineReplacer: ENGINES_NON_REPLICATED_REGEXP = r"[ =]((Collapsing|VersionedCollapsing|Summing|Replacing|Aggregating|)MergeTree\(?\)?)" @@ -682,7 +683,7 @@ class FailureReason(enum.Enum): STDERR = "having stderror: " EXCEPTION = "having exception in stdout: " RESULT_DIFF = "result differs with reference: " - TOO_LONG = "Test runs too long (> 60s). Make it faster." + TOO_LONG = f"Test runs too long (> {TEST_MAX_RUN_TIME_IN_SECONDS}s). Make it faster." 
INTERNAL_QUERY_FAIL = "Internal query (CREATE/DROP DATABASE) failed:" # SKIPPED reasons @@ -1421,7 +1422,7 @@ class TestCase: if ( self.testcase_args.test_runs > 1 - and total_time > 120 + and total_time > TEST_MAX_RUN_TIME_IN_SECONDS and "long" not in self.tags ): if debug_log: From fe8d4b5dfc659682eeb70b285809377eb71f4719 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 1 Apr 2024 17:06:54 +0200 Subject: [PATCH 099/150] refine --- .../functions/string-search-functions.md | 8 +-- .../functions/string-search-functions.md | 53 ++++++++++--------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 1b03f220db2..53a6e1de4a4 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -74,6 +74,8 @@ SELECT position('Hello, world!', 'o', 7) ``` +Result: + ``` text ┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐ │ 5 │ 9 │ @@ -491,7 +493,7 @@ ngramDistance(haystack, needle) ## ngramSearch -Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`. +Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from `needle` minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`. Functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. @@ -610,7 +612,7 @@ Like `countMatches(haystack, pattern)` but matching ignores the case. ## regexpExtract -Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index. +Extracts the first string in `haystack` that matches the regexp pattern and corresponds to the regex group index. **Syntax** @@ -652,7 +654,7 @@ Result: ## hasSubsequence -Returns 1 if needle is a subsequence of haystack, or 0 otherwise. +Returns 1 if `needle` is a subsequence of `haystack`, or 0 otherwise. A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements. 
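Because subsequence matching as defined above is weaker than substring matching, a minimal sketch of the contrast may help (the example strings are illustrative; `hasSubsequence` only requires the characters of `needle` to appear in `haystack` in order, not necessarily adjacently):

```sql
SELECT
    hasSubsequence('garbage', 'arg') AS in_order,      -- 1: 'a', 'r', 'g' occur left to right in 'garbage'
    hasSubsequence('garbage', 'egg') AS out_of_order,  -- 0: no 'g' occurs after the final 'e'
    position('garbage', 'arg')       AS substring_pos; -- 0: 'arg' is not a contiguous substring
```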
diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 14c40fa1243..3d88704a297 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -9,7 +9,7 @@ slug: /zh/sql-reference/functions/string-search-functions 例如。英语中大写的`i`是`I`,而在土耳其语中则是`İ`, 对于英语以外的语言,结果可能会不符合预期。 本节中的函数还假设搜索字符串和被搜索字符串是单字节编码文本(例如ASCII)。如果违反此假设,不会抛出异常且结果为undefined。 -UTF-8 编码字符串的搜索通常由单独的函数变体提供。同样,如果使用 UTF-8 函数变体但输入字符串不是 UTF-8 编码文本,不会抛出异常且结果为undefined。 +UTF-8 编码字符串的搜索通常由单独的函数变体提供。同样,如果使用 UTF-8 函数变体但输入字符串不是 UTF-8 编码文本,不会抛出异常且结果为 undefined。 需要注意,函数不会执行自动 Unicode 规范化,您可以使用[normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) 函数来执行此操作。 在[字符串函数](string-functions.md) 和 [字符串替换函数](string-replace-functions.md) 会分别说明. @@ -29,17 +29,17 @@ position(haystack, needle[, start_pos]) **参数** - `haystack` — 被检索查询字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — 子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – 在字符串`haystack` 中开始检索的位置(从1开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选 +- `needle` — 进行查询的子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` – 在字符串`haystack` 中开始检索的位置(从1开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选。 **返回值** - 若子字符串存在,返回位置(以字节为单位,从 1 开始)。 -- 如果不存在子字符串,返回0。 +- 如果不存在子字符串,返回 0。 如果子字符串 `needle` 为空,则: - 如果未指定 `start_pos`,返回 `1` -- 如果 `start_pos` 为 0,则返回 `1` +- 如果 `start_pos = 0`,则返回 `1` - 如果 `start_pos >= 1` 且 `start_pos <= length(haystack) + 1`,则返回 `start_pos` - 否则返回 `0` @@ -68,14 +68,16 @@ SELECT position('Hello, world!', 'o', 1), position('Hello, world!', 'o', 7) ``` + 结果: + ``` text ┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐ │ 5 │ 9 │ └───────────────────────────────────┴───────────────────────────────────┘ ``` -示例,语法别名 `needle IN haystack`: +示例,`needle IN haystack`: ```sql SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); @@ -154,11 +156,11 @@ SELECT positionUTF8('Motörhead', 'r'); ## multiSearchAllPositions -类似于 [position](#position)返回多个在字符串 `haystack` 中 `needle` 子字符串的位置的数组(以字节为单位,从 1 开始)。 +类似于 [position](#position) 但是返回多个在字符串 `haystack` 中 `needle` 子字符串的位置的数组(以字节为单位,从 1 开始)。 :::note -所有以 `multiSearch*()` 开头的函数最多支持28 个`needle`. +所有以 `multiSearch*()` 开头的函数仅支持最多 28 个`needle`. ::: **语法** @@ -208,7 +210,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) ## multiSearchFirstIndex -在字符串`haystack`中匹配多个`needle`子字符串,从左开始任一匹配的子串,返回其索引 `i` (从1开始),如无法匹配则返回0。 +在字符串`haystack`中匹配最左侧的`needle`子字符串i,返回其索引 `i` (从1开始),如无法匹配则返回0。 函数 `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` 和 `multiSearchFirstIndexCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。 @@ -220,7 +222,6 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n ## multiSearchAny {#multisearchany} -Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. 
至少已有一个子字符串`needle`匹配 `haystack` 时返回1,否则返回 0 。 函数 `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` 和 `multiSearchAnyCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。 @@ -237,7 +238,6 @@ multiSearchAny(haystack, [needle1, needle2, …, needleN]) 返回字符串 `haystack` 是否匹配正则表达式 `pattern` ([re2正则语法参考](https://github.com/google/re2/wiki/Syntax) 匹配基于 UTF-8,例如`.` 匹配 Unicode 代码点 `¥`,它使用两个字节以 UTF-8 表示。T正则表达式不得包含空字节。如果 `haystack` 或`pattern`不是有效的 UTF-8,则此行为为undefined。 - 与 re2 的默认行为不同,`.` 会匹配换行符。要禁用此功能,请在模式前面添加`(?-s)`。 如果仅希望搜索子字符串,可以使用函数 [like](#like)或 [position](#position) 来替代,这些函数的性能比此函数更高。 @@ -447,8 +447,8 @@ SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[ 不会自动执行 Unicode 规范化,您可以使用[normalizeUTF8*()](https://clickhouse.com/docs/zh/sql-reference/functions/string-functions/) 函数来执行此操作。 -匹配字面上的 `%`, `_` 和 `/`(这些是 LIKE 元字符),请在其前面加上反斜杠:`\%`, `\_` 和 `\\`。 -如果在反斜杠前使用非 `%`, `_` 或 `\` 字符,则反斜杠将失去其特殊含义(即被解释为字面值)。 +如果需要匹配字符 `%`, `_` 和 `/`(这些是 LIKE 元字符),请在其前面加上反斜杠:`\%`, `\_` 和 `\\`。 +如果在非 `%`, `_` 或 `\` 字符前使用反斜杠,则反斜杠将失去其特殊含义(即被解释为字面值)。 请注意,ClickHouse 要求字符串中使用反斜杠 [也需要被转义](../syntax.md#string), 因此您实际上需要编写 `\\%`、`\\_` 和 `\\\\`。 @@ -483,7 +483,7 @@ like(haystack, pattern) ## ngramDistance -计算字符串`haystack` 和子字符串`needle`的4-gram距离。 为此,它计算两个 4-gram 多重集之间的对称差异,并通过它们的基数之和对其进行标准化。返回0-1之间的Float32。返回值越小,代表字符串越相似. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1. +计算字符串 `haystack` 和子字符串 `needle` 的 4-gram 距离。 为此,它计算两个 4-gram 多重集之间的对称差异,并通过它们的基数之和对其进行标准化。返回 0 到 1 之间的 Float32 浮点数。返回值越小,代表字符串越相似. 如果参数 `needle` or `haystack` 是常数且大小超过 32Kb,则抛出异常。如果参数 `haystack` 或 `needle` 是非常数且大小超过 32Kb ,则返回值恒为 1。 函数 `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。 @@ -495,11 +495,12 @@ ngramDistance(haystack, needle) ## ngramSearch -类似于`ngramDistance`,但计算`needle`字符串和`haystack`字符串之间的非对称差异,即来自needle的n-gram数量减去由`needle`数量归一化的n-gram的公共数量n-gram。返回 0 到 1 之间的 Float32。结果越大,’needle’越有可能在’haystack’中。该函数对于模糊字符串搜索很有用。另请参阅函数’soundex’。 +类似于`ngramDistance`,但计算`needle`字符串和 `haystack` 字符串之间的非对称差异,即来自 `needle` 的 n-gram 数量减去由`needle`数量归一化的 n-gram 的公共数量 n-gram。返回 0 到 1 之间的 Float32 浮点数。结果越大,`needle` 越有可能在 `haystack` 中。该函数对于模糊字符串搜索很有用。另请参阅函数 `soundex``。 + 函数 `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。 :::note -UTF-8变体使用了3-gram距离。这些并不是完全公平的n-gram距离。我们使用2字节的哈希函数来哈希n-gram,然后计算这些哈希表之间的(非)对称差异——可能会发生冲突。在使用UTF-8大小写不敏感格式时,我们并不使用公平的tolower函数——我们将每个码点字节的第5位(从零开始)和如果字节超过一个的零字节的第一位置零——这对拉丁字母和大部分西里尔字母都有效。 +UTF-8 变体使用了 3-gram 距离。这些并不是完全公平的 n-gram 距离。我们使用 2 字节的哈希函数来哈希 n-gram,然后计算这些哈希表之间的(非)对称差异——可能会发生冲突。在使用 UTF-8 大小写不敏感格式时,我们并不使用公平的 `tolower` 函数——我们将每个码点字节的第 5 位(从零开始)和第零字节的第一个比特位位置为零(如果该串的大小超过一个字节)——这对拉丁字母和大部分西里尔字母都有效。 ::: **语法** @@ -512,7 +513,7 @@ ngramSearch(haystack, needle) 返回字符串 `haystack` 中子字符串 `needle` 出现的次数。 -函数 `countSubstringsCaseInsensitive` and `countSubstringsCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。 +函数 `countSubstringsCaseInsensitive` 和 `countSubstringsCaseInsensitiveUTF8` 提供此函数的不区分大小写以及 UTF-8 变体。 **语法** @@ -522,9 +523,9 @@ countSubstrings(haystack, needle[, start_pos]) **参数** -- `haystack` — 被检索查询字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — 子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal). 
-- `start_pos` – 在字符串`haystack` 中开始检索的位置(从1开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选
+- `haystack` — 被搜索的字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — 用于搜索的模式子字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
+- `start_pos` – 在字符串`haystack` 中开始检索的位置(从 1 开始),类型为[UInt](../../sql-reference/data-types/int-uint.md),可选。

**返回值**

@@ -562,7 +563,7 @@ SELECT countSubstrings('abc___abc', 'abc', 4);

## countMatches

-返回正则表达式成功匹配的次数。
+返回正则表达式 `pattern` 在 `haystack` 中成功匹配的次数。

**语法**

@@ -573,7 +574,7 @@ countMatches(haystack, pattern)

**参数**

- `haystack` — 输入的字符串,数据类型为[String](../../sql-reference/data-types/string.md).
-- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax) ,必须包含group,每个group用括号括起来。 如果 `pattern` 不包含group则会抛出异常。 数据类型为[String](../../sql-reference/data-types/string.md).
+- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax)),数据类型为[String](../../sql-reference/data-types/string.md).

**返回值**

@@ -627,11 +628,11 @@ regexpExtract(haystack, pattern[, index])

- `haystack` — 被匹配字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — 正则表达式,必须是常量。 [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `index` – 一个大于等于0的整数,默认为1,它代表要提取哪个正则表达式组。 [UInt or Int](../../sql-reference/data-types/int-uint.md) 可选。
+- `index` – 一个大于等于 0 的整数,默认为 1,它代表要提取哪个正则表达式组。 [UInt or Int](../../sql-reference/data-types/int-uint.md) 可选。

**返回值**

-`pattern`可以包含多个正则组, `index` 代表要提取哪个正则表达式组。如果 `index` 为0,则返回整个匹配的字符串。
+`pattern`可以包含多个正则组, `index` 代表要提取哪个正则表达式组。如果 `index` 为 0,则返回整个匹配的字符串。

数据类型: `String`.

@@ -655,7 +656,7 @@ SELECT

## hasSubsequence

-如果`needle`是`haystack`的子序列,返回1,否贼返回0。
+如果`needle`是`haystack`的子序列,返回1,否则返回0。
子序列是从给定字符串中删除零个或多个元素而不改变剩余元素的顺序得到的序列。

**语法**
@@ -691,7 +692,7 @@ SELECT hasSubsequence('garbage', 'arg') ;
```
## hasSubsequenceCaseInsensitive

-类似于[hasSubsequence](#hasSubsequence)但是不区分大小写。
+类似于 [hasSubsequence](#hasSubsequence) 但是不区分大小写。

## hasSubsequenceUTF8

From 51db06992d01917ec5d5aa4aa76489c0a2767c07 Mon Sep 17 00:00:00 2001
From: Han Fei 
Date: Mon, 1 Apr 2024 17:54:48 +0200
Subject: [PATCH 100/150] refine

---
 .../sql-reference/functions/string-search-functions.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md
index 3d88704a297..79b33a4b0bc 100644
--- a/docs/zh/sql-reference/functions/string-search-functions.md
+++ b/docs/zh/sql-reference/functions/string-search-functions.md
@@ -112,10 +112,9 @@ SELECT

## locate

-类似于 [position](#position) but with arguments `haystack` and `locate` switched.
+类似于 [position](#position) 但交换了 `haystack` 和 `locate` 参数。

此函数的行为取决于 ClickHouse 版本:
-- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`.
- 在 v24.3 以下的版本中,`locate` 是函数`position`的别名,参数为 `(haystack, needle[, start_pos])`。
- 在 v24.3 及以上的版本中,`locate` 是独立的函数(以更好地兼容 MySQL),参数为 `(needle, haystack[, start_pos])`。之前的行为可以通过设置 [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order) 恢复;

@@ -200,7 +199,7 @@ SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);

类似于 `position` , 在字符串`haystack`中匹配多个`needle`子字符串,从左开始任一匹配的子串,返回其位置。

-函数 `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` 提供此函数的不区分大小写 以及/或 UTF-8 变体。
+函数 `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` 和 `multiSearchFirstPositionCaseInsensitiveUTF8` 提供此函数的不区分大小写 以及/或 UTF-8 变体。

**语法**

@@ -374,7 +373,7 @@ extractAllGroupsHorizontal(haystack, pattern)

**参数**

- `haystack` — 输入的字符串,数据类型为[String](../../sql-reference/data-types/string.md).
-- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax) ,必须包含group,每个group用括号括起来。 如果 `pattern` 不包含group则会抛出异常。 数据类型为[String](../../sql-reference/data-types/string.md).
+- `pattern` — 正则表达式([re2正则语法参考](https://github.com/google/re2/wiki/Syntax)),必须包含 group,每个 group 用括号括起来。 如果 `pattern` 不包含 group 则会抛出异常。 数据类型为[String](../../sql-reference/data-types/string.md).

**返回值**

@@ -627,7 +626,7 @@ regexpExtract(haystack, pattern[, index])

**参数**

- `haystack` — 被匹配字符串,类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — 正则表达式,必须是常量。 [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `pattern` — 正则表达式,必须是常量。类型为[String](../../sql-reference/syntax.md#syntax-string-literal).
- `index` – 一个大于等于 0 的整数,默认为 1,它代表要提取哪个正则表达式组。 [UInt or Int](../../sql-reference/data-types/int-uint.md) 可选。

**返回值**

From 99a5e1ed2d114ddd6d9c6b3d8845a9265d20609e Mon Sep 17 00:00:00 2001
From: Han Fei 
Date: Mon, 1 Apr 2024 17:58:25 +0200
Subject: [PATCH 101/150] improve

---
 docs/zh/sql-reference/functions/string-search-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md
index 79b33a4b0bc..972fd84e2a1 100644
--- a/docs/zh/sql-reference/functions/string-search-functions.md
+++ b/docs/zh/sql-reference/functions/string-search-functions.md
@@ -209,7 +209,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN])

## multiSearchFirstIndex

-在字符串`haystack`中匹配最左侧的`needle`子字符串i,返回其索引 `i` (从1开始),如无法匹配则返回0。
+在字符串`haystack`中匹配最左侧的 needlei 子字符串,返回其索引 `i` (从1开始),如无法匹配则返回0。

函数 `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` 和 `multiSearchFirstIndexCaseInsensitiveUTF8` 提供此函数的不区分大小写以及/或 UTF-8 变体。

From 2a5e9ea7136f9f2ec9a5334cd5b82799d3110715 Mon Sep 17 00:00:00 2001
From: Arthur Passos 
Date: Mon, 1 Apr 2024 13:07:52 -0300
Subject: [PATCH 102/150] fix black

---
 tests/clickhouse-test | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 0661380e061..1f0c0a131e4 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -71,6 +71,7 @@ VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$"

TEST_MAX_RUN_TIME_IN_SECONDS = 120

+
class SharedEngineReplacer:
    ENGINES_NON_REPLICATED_REGEXP = r"[ =]((Collapsing|VersionedCollapsing|Summing|Replacing|Aggregating|)MergeTree\(?\)?)"
    ENGINES_MAPPING_REPLICATED = [
@@ -683,7 +684,9 @@ class FailureReason(enum.Enum):
    STDERR = "having stderror: "
    EXCEPTION = 
"having exception in stdout: " RESULT_DIFF = "result differs with reference: " - TOO_LONG = f"Test runs too long (> {TEST_MAX_RUN_TIME_IN_SECONDS}s). Make it faster." + TOO_LONG = ( + f"Test runs too long (> {TEST_MAX_RUN_TIME_IN_SECONDS}s). Make it faster." + ) INTERNAL_QUERY_FAIL = "Internal query (CREATE/DROP DATABASE) failed:" # SKIPPED reasons From f0642485d7191d7aeb9d0716382efc4d4f5ff9d2 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 1 Apr 2024 19:31:02 +0200 Subject: [PATCH 103/150] Add examples for UTF8 variants of ngramDistance --- .../functions/string-search-functions.md | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index e93971879a2..dda1d2170dd 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -580,13 +580,27 @@ ngramDistanceUTF8(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) **Returned value** - Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +**Example** + +Query: + +```sql +SELECT ngramDistanceUTF8('abcde','cde'); +``` + +Result: + +```response +0.5 +``` + ## ngramDistanceCaseInsensitiveUTF8 Provides a case-insensitive variant of [ngramDistanceUTF8](#ngramdistanceutf8). @@ -599,13 +613,26 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) **Returned value** - Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +**Example** + +Query: + +```sql +SELECT ngramDistanceCaseInsensitiveUTF8('abcde','CDE'); +``` + +Result: + +```response +0.5 +``` ## ngramSearch @@ -628,6 +655,7 @@ ngramSearch(haystack, needle) - Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. 
[Float32](../../sql-reference/data-types/float.md/#float32-float64) + **Implementation details** :::note From 5e664b0f9dd28f4196cb56b275260b068d4511ad Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 1 Apr 2024 19:41:18 +0200 Subject: [PATCH 104/150] Correct link to string literal --- .../functions/string-search-functions.md | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index dda1d2170dd..dcfb34cde37 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -477,8 +477,8 @@ ngramDistance(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First comparison string. [String literal](../syntax#string) +- `needle`: Second comparison string. [String literal](../syntax#string) **Returned value** @@ -531,8 +531,8 @@ ngramDistanceCaseInsensitive(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First comparison string. [String literal](../syntax#string) +- `needle`: Second comparison string. [String literal](../syntax#string) **Returned value** @@ -580,8 +580,8 @@ ngramDistanceUTF8(haystack, needle) **Parameters** -- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string) **Returned value** @@ -613,8 +613,8 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Parameters** -- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string) **Returned value** @@ -648,8 +648,8 @@ ngramSearch(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First comparison string. [String literal](../syntax#string) +- `needle`: Second comparison string. [String literal](../syntax#string) **Returned value** @@ -688,8 +688,8 @@ ngramSearchCaseInsensitive(haystack, needle) **Parameters** -- `haystack`: First comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First comparison string. [String literal](../syntax#string) +- `needle`: Second comparison string. [String literal](../syntax#string) **Returned value** @@ -723,8 +723,8 @@ ngramSearchUTF8(haystack, needle) **Parameters** -- `haystack`: First UTF-8 encoded comparison string. 
[String literal](../syntax#syntax-string-literal) -- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string) **Returned value** @@ -758,8 +758,8 @@ ngramSearchCaseInsensitiveUTF8(haystack, needle) **Parameters** -- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) -- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#syntax-string-literal) +- `haystack`: First UTF-8 encoded comparison string. [String literal](../syntax#string) +- `needle`: Second UTF-8 encoded comparison string. [String literal](../syntax#string) **Returned value** From 3207f230f8e6769ccec8c5283a3fcfede98c18a5 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 1 Apr 2024 20:00:30 +0200 Subject: [PATCH 105/150] Small fixes --- docs/en/sql-reference/functions/string-search-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index dcfb34cde37..af4fc8edcd3 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -540,7 +540,7 @@ ngramDistanceCaseInsensitive(haystack, needle) **Examples** -With [ngramDistance](#ngramdistance) differences in case will drive up the similarity score: +With [ngramDistance](#ngramdistance) differences in case will affect the similarity value: Query: @@ -554,7 +554,7 @@ Result: 0.71428573 ``` -With [ngramDistanceCaseInsensitive](#ngramdistancecaseinsensitive) case is ignored so two identical strings differing only in case will now read as identical: +With [ngramDistanceCaseInsensitive](#ngramdistancecaseinsensitive) case is ignored so two identical strings differing only in case will now return a low similarity value: Query: From 9f51e9825d875ebb27b64467c879486afa8cb7bf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Apr 2024 11:02:23 +0200 Subject: [PATCH 106/150] Update NuRaft --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 4a12f99dfc9..08ac76ea80a 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1 +Subproject commit 08ac76ea80a37f89b12109c805eafe9f1dc9b991 From 24b02dc9b81df32f13256549abc9869151617e46 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Apr 2024 11:10:09 +0200 Subject: [PATCH 107/150] Update CMake --- contrib/nuraft-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index eaca00566d6..970ca4b9ce1 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -32,6 +32,7 @@ set(SRCS "${LIBRARY_DIR}/src/handle_custom_notification.cxx" "${LIBRARY_DIR}/src/handle_vote.cxx" "${LIBRARY_DIR}/src/launcher.cxx" + "${LIBRARY_DIR}/src/log_entry.cxx" "${LIBRARY_DIR}/src/srv_config.cxx" "${LIBRARY_DIR}/src/snapshot_sync_req.cxx" "${LIBRARY_DIR}/src/snapshot_sync_ctx.cxx" From 48ab0721a88611957fe42bc6c9e10975f8b7cbbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Apr 2024 11:52:14 +0200 Subject: [PATCH 108/150] Unify lightweight mutation control --- 
src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 6 ++---- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/HDFS/StorageHDFSCluster.h | 2 +- src/Storages/IStorage.h | 5 ++++- src/Storages/MergeTree/MergeTreeData.cpp | 5 +++++ src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 +- src/Storages/StorageAzureBlob.h | 2 +- src/Storages/StorageAzureBlobCluster.h | 2 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageFileCluster.h | 2 +- src/Storages/StorageJoin.h | 2 +- src/Storages/StorageMaterializedMySQL.h | 2 +- src/Storages/StorageMerge.cpp | 4 ++-- src/Storages/StorageMerge.h | 2 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageS3Cluster.h | 2 +- src/Storages/StorageURL.h | 2 +- src/Storages/StorageURLCluster.h | 2 +- 20 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bbf03bb1e0..dee522a2184 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2298,7 +2298,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage - && storage->supportsTrivialCountOptimization() + && storage->supportsTrivialCountOptimization(storage_snapshot, getContext()) && query_info.filter_asts.empty() && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index dddab524101..d2f37ff1ad4 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -225,7 +225,8 @@ bool applyTrivialCountIfPossible( return false; const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage(); - if (!storage->supportsTrivialCountOptimization()) + if (!storage->supportsTrivialCountOptimization( + table_node ? 
table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot(), query_context)) return false; auto storage_id = storage->getStorageID(); @@ -262,9 +263,6 @@ bool applyTrivialCountIfPossible( if (main_query_node.hasGroupBy() || main_query_node.hasPrewhere() || main_query_node.hasWhere()) return false; - if (storage->hasLightweightDeletedMask()) - return false; - if (settings.allow_experimental_query_deduplication || settings.empty_result_for_aggregation_by_empty_set) return false; diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b14bb7f997b..b8faa27d678 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -92,7 +92,7 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & ctx); - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: friend class HDFSSource; diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 26ebc8601ee..0b5c6242aa9 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -36,7 +36,7 @@ public: bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1108eafc6b6..87a04c3fcc6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -265,7 +265,10 @@ public: /// Return true if the trivial count query could be optimized without reading the data at all /// in totalRows() or totalRowsByPartitionPredicate() methods or with optimized reading in read() method. 
- virtual bool supportsTrivialCountOptimization() const { return false; } + virtual bool supportsTrivialCountOptimization(const StorageSnapshotPtr & /*storage_snapshot*/, ContextPtr /*query_context*/) const + { + return false; + } private: StorageID storage_id; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e984f306e2e..8faed72b198 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8221,6 +8221,11 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart DB::updateObjectColumns(object_columns, columns, part->getColumns()); } +bool MergeTreeData::supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const +{ + return !hasLightweightDeletedMask(); +} + StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const { auto snapshot_data = std::make_unique(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 046376be474..0d56b902f1a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -440,7 +440,7 @@ public: bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } - bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; /// Snapshot for MergeTree contains the current set of data parts /// at the moment of the start of query. diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 8525108735b..230464a161f 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -93,7 +93,7 @@ public: bool supportsDelete() const override { return true; } /// To turn on the optimization optimize_trivial_approximate_count_query=1 should be set for a query. 
- bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } std::optional totalRows(const Settings & settings) const override; diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 27ac7a5c368..3f1ba33f636 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -100,7 +100,7 @@ public: bool supportsSubsetOfColumns(const ContextPtr & context) const; - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 545e568a772..eff4d70f1bd 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -35,7 +35,7 @@ public: bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: void updateBeforeRead(const ContextPtr & /*context*/) override {} diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 93c263008a6..588429284f0 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -134,7 +134,7 @@ public: const ContextPtr & context, size_t & total_bytes_to_read); - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: friend class StorageFileSource; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 3acbc71ba7e..973d595bbf0 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -32,7 +32,7 @@ public: bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index dc68c68a21b..c76df0cb452 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -85,7 +85,7 @@ public: const Names & getKeyNames() const { return key_names; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: Block sample_block; diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 9f5d157ce3b..3e0cf3e6925 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -40,7 +40,7 @@ public: void drop() override { nested_storage->drop(); } - bool supportsTrivialCountOptimization() const override { return false; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return false; } IndexSizeByName getSecondaryIndexSizes() const override { diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 1b5e4860464..5eceddfe06d 
100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1675,9 +1675,9 @@ std::tuple StorageMerge::evaluateDatabaseName(cons return {false, ast}; } -bool StorageMerge::supportsTrivialCountOptimization() const +bool StorageMerge::supportsTrivialCountOptimization(const StorageSnapshotPtr & storage_snapshot, ContextPtr ctx) const { - return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(); }) == nullptr; + return getFirstTable([&](const auto & table) { return !table->supportsTrivialCountOptimization(storage_snapshot, ctx); }) == nullptr; } std::optional StorageMerge::totalRows(const Settings & settings) const diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index c049d50f3b4..a63ea1e32ef 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -76,7 +76,7 @@ public: /// Evaluate database name or regexp for StorageMerge and TableFunction merge static std::tuple evaluateDatabaseName(const ASTPtr & node, ContextPtr context); - bool supportsTrivialCountOptimization() const override; + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index d1f15edfd6d..19cbfaa6f08 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -352,7 +352,7 @@ public: using KeysWithInfo = StorageS3Source::KeysWithInfo; - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: virtual Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context); diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 6a5b03e682f..802fd3f9139 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -32,7 +32,7 @@ public: bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } protected: void updateConfigurationIfChanged(ContextPtr local_context); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 842cfd5b627..5aca3df1513 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -127,7 +127,7 @@ protected: bool parallelizeOutputAfterReading(ContextPtr context) const override; - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: static std::pair getTableStructureAndFormatFromDataImpl( diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index dce2e0106ea..c80cdec74a2 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -35,7 +35,7 @@ public: bool supportsSubcolumns() const override { return true; } - bool supportsTrivialCountOptimization() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } private: void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; From 
fb94a954258ebaeb6ca3fcecb6164243e4eb8d33 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 12:23:22 +0200 Subject: [PATCH 109/150] Add logging --- src/Interpreters/Cache/IFileCachePriority.h | 4 ++-- src/Interpreters/Cache/LRUFileCachePriority.cpp | 13 +++++++++++-- src/Interpreters/Cache/LRUFileCachePriority.h | 1 + 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index ff06f17ce36..8dcc114d9cd 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -192,8 +192,8 @@ protected: virtual void releaseImpl(size_t /* size */, size_t /* elements */) {} - size_t max_size = 0; - size_t max_elements = 0; + std::atomic max_size = 0; + std::atomic max_elements = 0; }; } diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index e65c102f1e3..1d9725352be 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -439,12 +439,15 @@ void LRUFileCachePriority::LRUIterator::invalidate() assertValid(); const auto & entry = *iterator; - LOG_TEST(cache_priority->log, - "Invalidating entry in LRU queue entry {}", entry->toString()); chassert(entry->size != 0); cache_priority->updateSize(-entry->size); cache_priority->updateElementsCount(-1); + + LOG_TEST(cache_priority->log, + "Invalidated entry in LRU queue {}: {}", + entry->toString(), cache_priority->getApproxStateInfoForLog()); + entry->size = 0; } @@ -521,6 +524,12 @@ std::string LRUFileCachePriority::getStateInfoForLog(const CachePriorityGuard::L getSize(lock), max_size, getElementsCount(lock), max_elements, description); } +std::string LRUFileCachePriority::getApproxStateInfoForLog() const +{ + return fmt::format("size: {}/{}, elements: {}/{} (description: {})", + getSizeApprox(), max_size, getElementsCountApprox(), max_elements, description); +} + void LRUFileCachePriority::holdImpl( size_t size, size_t elements, diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 31968d61196..6627fcf1dee 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -116,6 +116,7 @@ private: const CachePriorityGuard::Lock & lock) override; void releaseImpl(size_t size, size_t elements) override; + std::string getApproxStateInfoForLog() const; }; class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator From 3914a0f67a1754b9ed8ef3ee99c0edb0c75c48d7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 11:01:02 +0000 Subject: [PATCH 110/150] Document SYSTEM RELOAD ASYNCHRONOUS METRICS --- docs/en/sql-reference/statements/system.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 1dee2eac698..991a272166a 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -64,6 +64,14 @@ RELOAD FUNCTIONS [ON CLUSTER cluster_name] RELOAD FUNCTION [ON CLUSTER cluster_name] function_name ``` +## RELOAD ASYNCHRONOUS METRICS + +Re-calculates all [asynchronous metrics](../../operations/system-tables/asynchronous_metrics.md). 
Since asynchronous metrics are periodically updated based on setting [asynchronous_metrics_update_period_s](../../operations/server-configuration-parameters/settings.md), updating them manually using this statment is not necessary. + +```sql +RELOAD ASYNCHRONOUS METRICS [ON CLUSTER cluster_name] +``` + ## DROP DNS CACHE Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). From 13c348c3c8e95a5c4aed3c28e70e365cbd4e16eb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:24:11 +0200 Subject: [PATCH 111/150] Fix spelling --- docs/en/sql-reference/statements/system.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 991a272166a..b35e9426297 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -66,7 +66,7 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name ## RELOAD ASYNCHRONOUS METRICS -Re-calculates all [asynchronous metrics](../../operations/system-tables/asynchronous_metrics.md). Since asynchronous metrics are periodically updated based on setting [asynchronous_metrics_update_period_s](../../operations/server-configuration-parameters/settings.md), updating them manually using this statment is not necessary. +Re-calculates all [asynchronous metrics](../../operations/system-tables/asynchronous_metrics.md). Since asynchronous metrics are periodically updated based on setting [asynchronous_metrics_update_period_s](../../operations/server-configuration-parameters/settings.md), updating them manually using this statement is typically not necessary. ```sql RELOAD ASYNCHRONOUS METRICS [ON CLUSTER cluster_name] From a8f3a07f1ff81dfb253919f57b8cf801f13783c8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 2 Apr 2024 11:35:41 +0000 Subject: [PATCH 112/150] init --- docs/en/operations/system-tables/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index eaf79d035a9..d9800e05ff9 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -47,7 +47,7 @@ An example: ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 - 1048576 + 1048576 8192 524288 false From e659071b789512fcfde108e73ea291d3a8fe86ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 2 Apr 2024 11:37:48 +0000 Subject: [PATCH 113/150] Improve docs --- docs/en/sql-reference/statements/alter/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index dc6668c7983..7961315c193 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -56,7 +56,9 @@ Entries for finished mutations are not deleted right away (the number of preserv For non-replicated tables, all `ALTER` queries are performed synchronously. For replicated tables, the query just adds instructions for the appropriate actions to `ZooKeeper`, and the actions themselves are performed as soon as possible. 
However, the query can wait for these actions to be completed on all the replicas.

-For all `ALTER` queries, you can use the [alter_sync](/docs/en/operations/settings/settings.md/#alter-sync) setting to set up waiting.
+For `ALTER` queries that create mutations (including, but not limited to, `UPDATE`, `DELETE`, `MATERIALIZE INDEX`, `MATERIALIZE PROJECTION`, `MATERIALIZE COLUMN`, `APPLY DELETED MASK`, `CLEAR STATISTIC`, `MATERIALIZE STATISTIC`), the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.
+
+For other `ALTER` queries which only modify the metadata, you can use the [alter_sync](/docs/en/operations/settings/settings.md/#alter-sync) setting to set up waiting.

You can specify how long (in seconds) to wait for inactive replicas to execute all `ALTER` queries with the [replication_wait_for_inactive_replica_timeout](/docs/en/operations/settings/settings.md/#replication-wait-for-inactive-replica-timeout) setting.

@@ -64,8 +66,6 @@ You can specify how long (in seconds) to wait for inactive replicas to execute a

For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active for more than the time, specified in the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown.
:::

-For `ALTER TABLE ... UPDATE|DELETE|MATERIALIZE INDEX|MATERIALIZE PROJECTION|MATERIALIZE COLUMN` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting.
-
## Related content

- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)

From 40a22ffa63453f3ee4db7e17e5d4d0d4023a886d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= 
Date: Tue, 2 Apr 2024 13:43:14 +0200
Subject: [PATCH 114/150] Remove a few nested include dependencies

---
 src/Access/AccessRights.cpp | 4 +++-
 src/Common/AtomicLogger.h | 4 ++++
 src/Common/CurrentThreadHelpers.cpp | 16 ++++++++++++++++
 src/Common/CurrentThreadHelpers.h | 9 +++++++++
 src/Common/Exception.cpp | 19 ++++++++++---------
 src/Common/Exception.h | 16 ++++++++--------
 src/Common/Jemalloc.cpp | 1 +
 src/Common/Logger.h | 13 +++++++++----
 src/Common/ProfileEventsScope.cpp | 1 +
 src/Common/ProfileEventsScope.h | 3 ++-
 src/Common/logger_useful.h | 15 ++++++---------
 src/Core/BackgroundSchedulePool.h | 21 ++++++++++-----------
 src/IO/TimeoutSetter.cpp | 2 +-
 src/Interpreters/ExternalLoader.cpp | 21 +++++++++++----------
 src/Server/CertificateReloader.cpp | 3 ++-
 src/Storages/NATS/NATSHandler.cpp | 1 +
 16 files changed, 94 insertions(+), 55 deletions(-)
 create mode 100644 src/Common/CurrentThreadHelpers.cpp
 create mode 100644 src/Common/CurrentThreadHelpers.h

diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp
index 36a68bc0a34..a87e9361e8e 100644
--- a/src/Access/AccessRights.cpp
+++ b/src/Access/AccessRights.cpp
@@ -1,6 +1,8 @@
#include 
-#include 
#include 
+#include 
+#include 
+
#include 
#include 
#include 
diff --git a/src/Common/AtomicLogger.h b/src/Common/AtomicLogger.h
index 4bda55e070b..9581358218c 100644
--- a/src/Common/AtomicLogger.h
+++ b/src/Common/AtomicLogger.h
@@ -6,6 +6,8 @@
#include 
#include 

+namespace DB
+{

/** AtomicLogger allows to atomically change logger.
* Standard library does not have atomic_shared_ptr, and we do not use std::atomic* operations, @@ -49,3 +51,5 @@ private: mutable DB::SharedMutex log_mutex; LoggerPtr logger; }; + +} diff --git a/src/Common/CurrentThreadHelpers.cpp b/src/Common/CurrentThreadHelpers.cpp new file mode 100644 index 00000000000..cbfb50bf3b1 --- /dev/null +++ b/src/Common/CurrentThreadHelpers.cpp @@ -0,0 +1,16 @@ +#include +#include + +namespace DB +{ + +bool currentThreadHasGroup() +{ + return DB::CurrentThread::getGroup() != nullptr; +} + +LogsLevel currentThreadLogsLevel() +{ + return DB::CurrentThread::get().getClientLogsLevel(); +} +} diff --git a/src/Common/CurrentThreadHelpers.h b/src/Common/CurrentThreadHelpers.h new file mode 100644 index 00000000000..01a180e74d2 --- /dev/null +++ b/src/Common/CurrentThreadHelpers.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace DB +{ +bool currentThreadHasGroup(); +LogsLevel currentThreadLogsLevel(); +} diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 7e73e2c0783..7d38fdafddb 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -1,26 +1,27 @@ -#include "Exception.h" - -#include -#include -#include -#include -#include #include #include #include #include #include #include -#include +#include #include +#include #include #include #include +#include #include #include #include -#include +#include +#include +#include +#include +#include + +#include namespace fs = std::filesystem; diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 0c8a7177a99..97af8d1ffc3 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -1,22 +1,20 @@ #pragma once -#include -#include -#include -#include - -#include - #include #include #include #include -#include #include #include #include +#include +#include +#include +#include + #include +#include namespace Poco { class Logger; } @@ -24,6 +22,8 @@ namespace Poco { class Logger; } namespace DB { +class AtomicLogger; + [[noreturn]] void abortOnFailedAssertion(const String & description); /// This flag can be set for testing purposes - to check that no exceptions are thrown. diff --git a/src/Common/Jemalloc.cpp b/src/Common/Jemalloc.cpp index 3eb8691a1e1..6514639e700 100644 --- a/src/Common/Jemalloc.cpp +++ b/src/Common/Jemalloc.cpp @@ -2,6 +2,7 @@ #if USE_JEMALLOC +#include #include #include #include diff --git a/src/Common/Logger.h b/src/Common/Logger.h index 0425da8c847..b4688eb0a9c 100644 --- a/src/Common/Logger.h +++ b/src/Common/Logger.h @@ -1,15 +1,20 @@ #pragma once -#include - #include -#include +#include + #include #include -using LoggerPtr = Poco::LoggerPtr; +namespace Poco +{ +class Channel; +class Logger; +using LoggerPtr = std::shared_ptr; +} +using LoggerPtr = std::shared_ptr; using LoggerRawPtr = Poco::Logger *; /** RAII wrappers around Poco/Logger.h. 
diff --git a/src/Common/ProfileEventsScope.cpp b/src/Common/ProfileEventsScope.cpp index 92f75f4f5b0..1ba5f7f165c 100644 --- a/src/Common/ProfileEventsScope.cpp +++ b/src/Common/ProfileEventsScope.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB diff --git a/src/Common/ProfileEventsScope.h b/src/Common/ProfileEventsScope.h index 0444531d02b..8eabfcb55c0 100644 --- a/src/Common/ProfileEventsScope.h +++ b/src/Common/ProfileEventsScope.h @@ -1,7 +1,8 @@ #pragma once #include -#include + +#include namespace DB { diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 1ce4f545e6f..9d6ebaddcc6 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -5,13 +5,11 @@ #include #include #include -#include -#include -#include -#include #include - -namespace Poco { class Logger; } +#include +#include +#include +#include #define LogToStr(x, y) std::make_unique(x, y) @@ -22,7 +20,7 @@ using LogSeriesLimiterPtr = std::shared_ptr; namespace impl { [[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; } - [[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); } + [[maybe_unused]] inline LoggerPtr getLoggerHelper(const DB::AtomicLogger & logger) { return logger.load(); } [[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } @@ -66,8 +64,7 @@ namespace impl #define LOG_IMPL(logger, priority, PRIORITY, ...) do \ { \ auto _logger = ::impl::getLoggerHelper(logger); \ - const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \ - (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \ + const bool _is_clients_log = DB::currentThreadHasGroup() && DB::currentThreadLogsLevel() >= (priority); \ if (!_is_clients_log && !_logger->is((PRIORITY))) \ break; \ \ diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index eca93353283..a1450be2466 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -1,21 +1,20 @@ #pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include -#include +#include namespace DB diff --git a/src/IO/TimeoutSetter.cpp b/src/IO/TimeoutSetter.cpp index b355a119462..2e732782700 100644 --- a/src/IO/TimeoutSetter.cpp +++ b/src/IO/TimeoutSetter.cpp @@ -1,5 +1,5 @@ #include - +#include #include diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index bd56a540128..f9e24e2de70 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1,19 +1,20 @@ #include "ExternalLoader.h" #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 311ece67bce..98d7a362bd7 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -1,7 +1,8 @@ -#include "CertificateReloader.h" 
+#include #if USE_SSL +#include #include #include #include diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp index 03f1fc1a495..f0554a7f1f0 100644 --- a/src/Storages/NATS/NATSHandler.cpp +++ b/src/Storages/NATS/NATSHandler.cpp @@ -1,3 +1,4 @@ +#include #include #include #include From 66cd879d2dd48381249c0f3263c646e5d220d3c0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Apr 2024 13:58:17 +0200 Subject: [PATCH 115/150] Fix msan --- src/Coordination/Changelog.cpp | 2 +- src/Coordination/InMemoryLogStore.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index d531b1266ef..58d396aad88 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -2219,7 +2219,7 @@ uint64_t Changelog::getStartIndex() const LogEntryPtr Changelog::getLastEntry() const { /// This entry treaded in special way by NuRaft - static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(uint64_t))); + static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(0)); auto entry = entry_storage.getEntry(max_log_id); if (entry == nullptr) diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp index ee93c02b4b0..32aaf8e0d4a 100644 --- a/src/Coordination/InMemoryLogStore.cpp +++ b/src/Coordination/InMemoryLogStore.cpp @@ -16,7 +16,7 @@ ptr makeClone(const ptr & entry) InMemoryLogStore::InMemoryLogStore() : start_idx(1) { - nuraft::ptr buf = nuraft::buffer::alloc(sizeof(uint64_t)); + nuraft::ptr buf = nuraft::buffer::alloc(0); logs[0] = nuraft::cs_new(0, buf); } From c35a4364359e0d0d2b061c684b3005c5e179b5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Apr 2024 14:45:48 +0200 Subject: [PATCH 116/150] Remove nested dependency on DateLutImpl --- programs/client/Client.cpp | 9 +- .../AggregateFunctionFactory.cpp | 2 +- src/Analyzer/SetUtils.h | 9 +- src/Common/DateLUT.cpp | 38 +++++++- src/Common/DateLUT.h | 45 ++------- src/Common/DateLUTImpl.cpp | 34 ++++++- src/Common/DateLUTImpl.h | 30 ++---- src/Common/LocalDate.h | 3 +- src/Common/LoggingFormatStringHelpers.cpp | 1 + src/Common/QueryProfiler.cpp | 11 ++- src/Common/ThreadFuzzer.cpp | 1 + src/Common/ZooKeeper/ZooKeeperImpl.cpp | 11 ++- src/Common/mysqlxx/mysqlxx/Pool.h | 2 + src/Common/tests/gtest_resolve_pool.cpp | 3 +- src/Daemon/BaseDaemon.cpp | 6 +- src/DataTypes/DataTypeDate32.cpp | 7 ++ src/DataTypes/DataTypeDate32.h | 6 +- .../DatabaseMaterializedPostgreSQL.cpp | 1 + src/Formats/JSONUtils.h | 1 + src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionsConversion.cpp | 93 ++++++++++--------- src/Functions/FunctionsTimeWindow.h | 3 +- .../extractTimeZoneFromFunctionArguments.cpp | 7 +- src/Functions/makeDate.cpp | 1 + src/Functions/today.cpp | 8 +- src/Functions/yesterday.cpp | 8 +- src/IO/S3/BlobStorageLogWriter.cpp | 1 + src/IO/S3/Client.cpp | 1 + src/Interpreters/AsynchronousMetricLog.cpp | 5 +- src/Interpreters/Cache/EvictionCandidates.cpp | 1 + src/Interpreters/Cache/FileSegment.cpp | 3 +- .../Cache/LRUFileCachePriority.cpp | 11 ++- src/Interpreters/Cache/QueryLimit.cpp | 5 +- .../Cache/WriteBufferToFileSegment.cpp | 3 +- src/Interpreters/CrashLog.cpp | 11 ++- .../FilesystemReadPrefetchesLog.cpp | 3 +- src/Interpreters/HashJoin.cpp | 10 +- src/Interpreters/MetricLog.cpp | 11 ++- src/Interpreters/ProcessorsProfileLog.cpp | 9 +- src/Interpreters/tests/gtest_page_cache.cpp | 2 + src/Loggers/Loggers.cpp | 9 +- 
.../Formats/Impl/CHColumnToArrowColumn.cpp | 2 +- src/Server/InterserverIOHTTPHandler.cpp | 3 +- src/Server/MySQLHandler.cpp | 2 + src/Server/PostgreSQLHandler.cpp | 10 +- src/Storages/FileLog/DirectoryWatcherBase.h | 1 + src/Storages/Kafka/StorageKafka.h | 3 +- src/Storages/MaterializedView/RefreshSet.h | 2 +- .../MergeTree/BackgroundJobsAssignee.h | 4 +- src/Storages/S3Queue/S3QueueFilesMetadata.cpp | 10 +- 50 files changed, 262 insertions(+), 202 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d4bf2f686c8..192f9e61891 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -17,12 +17,13 @@ #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 18edb7c8ce0..6555ae63128 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -1,11 +1,11 @@ #include #include - #include #include #include #include #include +#include static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000; diff --git a/src/Analyzer/SetUtils.h b/src/Analyzer/SetUtils.h index c35b45dce59..aef906a6576 100644 --- a/src/Analyzer/SetUtils.h +++ b/src/Analyzer/SetUtils.h @@ -1,14 +1,15 @@ #pragma once -#include +#include -#include - -#include +#include namespace DB { +class IDataType; +using DataTypePtr = std::shared_ptr; + class Set; using SetPtr = std::shared_ptr; diff --git a/src/Common/DateLUT.cpp b/src/Common/DateLUT.cpp index 2b261a28469..3a20fb1a125 100644 --- a/src/Common/DateLUT.cpp +++ b/src/Common/DateLUT.cpp @@ -1,13 +1,15 @@ #include "DateLUT.h" +#include +#include +#include + #include #include #include -#include #include #include -#include namespace @@ -140,6 +142,38 @@ std::string determineDefaultTimeZone() } +const DateLUTImpl & DateLUT::instance() +{ + const auto & date_lut = getInstance(); + + if (DB::CurrentThread::isInitialized()) + { + std::string timezone_from_context; + const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); + + if (query_context) + { + timezone_from_context = extractTimezoneFromContext(query_context); + + if (!timezone_from_context.empty()) + return date_lut.getImplementation(timezone_from_context); + } + + /// On the server side, timezone is passed in query_context, + /// but on CH-client side we have no query context, + /// and each time we modify client's global context + const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext(); + if (global_context) + { + timezone_from_context = extractTimezoneFromContext(global_context); + + if (!timezone_from_context.empty()) + return date_lut.getImplementation(timezone_from_context); + } + } + return serverTimezoneInstance(); +} + DateLUT::DateLUT() { /// Initialize the pointer to the default DateLUTImpl. diff --git a/src/Common/DateLUT.h b/src/Common/DateLUT.h index 2045d4895e7..d0b85ea9895 100644 --- a/src/Common/DateLUT.h +++ b/src/Common/DateLUT.h @@ -1,17 +1,23 @@ #pragma once -#include "DateLUTImpl.h" - #include +#include #include -#include "Common/CurrentThread.h" #include #include #include #include +namespace DB +{ +class Context; +using ContextPtr = std::shared_ptr; +} + +class DateLUTImpl; + /// This class provides lazy initialization and lookup of singleton DateLUTImpl objects for a given timezone. 
class DateLUT : private boost::noncopyable @@ -20,38 +26,7 @@ public: /// Return DateLUTImpl instance for session timezone. /// session_timezone is a session-level setting. /// If setting is not set, returns the server timezone. - static ALWAYS_INLINE const DateLUTImpl & instance() - { - const auto & date_lut = getInstance(); - - if (DB::CurrentThread::isInitialized()) - { - std::string timezone_from_context; - const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - { - timezone_from_context = extractTimezoneFromContext(query_context); - - if (!timezone_from_context.empty()) - return date_lut.getImplementation(timezone_from_context); - } - - /// On the server side, timezone is passed in query_context, - /// but on CH-client side we have no query context, - /// and each time we modify client's global context - const DB::ContextPtr global_context = DB::CurrentThread::get().getGlobalContext(); - if (global_context) - { - timezone_from_context = extractTimezoneFromContext(global_context); - - if (!timezone_from_context.empty()) - return date_lut.getImplementation(timezone_from_context); - } - - } - return serverTimezoneInstance(); - } + static const DateLUTImpl & instance(); static ALWAYS_INLINE const DateLUTImpl & instance(const std::string & time_zone) { diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index bb677b3a62d..341e571e4eb 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -1,8 +1,5 @@ -#include "DateLUTImpl.h" - -#include -#include -#include +#include +#include #include #include @@ -11,6 +8,10 @@ #include #include +#include +#include +#include + namespace DB { @@ -214,6 +215,29 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_) } } +unsigned int DateLUTImpl::toMillisecond(const DB::DateTime64 & datetime, Int64 scale_multiplier) const +{ + constexpr Int64 millisecond_multiplier = 1'000; + constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier; + constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier; + + auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier); + + if (datetime.value < 0 && components.fractional) + { + components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional; + --components.whole; + } + Int64 fractional = components.fractional; + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); + + UInt16 millisecond = static_cast(fractional / divider); + return millisecond; +} + /// Prefer to load timezones from blobs linked to the binary. /// The blobs are provided by "tzdata" library. diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 082127e717c..01cbae3d447 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -50,6 +49,11 @@ enum class WeekDayMode WeekStartsSunday1 = 3 }; +namespace DB +{ +class DateTime64; +} + /** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on. * First time was implemented for OLAPServer, that needed to do billions of such transformations. 
*/ @@ -593,29 +597,7 @@ public: return time % 60; } - template - unsigned toMillisecond(const DateOrTime & datetime, Int64 scale_multiplier) const - { - constexpr Int64 millisecond_multiplier = 1'000; - constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier; - constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier; - - auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier); - - if (datetime.value < 0 && components.fractional) - { - components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional; - --components.whole; - } - Int64 fractional = components.fractional; - if (scale_multiplier > microsecond_multiplier) - fractional = fractional / (scale_multiplier / microsecond_multiplier); - else if (scale_multiplier < microsecond_multiplier) - fractional = fractional * (microsecond_multiplier / scale_multiplier); - - UInt16 millisecond = static_cast(fractional / divider); - return millisecond; - } + unsigned toMillisecond(const DB::DateTime64 & datetime, Int64 scale_multiplier) const; unsigned toMinute(Time t) const { diff --git a/src/Common/LocalDate.h b/src/Common/LocalDate.h index 2331a40fd12..f1abc98c8e2 100644 --- a/src/Common/LocalDate.h +++ b/src/Common/LocalDate.h @@ -1,9 +1,10 @@ #pragma once #include -#include #include +#include #include +#include /** Stores a calendar date in broken-down form (year, month, day-in-month). diff --git a/src/Common/LoggingFormatStringHelpers.cpp b/src/Common/LoggingFormatStringHelpers.cpp index 7cbef779f28..3e90526f76d 100644 --- a/src/Common/LoggingFormatStringHelpers.cpp +++ b/src/Common/LoggingFormatStringHelpers.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 34ffbf6c498..61d4d7d609c 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -1,15 +1,16 @@ #include "QueryProfiler.h" #include -#include +#include +#include +#include #include #include +#include #include -#include +#include #include -#include -#include -#include +#include #include diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 9f9ec4fa356..d1e252a8184 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 03d1b5a93d4..2185d32e47a 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1,7 +1,8 @@ #include -#include -#include +#include +#include +#include #include #include #include @@ -10,17 +11,17 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include -#include -#include -#include +#include #include "Coordination/KeeperConstants.h" #include "config.h" diff --git a/src/Common/mysqlxx/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h index c85295c4dd0..6e509d8bdd6 100644 --- a/src/Common/mysqlxx/mysqlxx/Pool.h +++ b/src/Common/mysqlxx/mysqlxx/Pool.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include diff --git a/src/Common/tests/gtest_resolve_pool.cpp b/src/Common/tests/gtest_resolve_pool.cpp index 25e867fdebc..eef4635e7b1 100644 --- a/src/Common/tests/gtest_resolve_pool.cpp +++ b/src/Common/tests/gtest_resolve_pool.cpp @@ -1,6 +1,7 @@ #include -#include #include +#include +#include #include #include diff --git 
a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 7fc210a691a..cc22db3969c 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -1,9 +1,11 @@ #pragma clang diagnostic ignored "-Wreserved-identifier" +#include +#include +#include +#include #include #include -#include -#include #include #include diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index 83b1260eb6d..762552bcb4c 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include namespace DB { @@ -14,6 +16,11 @@ SerializationPtr DataTypeDate32::doGetDefaultSerialization() const return std::make_shared(); } +Field DataTypeDate32::getDefault() const +{ + return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); +} + void registerDataTypeDate32(DataTypeFactory & factory) { factory.registerSimpleDataType( diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 02e818f10df..65633e7a228 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include namespace DB @@ -15,10 +14,7 @@ public: TypeIndex getColumnType() const override { return TypeIndex::Int32; } const char * getFamilyName() const override { return family_name; } - Field getDefault() const override - { - return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); - } + Field getDefault() const override; bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index b44bc136b1f..1c0d5fe3de1 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 106884484c7..7ee111c1285 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -13,6 +13,7 @@ namespace DB { +class Block; struct JSONInferenceInfo; namespace JSONUtils diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 558c309007c..4e8c1d651e1 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1526,7 +1526,7 @@ struct ToMillisecondImpl static UInt16 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl & time_zone) { - return time_zone.toMillisecond(datetime64, scale_multiplier); + return time_zone.toMillisecond(datetime64, scale_multiplier); } static UInt16 execute(UInt32, const DateLUTImpl &) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 7049ca44110..0f624a2fa2e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,66 +1,67 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include 
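The relocated getDefault() above returns the negated epoch offset, which is easier to read with the number spelled out. A short sketch, assuming the 25567-day gap between 1900-01-01 (the minimum Date32 value) and the 1970-01-01 epoch that getDayNumOffsetEpoch() is understood to return; the constant is hard-coded here, not taken from ClickHouse.

``` cpp
// Why Date32's default is negative: Date32 stores days relative to the
// 1970-01-01 epoch but ranges back to 1900-01-01, so the default (minimum)
// value is minus the epoch offset. The constant below is an assumption
// mirroring getDayNumOffsetEpoch(), not a call into ClickHouse.
#include <cstdint>
#include <cstdio>

constexpr int64_t day_num_offset_epoch = 25567; // days from 1900-01-01 to 1970-01-01

constexpr int32_t date32Default()
{
    return -static_cast<int32_t>(day_num_offset_epoch);
}

int main()
{
    // 70 years of 365 days plus 17 leap days (1904..1968; 1900 is not a leap year).
    static_assert(70 * 365 + 17 == day_num_offset_epoch);
    std::printf("%d\n", date32Default()); // -25567, i.e. 1900-01-01
}
```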
#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include -#include #include -#include +#include +#include namespace DB diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index d52b76bec91..4532286830d 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -1,8 +1,9 @@ #pragma once -#include #include #include +#include +#include namespace DB diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index 7168c68c9c9..cb8a834ed3b 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -1,10 +1,11 @@ -#include -#include +#include #include #include #include -#include +#include +#include #include +#include namespace DB diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp index c7f3c195578..8794283a856 100644 --- a/src/Functions/makeDate.cpp +++ b/src/Functions/makeDate.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/src/Functions/today.cpp b/src/Functions/today.cpp index 16a5b98d7ec..356660fa7b5 100644 --- a/src/Functions/today.cpp +++ b/src/Functions/today.cpp @@ -1,11 +1,9 @@ -#include - #include - #include - -#include #include +#include +#include +#include namespace DB diff --git a/src/Functions/yesterday.cpp b/src/Functions/yesterday.cpp index 43832c1faaa..4d79f1fef79 100644 --- a/src/Functions/yesterday.cpp +++ b/src/Functions/yesterday.cpp @@ -1,11 +1,9 @@ -#include - #include - #include - -#include #include +#include +#include +#include namespace DB diff --git a/src/IO/S3/BlobStorageLogWriter.cpp b/src/IO/S3/BlobStorageLogWriter.cpp index fe33f1c8799..aaf4aea5a8e 100644 --- a/src/IO/S3/BlobStorageLogWriter.cpp +++ b/src/IO/S3/BlobStorageLogWriter.cpp @@ -3,6 +3,7 @@ #if USE_AWS_S3 #include +#include #include #include #include diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 4f93aba2f84..b2ad4668095 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -1,4 +1,5 @@ #include +#include #include #if USE_AWS_S3 diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index dc67bd91550..4287798c4ca 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -6,8 +5,10 @@ #include #include #include -#include #include +#include +#include +#include namespace DB diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index cb15af72704..f21c5f3a508 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace ProfileEvents diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 9ec2b090dc7..e474e24c6f1 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -6,10 +6,11 @@ #include #include #include 
+#include +#include #include #include #include -#include #include #include diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index e65c102f1e3..ddc30755409 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -1,10 +1,11 @@ -#include -#include #include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include namespace CurrentMetrics { diff --git a/src/Interpreters/Cache/QueryLimit.cpp b/src/Interpreters/Cache/QueryLimit.cpp index 9421005dc92..6a5b5bf67ca 100644 --- a/src/Interpreters/Cache/QueryLimit.cpp +++ b/src/Interpreters/Cache/QueryLimit.cpp @@ -1,6 +1,7 @@ -#include -#include #include +#include +#include +#include namespace DB { diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 51914c0a14e..acdfa0d5437 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -7,8 +7,9 @@ #include -#include +#include #include +#include namespace DB { diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 410ea922429..4a8ef84fd5c 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -1,14 +1,15 @@ -#include -#include #include +#include +#include #include #include #include -#include -#include +#include +#include #include -#include +#include #include +#include #include diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.cpp b/src/Interpreters/FilesystemReadPrefetchesLog.cpp index 7fb2e3d1f4c..8cea05a1857 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.cpp +++ b/src/Interpreters/FilesystemReadPrefetchesLog.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -6,6 +5,8 @@ #include #include #include +#include +#include namespace DB diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index ddd65f95627..12a906526f6 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -3,15 +3,15 @@ #include #include -#include -#include - #include -#include -#include #include #include +#include #include +#include +#include +#include +#include #include diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 5f6db0da520..6ed29cfadcb 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -1,12 +1,13 @@ -#include -#include -#include -#include -#include #include #include #include +#include #include +#include +#include +#include +#include +#include namespace DB diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index 015b4abc712..7dec2a3163a 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -1,15 +1,16 @@ #include -#include -#include +#include #include #include #include #include +#include #include #include -#include -#include +#include +#include +#include #include #include diff --git a/src/Interpreters/tests/gtest_page_cache.cpp b/src/Interpreters/tests/gtest_page_cache.cpp index 1e2688c0ca2..30fa3b921c9 100644 --- a/src/Interpreters/tests/gtest_page_cache.cpp +++ b/src/Interpreters/tests/gtest_page_cache.cpp @@ -1,4 +1,6 @@ +#include #include + #include #include diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index cc6e4691737..c5862b82f34 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -1,14 +1,17 @@ 
#include "Loggers.h" -#include -#include -#include #include "OwnFormattingChannel.h" #include "OwnPatternFormatter.h" #include "OwnSplitChannel.h" + +#include +#include + #include #include #include +#include +#include #ifndef WITHOUT_TEXT_LOG #include diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 9d6c8420069..2b40e796c5c 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -2,7 +2,6 @@ #if USE_ARROW || USE_PARQUET -// #include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 28045380cd7..0d79aaa227b 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -8,8 +8,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 32490627214..6456f6d24ff 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include #include #include diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 83e06628185..473d681ddb2 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -1,17 +1,19 @@ +#include "PostgreSQLHandler.h" #include -#include #include +#include #include #include #include -#include "PostgreSQLHandler.h" #include #include -#include -#include #include #include +#include +#include #include +#include +#include #if USE_SSL # include diff --git a/src/Storages/FileLog/DirectoryWatcherBase.h b/src/Storages/FileLog/DirectoryWatcherBase.h index 0dfb58fbc5c..3bf93415b8f 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.h +++ b/src/Storages/FileLog/DirectoryWatcherBase.h @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 829e23faf77..6479902f0f2 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -19,8 +19,9 @@ namespace DB { -class StorageSystemKafkaConsumers; class ReadFromStorageKafka; +class StorageSystemKafkaConsumers; +class ThreadStatus; struct StorageKafkaInterceptors; diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index 792e4b60897..eff445023a6 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include - #include namespace DB diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.h b/src/Storages/MergeTree/BackgroundJobsAssignee.h index 65fefce0917..9369ebe9135 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.h +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.h @@ -1,7 +1,9 @@ #pragma once -#include #include +#include +#include + #include diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp index 7b4438e1387..ed2f8d2ec1b 100644 --- a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -1,9 +1,5 @@ #include "config.h" -#include -#include -#include -#include #include #include #include @@ -12,6 +8,12 @@ #include #include #include +#include +#include 
+#include +#include +#include + #include #include #include From 9c3bf05675700a7ad2c9c66bb4ea67aaf2d8964f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 2 Apr 2024 14:58:45 +0000 Subject: [PATCH 117/150] init --- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- .../sql-reference/aggregate-functions/parametric-functions.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3654cd157e9..38dca6b7071 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -483,7 +483,7 @@ Where: - `r1`- the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). - `r2`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). -- `r3`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). +- `r3`- the number of unique visitors who visited the site on both 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). ## uniqUpTo(N)(x) diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 59a9c7f8cf1..6463f6bd95d 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -476,7 +476,7 @@ FROM - `r1` - количество уникальных посетителей за 2020-01-01 (`cond1`). - `r2` - количество уникальных посетителей в период между 2020-01-01 и 2020-01-02 (`cond1` и `cond2`). -- `r3` - количество уникальных посетителей в период между 2020-01-01 и 2020-01-03 (`cond1` и `cond3`). +- `r3` - количество уникальных посетителей за оба дня: 2020-01-01 и 2020-01-03 (`cond1` и `cond3`).
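A small model of the corrected semantics may help: each condition yields one flag per visitor, and every counter after the first counts visitors for whom `cond1` and the corresponding later condition both held. The sketch below uses made-up visitor flags and mirrors the documented behaviour, not ClickHouse's actual aggregator.

``` cpp
// Model of the documented retention() semantics: r1 counts visitors matching
// cond1; each later rN counts visitors matching cond1 AND condN.
#include <array>
#include <cstdio>
#include <vector>

int main()
{
    // {cond1, cond2, cond3} per visitor, e.g. "visited on that date".
    std::vector<std::array<bool, 3>> visitors = {
        {true, true, false},  // matched cond1 and cond2
        {true, false, true},  // matched cond1 and cond3
        {false, true, true},  // never matched cond1, contributes to nothing
    };

    int r1 = 0, r2 = 0, r3 = 0;
    for (const auto & v : visitors)
    {
        r1 += v[0];
        r2 += v[0] && v[1];
        r3 += v[0] && v[2];
    }
    std::printf("r1=%d r2=%d r3=%d\n", r1, r2, r3); // r1=2 r2=1 r3=1
}
```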
## uniqUpTo(N)(x) {#uniquptonx} diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index 1c7de515c58..cb1dcc35f5c 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -472,7 +472,7 @@ FROM - `r1`-2020-01-01期间访问该网站的独立访问者数量( `cond1` 条件)。 - `r2`-在2020-01-01和2020-01-02之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond2` 条件)。 -- `r3`-在2020-01-01和2020-01-03之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond3` 条件)。 +- `r3`-在2020-01-01和2020-01-03 网站的独立访客数量 (`cond1` 和 `cond3` 条件)。 ## uniqUpTo(N)(x) {#uniquptonx} From 7285a55f6983f7d6d89e5c0e95da19ccce78e4c8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 17:08:32 +0200 Subject: [PATCH 118/150] One more --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 1d9725352be..4b65b1bd8ad 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -125,6 +125,9 @@ void LRUFileCachePriority::updateSize(int64_t size) chassert(size != 0); chassert(size > 0 || state->current_size >= size_t(-size)); + LOG_TEST(log, "Updating size with {}, current is {}", + size, state->current_size); + state->current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); } From 40b9f39c00b4c4fce757540e0ea0058bbe8f8360 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 2 Apr 2024 17:47:00 +0200 Subject: [PATCH 119/150] Analyzer: Fix query parameters --- src/Analyzer/QueryTreeBuilder.cpp | 16 +++--- src/Analyzer/SortNode.cpp | 17 ++++--- src/Interpreters/InterpreterSelectQuery.cpp | 16 +++--- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Parsers/ASTOrderByElement.cpp | 8 +-- src/Parsers/ASTOrderByElement.h | 56 ++++++++++++++++++--- src/Parsers/ExpressionElementParsers.cpp | 13 +++-- 7 files changed, 86 insertions(+), 42 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index df80f46b3cd..7f7d7a82885 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -444,8 +444,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express nulls_sort_direction = order_by_element.nulls_direction == 1 ? 
SortDirection::ASCENDING : SortDirection::DESCENDING; std::shared_ptr collator; - if (order_by_element.collation) - collator = std::make_shared(order_by_element.collation->as().value.get()); + if (order_by_element.getCollation()) + collator = std::make_shared(order_by_element.getCollation()->as().value.get()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); @@ -455,12 +455,12 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::move(collator), order_by_element.with_fill); - if (order_by_element.fill_from) - sort_node->getFillFrom() = buildExpression(order_by_element.fill_from, context); - if (order_by_element.fill_to) - sort_node->getFillTo() = buildExpression(order_by_element.fill_to, context); - if (order_by_element.fill_step) - sort_node->getFillStep() = buildExpression(order_by_element.fill_step, context); + if (order_by_element.getFillFrom()) + sort_node->getFillFrom() = buildExpression(order_by_element.getFillFrom(), context); + if (order_by_element.getFillTo()) + sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context); + if (order_by_element.getFillStep()) + sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context); list_node->getNodes().push_back(std::move(sort_node)); } diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp index 8e9913af442..b9d93511b84 100644 --- a/src/Analyzer/SortNode.cpp +++ b/src/Analyzer/SortNode.cpp @@ -120,17 +120,18 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value(); - result->with_fill = with_fill; - result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr; - result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr; - result->fill_step = hasFillStep() ? 
getFillStep()->toAST(options) : nullptr; result->children.push_back(getExpression()->toAST(options)); if (collator) - { - result->children.push_back(std::make_shared(Field(collator->getLocale()))); - result->collation = result->children.back(); - } + result->setCollation(std::make_shared(Field(collator->getLocale()))); + + result->with_fill = with_fill; + if (hasFillFrom()) + result->setFillFrom(getFillFrom()->toAST(options)); + if (hasFillTo()) + result->setFillTo(getFillTo()->toAST(options)); + if (hasFillStep()) + result->setFillStep(getFillStep()->toAST(options)); return result; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bbf03bb1e0..5864b35799e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1165,13 +1165,13 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or { FillColumnDescription descr; - if (order_by_elem.fill_from) - std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.fill_from, context); - if (order_by_elem.fill_to) - std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.fill_to, context); + if (order_by_elem.getFillFrom()) + std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.getFillFrom(), context); + if (order_by_elem.getFillTo()) + std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.getFillTo(), context); - if (order_by_elem.fill_step) - std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.fill_step, context); + if (order_by_elem.getFillStep()) + std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.getFillStep(), context); else descr.fill_step = order_by_elem.direction; @@ -1217,8 +1217,8 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery const auto & order_by_elem = elem->as(); std::shared_ptr collator; - if (order_by_elem.collation) - collator = std::make_shared(order_by_elem.collation->as().value.get()); + if (order_by_elem.getCollation()) + collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); if (order_by_elem.with_fill) { diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b71a8e3681d..a341dae32fa 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -277,7 +277,7 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) const auto & order_by_elem = elem->as(); if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other. - || elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) + || elems_set.emplace(name, order_by_elem.getCollation() ? order_by_elem.getCollation()->getColumnName() : "").second) unique_elems.emplace_back(elem); } diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 318849812aa..be0416359a1 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -31,7 +31,7 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState << (settings.hilite ? hilite_none : ""); } - if (collation) + if (auto collation = getCollation()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " COLLATE " << (settings.hilite ? 
hilite_none : ""); collation->formatImpl(settings, state, frame); @@ -40,17 +40,17 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState if (with_fill) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH FILL" << (settings.hilite ? hilite_none : ""); - if (fill_from) + if (auto fill_from = getFillFrom()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); fill_from->formatImpl(settings, state, frame); } - if (fill_to) + if (auto fill_to = getFillTo()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); fill_to->formatImpl(settings, state, frame); } - if (fill_step) + if (auto fill_step = getFillStep()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " STEP " << (settings.hilite ? hilite_none : ""); fill_step->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTOrderByElement.h b/src/Parsers/ASTOrderByElement.h index 4cebc30be31..6edf84d7bde 100644 --- a/src/Parsers/ASTOrderByElement.h +++ b/src/Parsers/ASTOrderByElement.h @@ -10,18 +10,34 @@ namespace DB */ class ASTOrderByElement : public IAST { +private: + enum class Child : uint8_t + { + EXPRESSION, + COLLATION, + FILL_FROM, + FILL_TO, + FILL_STEP, + }; + public: int direction = 0; /// 1 for ASC, -1 for DESC int nulls_direction = 0; /// Same as direction for NULLS LAST, opposite for NULLS FIRST. bool nulls_direction_was_explicitly_specified = false; - /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */ - ASTPtr collation; - bool with_fill = false; - ASTPtr fill_from; - ASTPtr fill_to; - ASTPtr fill_step; + + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */ + void setCollation(ASTPtr node) { setChild(Child::COLLATION, node); } + void setFillFrom(ASTPtr node) { setChild(Child::FILL_FROM, node); } + void setFillTo(ASTPtr node) { setChild(Child::FILL_TO, node); } + void setFillStep(ASTPtr node) { setChild(Child::FILL_STEP, node); } + + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. 
*/ + ASTPtr getCollation() const { return getChild(Child::COLLATION); } + ASTPtr getFillFrom() const { return getChild(Child::FILL_FROM); } + ASTPtr getFillTo() const { return getChild(Child::FILL_TO); } + ASTPtr getFillStep() const { return getChild(Child::FILL_STEP); } String getID(char) const override { return "OrderByElement"; } @@ -36,6 +52,34 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +private: + + ASTPtr getChild(Child child) const + { + auto it = positions.find(child); + if (it != positions.end()) + return children[it->second]; + return {}; + } + + void setChild(Child child, ASTPtr node) + { + if (node == nullptr) + return; + + auto it = positions.find(child); + if (it != positions.end()) + { + children[it->second] = node; + } + else + { + positions[child] = children.size(); + children.push_back(node); + } + } + + std::unordered_map positions; }; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292..d4ad210b315 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -2120,17 +2120,16 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect auto elem = std::make_shared(); + elem->children.push_back(expr_elem); + elem->direction = direction; elem->nulls_direction = nulls_direction; elem->nulls_direction_was_explicitly_specified = nulls_direction_was_explicitly_specified; - elem->collation = locale_node; + elem->setCollation(locale_node); elem->with_fill = has_with_fill; - elem->fill_from = fill_from; - elem->fill_to = fill_to; - elem->fill_step = fill_step; - elem->children.push_back(expr_elem); - if (locale_node) - elem->children.push_back(locale_node); + elem->setFillFrom(fill_from); + elem->setFillTo(fill_to); + elem->setFillStep(fill_step); node = elem; From 259da73b17189c25f70e0e15bd4bc47f1362166a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 2 Apr 2024 17:54:49 +0200 Subject: [PATCH 120/150] Add a test --- .../0_stateless/03033_analyzer_query_parameters.reference | 2 ++ .../0_stateless/03033_analyzer_query_parameters.sh | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03033_analyzer_query_parameters.reference create mode 100755 tests/queries/0_stateless/03033_analyzer_query_parameters.sh diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.reference b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh new file mode 100755 index 00000000000..c821791e437 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" +clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" From 3e6168e63a117fe7c9c80346460bf8728645770e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 2 Apr 2024 18:25:52 +0200 Subject: [PATCH 121/150] Exclude one more format string --- .../0_stateless/00002_log_and_exception_messages_formatting.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index b5710e29be9..74608360b9c 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -203,7 +203,7 @@ select with 0.16 as threshold select 'noisy Trace messages', - greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}', 'Query {} to stage {}{}') + greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}', 'Query {} to stage {}{}', 'Query {} from stage {} to stage {}{}') group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r, r <= threshold ? '' : top_message.1; From e6e49688b25bbd70555a76ae06a89bd1768fcd01 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Apr 2024 18:18:32 +0000 Subject: [PATCH 122/150] Fix 02503_insert_storage_snapshot --- tests/queries/0_stateless/02503_insert_storage_snapshot.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh index b494adeb785..13561947e08 100755 --- a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh @@ -20,6 +20,13 @@ counter=0 retries=60 # for a short period of time. To avoid flakyness we check that refcount became 1 at least once during long INSERT query. # It proves that the INSERT query doesn't hold redundant references to parts. 
while [[ $counter -lt $retries ]]; do + query_result=$($CLICKHOUSE_CLIENT -q "select count() from system.processes where query_id = '$query_id' FORMAT CSV") + if [ "$query_result" -lt 1 ]; then + sleep 0.1 + ((++counter)) + continue; + fi + query_result=$($CLICKHOUSE_CLIENT -q "SELECT name, active, refcount FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_insert_storage_snapshot' FORMAT CSV") if [ "$query_result" == '"all_1_1_0",1,1' ]; then echo "$query_result" From 22fb91a6ef4532104cdee5f1af80c8ab910e147e Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 2 Apr 2024 18:18:32 +0000 Subject: [PATCH 123/150] Fix tests --- src/Server/HTTPHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index e754e8b58c3..8ba1e1d6df1 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -902,6 +902,7 @@ void HTTPHandler::processQuery( { bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); + formatExceptionForClient(status.code, request, response, used_output); current_output_format.setException(status.message); current_output_format.finalize(); used_output.exception_is_written = true; From ed448eae426db8c225cf5a6f70f5f780c7820a4b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 2 Apr 2024 18:40:46 +0000 Subject: [PATCH 124/150] add setting lightweight_deletes_sync --- src/Core/Settings.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 2 +- .../03033_lightweight_deletes_sync.reference | 4 ++++ .../03033_lightweight_deletes_sync.sql | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03033_lightweight_deletes_sync.reference create mode 100644 tests/queries/0_stateless/03033_lightweight_deletes_sync.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 84e709294aa..be5a38a5bd0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -590,6 +590,7 @@ class IColumn; M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \ M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ + M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutations_sync', but controls only execution of lightweight deletes", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. 
This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 8fb0dabb5b5..07d23be78a7 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -101,7 +101,7 @@ BlockIO InterpreterDeleteQuery::execute() DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto context = Context::createCopy(getContext()); - context->setSetting("mutations_sync", 2); /// Lightweight delete is always synchronous + context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); } diff --git a/tests/queries/0_stateless/03033_lightweight_deletes_sync.reference b/tests/queries/0_stateless/03033_lightweight_deletes_sync.reference new file mode 100644 index 00000000000..181282ffa9a --- /dev/null +++ b/tests/queries/0_stateless/03033_lightweight_deletes_sync.reference @@ -0,0 +1,4 @@ +2 +0 +2 +1 diff --git a/tests/queries/0_stateless/03033_lightweight_deletes_sync.sql b/tests/queries/0_stateless/03033_lightweight_deletes_sync.sql new file mode 100644 index 00000000000..bb4bb6dfa3d --- /dev/null +++ b/tests/queries/0_stateless/03033_lightweight_deletes_sync.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t_lightweight_deletes; + +CREATE TABLE t_lightweight_deletes (a UInt64) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_lightweight_deletes VALUES (1) (2) (3); + +DELETE FROM t_lightweight_deletes WHERE a = 1 SETTINGS lightweight_deletes_sync = 2; + +SELECT count() FROM t_lightweight_deletes; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_lightweight_deletes' AND NOT is_done; + +SYSTEM STOP MERGES t_lightweight_deletes; +DELETE FROM t_lightweight_deletes WHERE a = 2 SETTINGS lightweight_deletes_sync = 0; + +SELECT count() FROM t_lightweight_deletes; +SELECT count() FROM system.mutations WHERE database = currentDatabase() AND table = 't_lightweight_deletes' AND NOT is_done; + +DROP TABLE t_lightweight_deletes; From 9422ea35c5d93f5811361784f9344c2f5e84a6b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 2 Apr 2024 21:24:05 +0200 Subject: [PATCH 125/150] Fix non Linux build --- src/Common/AtomicLogger.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/AtomicLogger.h b/src/Common/AtomicLogger.h index 9581358218c..0ece9e8a09a 100644 --- a/src/Common/AtomicLogger.h +++ b/src/Common/AtomicLogger.h @@ -2,9 +2,10 @@ #include +#include #include -#include #include +#include namespace DB { From 534905ff8c2cd90cda863a07d4d8510365e99f32 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 2 Apr 2024 22:14:58 +0000 Subject: [PATCH 126/150] fix test --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 768b6aa6cbd..f43ca154d56 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,7 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON 
strings with bad escape sequences"}, + {"lightweight_deletes_sync", 2, 2, "The same as 'mutations_sync', but controls only execution of lightweight deletes"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, From a7c490e1dfeadc6f968a57aca147683663fbe766 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 3 Apr 2024 05:42:41 +0200 Subject: [PATCH 127/150] Fix build with clang-19 (master) --- base/poco/JSON/src/pdjson.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/poco/JSON/src/pdjson.c b/base/poco/JSON/src/pdjson.c index 18768ac96d3..563fa277439 100644 --- a/base/poco/JSON/src/pdjson.c +++ b/base/poco/JSON/src/pdjson.c @@ -314,13 +314,13 @@ static int read_unicode(json_stream *json) if (l < 0xdc00 || l > 0xdfff) { json_error(json, "invalid surrogate pair continuation \\u%04lx out " - "of range (dc00-dfff)", l); + "of range (dc00-dfff)", (unsigned long)l); return -1; } cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); } else if (cp >= 0xdc00 && cp <= 0xdfff) { - json_error(json, "dangling surrogate \\u%04lx", cp); + json_error(json, "dangling surrogate \\u%04lx", (unsigned long)cp); return -1; } From cc31b837f2d9fc44334d831a24898e1321b50134 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Apr 2024 15:28:57 +0200 Subject: [PATCH 128/150] User specific S3 endpoint backup/restore ON CLUSTER --- src/Backups/BackupIO_S3.cpp | 10 +++-- src/Backups/BackupIO_S3.h | 21 +++++++++- src/Backups/BackupsWorker.cpp | 1 + src/Backups/registerBackupEngineS3.cpp | 6 ++- src/Storages/StorageS3Settings.cpp | 4 +- src/Storages/StorageS3Settings.h | 2 +- .../configs/remote_servers.xml | 12 ++++++ .../test_backup_restore_s3/test.py | 40 ++++++++++++++++++- 8 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 tests/integration/test_backup_restore_s3/configs/remote_servers.xml diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 2063af2061c..4b7e3d1e775 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -124,11 +124,12 @@ BackupReaderS3::BackupReaderS3( bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); @@ -214,11 +215,12 @@ BackupWriterS3::BackupWriterS3( const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} , 
s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 57108d122ea..f81eb975df3 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -18,7 +18,15 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -41,7 +49,16 @@ private: class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupWriterS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const String & storage_class_name, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 96fe770227c..d345223b3b4 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -940,6 +940,7 @@ void BackupsWorker::doRestore( backup_open_params.use_same_s3_credentials_for_base_backup = restore_settings.use_same_s3_credentials_for_base_backup; backup_open_params.read_settings = getReadSettingsForRestore(context); backup_open_params.write_settings = getWriteSettingsForRestore(context); + backup_open_params.is_internal_backup = restore_settings.internal; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index fed5c6b4d22..c34dbe273f5 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -110,7 +110,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.allow_s3_native_copy, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, @@ -129,7 +130,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.s3_storage_class, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 
5887018268b..04634bcf1b3 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -292,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user) const +S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); @@ -302,7 +302,7 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint, const String { std::advance(possible_prefix_setting, -1); const auto & [endpoint_prefix, settings] = *possible_prefix_setting; - if (endpoint.starts_with(endpoint_prefix) && settings.auth_settings.canBeUsedByUser(user)) + if (endpoint.starts_with(endpoint_prefix) && (ignore_user || settings.auth_settings.canBeUsedByUser(user))) return possible_prefix_setting->second; } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 21b6264717e..0f972db02b1 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint, const String & user) const; + S3Settings getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; private: mutable std::mutex mutex; diff --git a/tests/integration/test_backup_restore_s3/configs/remote_servers.xml b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml new file mode 100644 index 00000000000..9607aac2003 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index d65fc1f09d6..05424887736 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterable +from typing import Dict import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -13,11 +13,13 @@ node = cluster.add_instance( "configs/named_collection_s3_backups.xml", "configs/s3_settings.xml", "configs/blob_log.xml", + "configs/remote_servers.xml", ], user_configs=[ "configs/zookeeper_retries.xml", ], with_minio=True, + with_zookeeper=True, ) @@ -544,9 +546,45 @@ def test_user_specific_auth(start_cluster): "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="regularuser", ) + node.query( "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="superuser1", ) + assert "Access Denied" in node.query_and_get_error( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM 
S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + + node.query( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3Cluster(cluster, 'http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + node.query("DROP TABLE IF EXISTS test.specific_auth") From a6356c120ab39db6af18e175856cc11b4d8fed00 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 3 Apr 2024 09:22:01 +0200 Subject: [PATCH 129/150] Add more documentation to the release script --- tests/ci/release.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/ci/release.py b/tests/ci/release.py index 679e65560f5..b7ccc59f7c1 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -7,6 +7,13 @@ The `gh` CLI preferred over the PyGithub to have an easy way to rollback bad release in command line by simple execution giving rollback commands On another hand, PyGithub is used for convenient getting commit's status from API + +To run this script on a freshly installed Ubuntu 22.04 system, it is enough to do the following commands: + +sudo apt install pip +pip install requests boto3 github PyGithub +sudo snap install gh +gh auth login """ From 1c10c3802c156e566c7a5d131a92d3413eaa02d9 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 3 Apr 2024 11:09:35 +0200 Subject: [PATCH 130/150] Docs formatting typo --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index b4e2adbed3c..573790f7ff7 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -99,7 +99,7 @@ Alias: `OCTET_LENGTH` Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. Alias: -- `CHAR_LENGTH`` +- `CHAR_LENGTH` - `CHARACTER_LENGTH` ## leftPad From 0d3d7b3146e1117057c60db098276d28ee3561e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 12:12:31 +0200 Subject: [PATCH 131/150] Revert "Resubmit 'Update invalidate_query_response on dictionary startup'" --- src/Interpreters/ExternalLoader.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 73dacfacf79..bd56a540128 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -996,14 +996,6 @@ private: if (!new_object && !new_exception) throw Exception(ErrorCodes::LOGICAL_ERROR, "No object created and no exception raised for {}", type_name); - if (!info->object && new_object) - { - /// If we loaded the object for the first time then we should set `invalidate_query_response` to the current value. - /// Otherwise we will immediately try to reload the object again despite the fact that it was just loaded. - bool is_modified = new_object->isModified(); - LOG_TRACE(log, "Object '{}' was{} modified", name, (is_modified ? 
"" : " not")); - } - /// Saving the result of the loading. { LoadingGuardForAsyncLoad lock(async, mutex); From 71450c909a30e2ae4120aad5dbff518f6d5985da Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 3 Apr 2024 10:39:43 +0000 Subject: [PATCH 132/150] Fix lambda(tuple(x), x + 1) syntax in analyzer --- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Interpreters/ActionsVisitor.cpp | 7 +++-- src/Parsers/ASTFunction.cpp | 11 ++++++++ src/Parsers/ASTFunction.h | 3 +++ src/Parsers/ExpressionElementParsers.cpp | 4 +-- .../02343_analyzer_lambdas.reference | 8 ++++++ .../0_stateless/02343_analyzer_lambdas.sql | 26 +++++++++++++++++++ 7 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index df80f46b3cd..d2587d74b7c 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -558,7 +558,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co } else if (const auto * function = expression->as()) { - if (function->is_lambda_function) + if (function->is_lambda_function || isASTLambdaFunction(*function)) { const auto & lambda_arguments_and_expression = function->arguments->as().children; auto & lambda_arguments_tuple = lambda_arguments_and_expression.at(0)->as(); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 16e2449206d..093c266c785 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1130,12 +1130,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * query_parameter = child->as(); if (function && function->name == "lambda") { + if (!isASTLambdaFunction(*function)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); + /// If the argument is a lambda expression, just remember its approximate type. 
- if (function->arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); - const auto * lambda_args_tuple = function->arguments->children.at(0)->as(); - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 07eea86ef81..cdc9a471e98 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -793,4 +793,15 @@ bool tryGetFunctionNameInto(const IAST * ast, String & name) return false; } +bool isASTLambdaFunction(const ASTFunction & function) +{ + if (function.name == "lambda" && function.arguments && function.arguments->children.size() == 2) + { + const auto * lambda_args_tuple = function.arguments->children.at(0)->as(); + return lambda_args_tuple && lambda_args_tuple->name == "tuple"; + } + + return false; +} + } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 631b6285bfa..3a94691f25d 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -111,4 +111,7 @@ inline String getFunctionName(const ASTPtr & ast) { return getFunctionName(ast.g inline std::optional tryGetFunctionName(const ASTPtr & ast) { return tryGetFunctionName(ast.get()); } inline bool tryGetFunctionNameInto(const ASTPtr & ast, String & name) { return tryGetFunctionNameInto(ast.get(), name); } +/// Checks if function is a lambda function definition `lambda((x, y), x + y)` +bool isASTLambdaFunction(const ASTFunction & function); + } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292..29b49736783 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1545,8 +1545,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { if (auto * func = lambda->as(); func && func->name == "lambda") { - if (func->arguments->children.size() != 2) - throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda requires two arguments"); + if (!isASTLambdaFunction(*func)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.reference b/tests/queries/0_stateless/02343_analyzer_lambdas.reference index 8d29481c255..62d9e9f4726 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.reference +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.reference @@ -27,3 +27,11 @@ Lambda untuple Lambda carrying 2 1 1 0 +Lambda legacy syntax +[2,3,4] +[2,3,4] +[2,3,4] +['hello','world'] +[2,3,4] +[2,3,4] 2 +[2,3,4] 2 1 diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index b90f7b32b57..0c257cf6f18 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -65,5 +65,31 @@ SELECT 'Lambda carrying'; WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, 1), lambda(functor_2, 1); WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, id), lambda(functor_2, id) FROM test_table; + 
+SELECT 'Lambda legacy syntax';
+
+SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]);
+
+WITH 222 AS lambda
+SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]);
+
+SELECT arrayMap(lambda((x,), x + 1), [1, 2, 3]);
+
+SELECT arraySort(lambda((x, y), y), ['world', 'hello'], [2, 1]);
+
+WITH 222 AS lambda
+SELECT arrayMap(lambda((x, ), x + 1), [1, 2, 3]);
+
+WITH x -> x + 1 AS lambda
+SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(1);
+
+-- lambda(tuple(x), x + 1) is parsed as a lambda definition, not as a call of the lambda defined in WITH
+WITH (x, y) -> y AS lambda
+SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(tuple(x), x + 1), 1 AS x; -- { serverError BAD_ARGUMENTS }
+
+WITH (x, y) -> y AS lambda2
+SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda2(tuple(x), x + 1), 1 AS x;
+
+
 DROP TABLE test_table_tuple;
 DROP TABLE test_table;

From c9430180258631d44fb7677c1ea725db586d63e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Wed, 3 Apr 2024 13:00:25 +0200
Subject: [PATCH 133/150] Include table name in paranoid checks

---
 src/Storages/StorageReplicatedMergeTree.cpp | 32 +++++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 8ca061db4ec..70d77432847 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1524,8 +1524,13 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(

     if (!found)
     {
-        LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. "
-            "It may cause false-positive 'part is lost forever' messages", part_name, covering_part);
+        LOG_WARNING(
+            log,
+            "Part {} of table {} exists in ZooKeeper and is covered by another part in ZooKeeper ({}), but doesn't exist on any disk. 
" + "It may cause false-positive 'part is lost forever' messages", + part_name, + getStorageID().getNameForLogs(), + covering_part); ProfileEvents::increment(ProfileEvents::ReplicatedCoveredPartsInZooKeeperOnStart); chassert(false); } @@ -2351,8 +2356,12 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared } } -static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeeper, const String & replica_path, - MergeTreeDataFormatVersion format_version, const String & covering_part_name) +static void paranoidCheckForCoveredPartsInZooKeeper( + const ZooKeeperPtr & zookeeper, + const String & replica_path, + MergeTreeDataFormatVersion format_version, + const String & covering_part_name, + const StorageReplicatedMergeTree & storage) { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -2371,8 +2380,12 @@ static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeep { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); if (drop_range_info.contains(part_info)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Part {} remains in ZooKeeper after DROP_RANGE {}", part_name, covering_part_name); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Part {} from table {} remains in ZooKeeper after DROP_RANGE {}", + part_name, + storage.getStorageID().getNameForLogs(), + covering_part_name); } } @@ -2434,7 +2447,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// Forcibly remove parts from ZooKeeper removePartsFromZooKeeperWithRetries(parts_to_remove); - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name, *this); if (entry.detach) LOG_DEBUG(log, "Detached {} parts inside {}.", parts_to_remove.size(), entry.new_part_name); @@ -2572,7 +2585,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) LOG_INFO(log, "All parts from REPLACE PARTITION command have been already attached"); removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper( + getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); return true; } @@ -2893,7 +2907,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); res_parts.clear(); parts_to_remove.clear(); cleanup_thread.wakeup(); From 90ac11171c80f18914e0f246fb886c6c5d9c32ca Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 3 Apr 2024 12:18:54 +0000 Subject: [PATCH 134/150] Fix crash --- src/Interpreters/RewriteOrderByVisitor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/RewriteOrderByVisitor.cpp b/src/Interpreters/RewriteOrderByVisitor.cpp index 694dec84b7a..26817b70dc0 100644 --- a/src/Interpreters/RewriteOrderByVisitor.cpp +++ b/src/Interpreters/RewriteOrderByVisitor.cpp @@ -39,9 +39,8 @@ void RewriteOrderBy::visit(ASTPtr & ast, Data &) { // clone w/o 
children
             auto clone = std::make_shared<ASTOrderByElement>(*order_by_elem);
-            clone->children.clear();
-
             clone->children.emplace_back(identifier);
+            clone->children[0] = identifier;
             new_order_by->children.emplace_back(clone);
         }
         if (!new_order_by->children.empty())

From d19d09e31e55e60bbf97a8636946e80d4e3f6688 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Wed, 3 Apr 2024 16:17:56 +0200
Subject: [PATCH 135/150] Correctly handle const columns in DistinctTransform

---
 src/Processors/Transforms/DistinctTransform.cpp                  | 1 +
 .../0_stateless/03033_distinct_transform_const_columns.reference | 1 +
 .../0_stateless/03033_distinct_transform_const_columns.sql       | 1 +
 3 files changed, 3 insertions(+)
 create mode 100644 tests/queries/0_stateless/03033_distinct_transform_const_columns.reference
 create mode 100644 tests/queries/0_stateless/03033_distinct_transform_const_columns.sql

diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp
index 3619fa51bf6..d528303a642 100644
--- a/src/Processors/Transforms/DistinctTransform.cpp
+++ b/src/Processors/Transforms/DistinctTransform.cpp
@@ -55,6 +55,7 @@ void DistinctTransform::transform(Chunk & chunk)

     /// Convert to full column, because SetVariant for sparse column is not implemented.
     convertToFullIfSparse(chunk);
+    convertToFullIfConst(chunk);

     const auto num_rows = chunk.getNumRows();
     auto columns = chunk.detachColumns();

diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference
new file mode 100644
index 00000000000..d05b1f927f4
--- /dev/null
+++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference
@@ -0,0 +1 @@
+0 0
diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql
new file mode 100644
index 00000000000..41df19ab64e
--- /dev/null
+++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql
@@ -0,0 +1 @@
+SELECT DISTINCT COALESCE(COALESCE('') = toNullable('b3'), toUInt128(toNullable(2)), 2, 2, toLowCardinality(2), 2, 2, 2, toUInt128(toNullable(2)), materialize(2), toUInt128(2), 2, 2), COALESCE(COALESCE(COALESCE(materialize(''))) = 'b3', 2, 2, 2, toLowCardinality(2), toUInt128(2), 2, 2, 2, materialize(toUInt256(2)), 2, 2, 2) FROM numbers(100000);

From 6661484e555081a01a587ffe1b0174baad11a7af Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Wed, 3 Apr 2024 16:31:01 +0200
Subject: [PATCH 136/150] Proper fix for LowCardinality together with JSONExtract functions (#61957)

---
 src/Functions/FunctionsJSON.h                 | 71 ++++++++++++++++---
 .../00918_json_functions.reference            |  6 ++
 .../0_stateless/00918_json_functions.sql      |  6 ++
 ...74_extract_fixedstring_from_json.reference |  7 ++
 .../02474_extract_fixedstring_from_json.sql   |  7 ++
 5 files changed, 89 insertions(+), 8 deletions(-)

diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h
index 2539fa1aeb4..9e824fabc42 100644
--- a/src/Functions/FunctionsJSON.h
+++ b/src/Functions/FunctionsJSON.h
@@ -257,7 +257,7 @@ private:
         }
         case MoveType::Key:
         {
-            key = (*arguments[j + 1].column).getDataAt(row).toView();
+            key = arguments[j + 1].column->getDataAt(row).toView();
             if (!moveToElementByKey(res_element, key))
                 return false;
             break;

@@ -334,6 +334,26 @@
 };

+template <typename JSONParser>
+class JSONExtractImpl;
+
+template <typename JSONParser>
+class JSONExtractKeysAndValuesImpl;
+
+/**
+* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it 
is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. +*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} template typename Impl> class ExecutableFunctionJSON : public IExecutableFunction @@ -348,17 +368,50 @@ public: String getName() const override { return Name::name; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (null_presence.has_null_constant) return result_type->createColumnConstWithDefaultValue(input_rows_count); - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - if (null_presence.has_nullable) - return wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - return temporary_result; + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } } private: @@ -429,7 +482,6 @@ private: DataTypePtr json_return_type; }; - /// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. /// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
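/// An illustrative aside, not part of this patch: the guarantee the LowCardinality
/// handling above is meant to provide can be seen in the tests added further below, e.g.
///     SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))');
///     SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), 'string_value', 'LowCardinality(Nullable(String))');
/// both must return a column of exactly the requested LowCardinality type, whether or not
/// the inputs themselves are LowCardinality.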
template typename Impl> @@ -450,6 +502,10 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override { @@ -481,7 +537,6 @@ public: } }; - struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 43b15ded93d..078348cd20f 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -286,3 +286,9 @@ v --show error: type should be const string --show error: index type should be integer --show error: key of map type should be String +\N +\N +Hello +Hello +Hello +Hello diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index e19dd17670e..3d30ce841ba 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -326,3 +326,9 @@ SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverErr SELECT '--show error: key of map type should be String'; SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":null}')), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":null}'), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference index 783d12fcf1a..21ddf5d3512 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -8,3 +8,10 @@ \0\0\0\0\0 131231 131231 +1234 +1234 +{"b":131231} +\0\0\0\0 +1234567890 +18446744073709551615 +-9223372036854775807 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index cfc47e00cba..bbb9f55062b 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -6,3 +6,10 @@ SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); SELECT 
JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": "1234"}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": {"b": 131231} }'), 'a', 'LowCardinality(FixedString(12))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))'); +SELECT JSONExtract(materialize('{"a": 18446744073709551615}'), 'a', 'LowCardinality(FixedString(20))'); +SELECT JSONExtract(materialize('{"a": -9223372036854775807}'), 'a', 'LowCardinality(FixedString(20))'); From bbcecd26606df86e4c2359c2bab811892dd8d4f1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:07:44 +0200 Subject: [PATCH 137/150] Remove reverted PR from 24.3 changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84e51c1efdf..dd88f3ee2c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,7 +123,6 @@ * Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). * An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). * Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)). * Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). * Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). 
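Read together with their reference output, the new 02474 cases pin down the intended contract: a value that fits into the FixedString(N) target is returned as-is, while a value that does not fit yields the type's default (zero-filled) value instead of an error. A hedged sketch, with expected results taken from the reference file above:

    SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))'); -- '1234567890'
    SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))');  -- '\0\0\0\0' (default)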
From ca27cf3fde37442f76f3d244f8cc57b5c541ebc7 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 3 Apr 2024 16:38:41 +0000 Subject: [PATCH 138/150] fix mapper for gcs --- programs/server/config.xml | 2 +- src/IO/S3/URI.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index ea3ead47c32..e92381eeb1e 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -96,7 +96,7 @@ https://{bucket}.s3.amazonaws.com - https://{bucket}.storage.googleapis.com + https://storage.googleapis.com/{bucket} https://{bucket}.oss.aliyuncs.com diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 027cb624ed5..0d8502ecf1f 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -67,7 +67,7 @@ URI::URI(const std::string & uri_) else { mapper["s3"] = "https://{bucket}.s3.amazonaws.com"; - mapper["gs"] = "https://{bucket}.storage.googleapis.com"; + mapper["gs"] = "https://storage.googleapis.com/{bucket}"; mapper["oss"] = "https://{bucket}.oss.aliyuncs.com"; } From 463691922c43a9ee30ad7d0a3a7895fcbd76760a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 3 Apr 2024 17:05:49 +0000 Subject: [PATCH 139/150] Fix: disable test for SMT --- .../0_stateless/02980_dist_insert_readonly_replica.sql.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 index 5bf40f34f5c..aba742fa64a 100644 --- a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 +++ b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 @@ -1,6 +1,7 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-shared-merge-tree -- Tag no-parallel - due to static databases -- Tag no-fasttest - S3 is required +-- Tag no-shared-merge-tree - no reliable way to make SMT read-only in stateless test drop database if exists shard_0; drop database if exists shard_1; From 2db1e3451773c160382325334a6c2b992c8b3314 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 19:06:33 +0200 Subject: [PATCH 140/150] Better logger name --- src/Interpreters/Cache/FileCache.cpp | 10 ++++++++-- src/Interpreters/Cache/SLRUFileCachePriority.cpp | 8 +++++--- src/Interpreters/Cache/SLRUFileCachePriority.h | 5 +++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 71dc0cca3a7..3dc13ba7737 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -90,9 +90,15 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , metadata(settings.base_path, settings.background_download_queue_size_limit, settings.background_download_threads, write_cache_per_user_directory) { if (settings.cache_policy == "LRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, nullptr, cache_name); + } else if (settings.cache_policy == "SLRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements, settings.slru_size_ratio); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, settings.slru_size_ratio, nullptr, nullptr, cache_name); + } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown cache policy: {}", settings.cache_policy); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp 
b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 1400d3219c6..59e51ae31f1 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -28,17 +28,19 @@ SLRUFileCachePriority::SLRUFileCachePriority( size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_, - LRUFileCachePriority::StatePtr protected_state_) + LRUFileCachePriority::StatePtr protected_state_, + const std::string & description_) : IFileCachePriority(max_size_, max_elements_) , size_ratio(size_ratio_) , protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio), protected_state_, - "protected")) + description_ + ", protected")) , probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio), probationary_state_, - "probationary")) + description_ + ", probationary")) + , log(getLogger("SLRUFileCachePriority(" + description_ + ")")) { LOG_DEBUG( log, "Probationary queue {} in size and {} in elements. " diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index 4cf5bb0f199..734828f55dd 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -19,7 +19,8 @@ public: size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_ = nullptr, - LRUFileCachePriority::StatePtr protected_state_ = nullptr); + LRUFileCachePriority::StatePtr protected_state_ = nullptr, + const std::string & description_ = "none"); size_t getSize(const CachePriorityGuard::Lock & lock) const override; @@ -67,7 +68,7 @@ private: double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; - LoggerPtr log = getLogger("SLRUFileCachePriority"); + LoggerPtr log; void increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock); From c70ecfe5f3bc37ad49dffb79158a86dc887de798 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 19:18:56 +0200 Subject: [PATCH 141/150] Fix --- .../Cache/LRUFileCachePriority.cpp | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ddc30755409..012ba14e5a8 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -307,25 +307,29 @@ bool LRUFileCachePriority::collectCandidatesForEviction( if (can_fit()) { - /// As eviction is done without a cache priority lock, - /// then if some space was partially available and some needed - /// to be freed via eviction, we need to make sure that this - /// partially available space is still available - /// after we finish with eviction for non-available space. - /// So we create a space holder for the currently available part - /// of the required space for the duration of eviction of the other - /// currently non-available part of the space. + /// `res` contains eviction candidates. Do we have any? + if (res.size() > 0) + { + /// As eviction is done without a cache priority lock, + /// then if some space was partially available and some needed + /// to be freed via eviction, we need to make sure that this + /// partially available space is still available + /// after we finish with eviction for non-available space. 
+ /// So we create a space holder for the currently available part + /// of the required space for the duration of eviction of the other + /// currently non-available part of the space. - const size_t hold_size = size > stat.total_stat.releasable_size - ? size - stat.total_stat.releasable_size - : 0; + const size_t hold_size = size > stat.total_stat.releasable_size + ? size - stat.total_stat.releasable_size + : 0; - const size_t hold_elements = elements > stat.total_stat.releasable_count - ? elements - stat.total_stat.releasable_count - : 0; + const size_t hold_elements = elements > stat.total_stat.releasable_count + ? elements - stat.total_stat.releasable_count + : 0; - if (hold_size || hold_elements) - res.setSpaceHolder(hold_size, hold_elements, *this, lock); + if (hold_size || hold_elements) + res.setSpaceHolder(hold_size, hold_elements, *this, lock); + } // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " // "Took hold of size {} and elements {}", From de2a0be02580eb7501ad1fec5de35a7107ef9a1e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 18:50:33 +0000 Subject: [PATCH 142/150] Don't access static members through instance - clang-tidy rightfully complains (-readability-static-accessed-through-instance) - not going to enable the warning for now to avoid breaking the build --- programs/format/Format.cpp | 2 +- .../ExternalDictionaryLibraryUtils.h | 2 +- .../AggregateFunctionAnyHeavy.cpp | 12 ++++++------ .../AggregateFunctionFlameGraph.cpp | 6 +++--- .../AggregateFunctionGroupArray.cpp | 4 ++-- ...gateFunctionLargestTriangleThreeBuckets.cpp | 12 ++++++------ .../AggregateFunctionMannWhitney.cpp | 16 ++++++++-------- .../AggregateFunctionRankCorrelation.cpp | 14 +++++++------- .../AggregateFunctionRetention.cpp | 12 ++++++------ ...AggregateFunctionSimpleLinearRegression.cpp | 12 ++++++------ .../AggregateFunctionSingleValueOrNull.cpp | 16 ++++++++-------- .../AggregateFunctionStatistics.cpp | 18 +++++++++--------- .../Combinators/AggregateFunctionNull.h | 2 +- src/AggregateFunctions/ReservoirSampler.h | 4 ++-- src/Analyzer/Passes/QueryAnalysisPass.cpp | 3 +-- src/Columns/ColumnSparse.cpp | 2 +- src/Common/AsynchronousMetrics.cpp | 2 +- src/Common/HTTPConnectionPool.cpp | 2 +- src/Common/HashTable/HashMap.h | 6 +++--- src/Common/StackTrace.cpp | 2 +- src/Common/ThreadStatus.cpp | 2 +- src/Compression/CachedCompressedReadBuffer.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/TablesDependencyGraph.cpp | 2 +- src/Dictionaries/FlatDictionary.cpp | 3 +-- src/Dictionaries/PolygonDictionaryUtils.h | 2 +- src/Disks/DiskLocal.cpp | 2 +- src/Functions/FunctionsExternalDictionaries.h | 6 +++--- .../UserDefinedSQLObjectsStorageBase.cpp | 2 +- src/IO/Archives/LibArchiveWriter.h | 2 +- src/IO/MMapReadBufferFromFileWithCache.cpp | 2 +- src/IO/ReadHelpers.h | 10 +++++----- src/IO/WriteHelpers.h | 4 ++-- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- .../InterpreterCreateFunctionQuery.cpp | 2 +- src/Interpreters/InterpreterDeleteQuery.cpp | 2 +- .../InterpreterDropFunctionQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++----- src/Interpreters/TreeRewriter.cpp | 2 +- .../evaluateConstantExpression.cpp | 2 +- src/Planner/PlannerActionsVisitor.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Processors/Executors/ExecutorTasks.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ++-- src/Server/HTTP/ReadHeaders.cpp | 2 +- 
src/Storages/AlterCommands.cpp | 2 +- src/Storages/FileLog/StorageFileLog.h | 2 +- src/Storages/KeyDescription.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 16 ++++++++-------- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 12 ++++++------ .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 4 ++-- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageS3.cpp | 2 +- 66 files changed, 149 insertions(+), 151 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index fc73eda6815..d4b975ce1e8 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -237,7 +237,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks); - std::unique_ptr insert_query_payload = nullptr; + std::unique_ptr insert_query_payload; /// If the query is INSERT ... VALUES, then we will try to parse the data. if (auto * insert_query = res->as(); insert_query && insert_query->data) { diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index c9d03d27f75..e6bf8f2a4c3 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -35,7 +35,7 @@ public: ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib private: - std::unique_ptr ptr_holder = nullptr; + std::unique_ptr ptr_holder; Container strings_holder; }; diff --git a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp index 4f4d4a19cba..ffddd46f2e3 100644 --- a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp @@ -115,34 +115,34 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - this->data(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).addManyDefaults(*columns[0], 0, arena); + data(place).addManyDefaults(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool allocatesMemoryInArena() const 
override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp index f3d99046036..33e318b6c2f 100644 --- a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp +++ b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp @@ -559,7 +559,7 @@ public: ptr = ptrs[row_num]; } - this->data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); + data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); } void addManyDefaults( @@ -572,7 +572,7 @@ public: void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).merge(this->data(rhs), arena); + data(place).merge(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override @@ -590,7 +590,7 @@ public: auto & array = assert_cast(to); auto & str = assert_cast(array.getData()); - this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); + data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); array.getOffsets().push_back(str.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 6af8b1018dd..63002652166 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -89,10 +89,10 @@ struct GroupArraySamplerData chassert(lim != 0); /// With a large number of values, we will generate random numbers several times slower. 
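    /// Illustrative aside, not part of the patch: `rng` is a pcg32_fast instance and max()
    /// is a static member, so rng.max() and pcg32_fast::max() return the same value; naming
    /// the class directly is what silences clang-tidy's
    /// readability-static-accessed-through-instance warning mentioned in the commit message.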
- if (lim <= static_cast(rng.max())) + if (lim <= static_cast(pcg32_fast::max())) return rng() % lim; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % lim; + return (static_cast(rng()) * (static_cast(pcg32::max()) + 1ULL) + static_cast(rng())) % lim; } void randomShuffle() diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index d5abdbc12fb..b24b6c8996f 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -242,7 +242,7 @@ public: { Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type); Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type); - this->data(place).add(x, y, arena); + data(place).add(x, y, arena); } Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const @@ -264,25 +264,25 @@ public: void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { - auto res = this->data(place).getResult(total_buckets, arena); + auto res = data(place).getResult(total_buckets, arena); auto & col = assert_cast(to); auto & col_offsets = assert_cast(col.getOffsetsColumn()); diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index a70da7b35d5..e7bc5df335f 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -205,35 +205,35 @@ public: UInt8 is_second = columns[1]->getUInt(row_num); if (is_second) - this->data(place).addY(value, arena); + data(place).addY(value, arena); else - this->data(place).addX(value, arena); + data(place).addX(value, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - if (!this->data(place).size_x || !this->data(place).size_y) + if (!data(place).size_x || !data(place).size_y) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName()); - auto [u_statistic, p_value] = 
this->data(place).getResult(alternative, continuity_correction); + auto [u_statistic, p_value] = data(place).getResult(alternative, continuity_correction); /// Because p-value is a probability. p_value = std::min(1.0, std::max(0.0, p_value)); diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp index d338808c717..0c4726734ce 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp @@ -66,31 +66,31 @@ public: { Float64 new_x = columns[0]->getFloat64(row_num); Float64 new_y = columns[1]->getFloat64(row_num); - this->data(place).addX(new_x, arena); - this->data(place).addY(new_y, arena); + data(place).addX(new_x, arena); + data(place).addY(new_y, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - auto answer = this->data(place).getResult(); + auto answer = data(place).getResult(); auto & column = static_cast &>(to); column.getData().push_back(answer); diff --git a/src/AggregateFunctions/AggregateFunctionRetention.cpp b/src/AggregateFunctions/AggregateFunctionRetention.cpp index 5eaa1a7a39c..e9b46e62c14 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.cpp +++ b/src/AggregateFunctions/AggregateFunctionRetention.cpp @@ -102,24 +102,24 @@ public: auto event = assert_cast *>(columns[i])->getData()[row_num]; if (event) { - this->data(place).add(i); + data(place).add(i); } } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -130,13 +130,13 @@ public: ColumnArray::Offset current_offset = data_to.size(); data_to.resize(current_offset + events_size); - const bool first_flag = this->data(place).events.test(0); + const bool first_flag = data(place).events.test(0); data_to[current_offset] = first_flag; ++current_offset; for (size_t i = 1; i < events_size; ++i) { - data_to[current_offset] = (first_flag && this->data(place).events.test(i)); + data_to[current_offset] = (first_flag && data(place).events.test(i)); ++current_offset; } diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 75d2fe595d8..ce2f7ee195d 100644 --- 
a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -123,22 +123,22 @@ public: Float64 x = columns[0]->getFloat64(row_num); Float64 y = columns[1]->getFloat64(row_num); - this->data(place).add(x, y); + data(place).add(x, y); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } static DataTypePtr createResultType() @@ -168,8 +168,8 @@ public: IColumn & to, Arena *) const override { - Float64 k = this->data(place).getK(); - Float64 b = this->data(place).getB(k); + Float64 k = data(place).getK(); + Float64 b = data(place).getB(k); auto & col_tuple = assert_cast(to); auto & col_k = assert_cast &>(col_tuple.getColumn(0)); diff --git a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp index b14af34c5fc..0625e37d1b0 100644 --- a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp @@ -120,7 +120,7 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - this->data(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addBatchSinglePlace( @@ -131,7 +131,7 @@ public: Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; IAggregateFunctionDataHelper::addBatchSinglePlace( row_begin, row_end, place, columns, arena, if_argument_pos); @@ -146,7 +146,7 @@ public: Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; IAggregateFunctionDataHelper::addBatchSinglePlaceNotNull( row_begin, row_end, place, columns, null_map, arena, if_argument_pos); @@ -154,29 +154,29 @@ public: void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).add(*columns[0], 0, arena); + data(place).add(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git 
a/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/src/AggregateFunctions/AggregateFunctionStatistics.cpp index e9d9b7409ca..15fede94fe7 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -150,13 +150,13 @@ private: Float64 getResult(ConstAggregateDataPtr __restrict place) const { - const auto & data = this->data(place); + const auto & dt = data(place); switch (kind) { - case VarKind::varSampStable: return getVarSamp(data.m2, data.count); - case VarKind::stddevSampStable: return getStddevSamp(data.m2, data.count); - case VarKind::varPopStable: return getVarPop(data.m2, data.count); - case VarKind::stddevPopStable: return getStddevPop(data.m2, data.count); + case VarKind::varSampStable: return getVarSamp(dt.m2, dt.count); + case VarKind::stddevSampStable: return getStddevSamp(dt.m2, dt.count); + case VarKind::varPopStable: return getVarPop(dt.m2, dt.count); + case VarKind::stddevPopStable: return getStddevPop(dt.m2, dt.count); } } @@ -182,22 +182,22 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).update(*columns[0], row_num); + data(place).update(*columns[0], row_num); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).mergeWith(this->data(rhs)); + data(place).mergeWith(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 306e293cae7..9d13b77664d 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -491,7 +491,7 @@ public: std::vector nullable_filters; const IColumn * nested_columns[number_of_arguments]; - std::unique_ptr final_flags = nullptr; + std::unique_ptr final_flags; const UInt8 * final_flags_ptr = nullptr; if (if_argument_pos >= 0) diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 37fc05a2e4c..7b6ef1b2dc0 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -258,10 +258,10 @@ private: chassert(limit > 0); /// With a large number of values, we will generate random numbers several times slower. 
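    /// Illustrative aside, not part of the patch: pcg32_fast produces 32-bit values, so
    /// pcg32_fast::max() is 2^32 - 1. When the limit does not fit into 32 bits, the line
    /// below combines two draws as hi * 2^32 + lo to form a 64-bit random value before
    /// taking the modulo, which is why max() + 1ULL appears as the multiplier.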
- if (limit <= static_cast(rng.max())) + if (limit <= static_cast(pcg32_fast::max())) return rng() % limit; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % limit; + return (static_cast(rng()) * (static_cast(pcg32_fast::max()) + 1ULL) + static_cast(rng())) % limit; } void sortIfNeeded() diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662..fffb8f7f281 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1940,8 +1940,7 @@ std::vector QueryAnalyzer::collectIdentifierTypoHints(const Identifier & for (const auto & valid_identifier : valid_identifiers) prompting_strings.push_back(valid_identifier.getFullName()); - NamePrompter<1> prompter; - return prompter.getHints(unresolved_identifier.getFullName(), prompting_strings); + return NamePrompter<1>::getHints(unresolved_identifier.getFullName(), prompting_strings); } /** Wrap expression node in tuple element function calls for nested paths. diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index b9a173fd92c..3c08ebbf8b4 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -346,7 +346,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const } auto res_values = values->filter(values_filter, values_result_size_hint); - return this->create(res_values, std::move(res_offsets), res_offset); + return create(res_values, std::move(res_offsets), res_offset); } void ColumnSparse::expand(const Filter & mask, bool inverted) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 0b9be18c84e..ab54b180fbf 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -671,7 +671,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) ReadableSize(rss), ReadableSize(difference)); - total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas); + MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas); } } diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index cd2505df7f3..1c011880f8f 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -203,7 +203,7 @@ public: if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) { LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); - mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + mute_warning_until = roundUp(total_connections_in_group, HTTPConnectionPools::Limits::warning_step); } } diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index 5f4cb396822..dc601bf1319 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -207,7 +207,7 @@ public: void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { DB::PrefetchingHelper prefetching; - size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue(); + size_t prefetch_look_ahead = DB::PrefetchingHelper::getInitialLookAheadValue(); size_t i = 0; auto prefetch_it = advanceIterator(this->begin(), prefetch_look_ahead); @@ -216,10 +216,10 @@ public: { if constexpr (prefetch) { - if (i == prefetching.iterationsToMeasure()) + if (i == DB::PrefetchingHelper::iterationsToMeasure()) { prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); - prefetch_it = 
advanceIterator(prefetch_it, prefetch_look_ahead - prefetching.getInitialLookAheadValue()); + prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - DB::PrefetchingHelper::getInitialLookAheadValue()); } if (prefetch_it != end) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 436b85ff30b..891850ccb79 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -366,7 +366,7 @@ String demangleAndCollapseNames(std::optional file, const char if (file.has_value()) { std::string_view file_copy = file.value(); - if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != file_copy.npos) + if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != std::string_view::npos) file_copy.remove_suffix(file_copy.size() - trim_pos); if (file_copy.ends_with("functional")) return "?"; diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index cf50d305e95..8719a9e093a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -96,7 +96,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) stack_t altstack_description{}; altstack_description.ss_sp = alt_stack.getData(); altstack_description.ss_flags = 0; - altstack_description.ss_size = alt_stack.getSize(); + altstack_description.ss_size = ThreadStack::getSize(); if (0 != sigaltstack(&altstack_description, nullptr)) { diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 0febfca75cc..3476f436eeb 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -38,7 +38,7 @@ void CachedCompressedReadBuffer::prefetch(Priority priority) bool CachedCompressedReadBuffer::nextImpl() { /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists. 
- UInt128 key = cache->hash(path, file_pos); + UInt128 key = UncompressedCache::hash(path, file_pos); owned_cell = cache->getOrSet(key, [&]() { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index d8acfb5fa01..674e9afa8ac 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -68,7 +68,7 @@ std::pair createTableFromAST( ast_create_query.setDatabase(database_name); if (ast_create_query.select && ast_create_query.isView()) - ApplyWithSubqueryVisitor().visit(*ast_create_query.select); + ApplyWithSubqueryVisitor::visit(*ast_create_query.select); if (ast_create_query.as_table_function) { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 95bdcfc7dce..90f777d7d1d 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -171,7 +171,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto * create_query = ast->as(); /// NOTE No concurrent writes are possible during database loading create_query->setDatabase(TSA_SUPPRESS_WARNING_FOR_READ(database_name)); diff --git a/src/Databases/TablesDependencyGraph.cpp b/src/Databases/TablesDependencyGraph.cpp index 4b05f19fe91..d227a3ac76b 100644 --- a/src/Databases/TablesDependencyGraph.cpp +++ b/src/Databases/TablesDependencyGraph.cpp @@ -448,7 +448,7 @@ std::vector TablesDependencyGraph::getTables() const void TablesDependencyGraph::mergeWith(const TablesDependencyGraph & other) { for (const auto & other_node : other.nodes) - addDependencies(other_node->storage_id, other.getDependencies(*other_node)); + addDependencies(other_node->storage_id, TablesDependencyGraph::getDependencies(*other_node)); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index fc58ff525bd..e3b1e8a84e2 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -412,8 +412,7 @@ void FlatDictionary::blockToAttributes(const Block & block) { const auto keys_column = block.safeGetByPosition(0).column; - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); + DictionaryKeysExtractor keys_extractor({ keys_column }, DictionaryKeysArenaHolder::getComplexKeyArena()); size_t keys_size = keys_extractor.getKeysSize(); static constexpr size_t key_offset = 1; diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0acf0d23e5e..0fd1fead456 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -214,7 +214,7 @@ public: static constexpr Coord kEps = 1e-4f; private: - std::unique_ptr> root = nullptr; + std::unique_ptr> root; Coord min_x = 0, min_y = 0; Coord max_x = 0, max_y = 0; const size_t k_min_intersections; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 1a8d46668e0..33f7ca1ec19 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -581,7 +581,7 @@ try auto disk_ptr = std::static_pointer_cast(shared_from_this()); auto tmp_file = std::make_unique(disk_ptr); auto buf = std::make_unique(std::move(tmp_file)); - buf->write(data.data, data.PAGE_SIZE_IN_BYTES); + buf->write(data.data, DiskWriteCheckData::PAGE_SIZE_IN_BYTES); 
buf->finalize(); buf->sync(); } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index d3317e2dfcf..4460a8bd7bd 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1139,7 +1139,7 @@ private: getName()); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); return std::make_shared(removeNullable(hierarchical_attribute.type)); } @@ -1150,7 +1150,7 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type)); @@ -1205,7 +1205,7 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp index 4f47a46b10d..f251d11789f 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -23,7 +23,7 @@ ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } diff --git a/src/IO/Archives/LibArchiveWriter.h b/src/IO/Archives/LibArchiveWriter.h index f54a8ce2367..da566c82ff6 100644 --- a/src/IO/Archives/LibArchiveWriter.h +++ b/src/IO/Archives/LibArchiveWriter.h @@ -68,7 +68,7 @@ private: void startWritingFile(); void endWritingFile(); - std::unique_ptr stream_info TSA_GUARDED_BY(mutex) = nullptr; + std::unique_ptr stream_info TSA_GUARDED_BY(mutex); bool is_writing_file TSA_GUARDED_BY(mutex) = false; bool finalized TSA_GUARDED_BY(mutex) = false; mutable std::mutex mutex; diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index d53f3bc325d..68c0c7227ca 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -26,7 +26,7 @@ void MMapReadBufferFromFileWithCache::init() MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMappedFileCache & cache, const std::string & file_name, size_t offset, size_t length) { - mapped = cache.getOrSet(cache.hash(file_name, offset, length), [&] + mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, length), [&] { return 
std::make_shared(file_name, offset, length); }); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index fc105539061..a9c861be13c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -822,7 +822,7 @@ inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, cons return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. - date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(date_lut.getDayNumOffsetEpoch())); + date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(DateLUTImpl::getDayNumOffsetEpoch())); return ReturnType(true); } @@ -1880,10 +1880,10 @@ struct PcgDeserializer assertChar(' ', buf); readText(state, buf); - if (multiplier != rng.multiplier()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", rng.multiplier(), multiplier); - if (increment != rng.increment()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", rng.increment(), increment); + if (multiplier != pcg32_fast::multiplier()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", pcg32_fast::multiplier(), multiplier); + if (increment != pcg32_fast::increment()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", pcg32_fast::increment(), increment); rng.state_ = state; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index b42b4e6e978..a30e2feb439 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1390,9 +1390,9 @@ struct PcgSerializer { static void serializePcg32(const pcg32_fast & rng, WriteBuffer & buf) { - writeText(rng.multiplier(), buf); + writeText(pcg32_fast::multiplier(), buf); writeChar(' ', buf); - writeText(rng.increment(), buf); + writeText(pcg32_fast::increment(), buf); writeChar(' ', buf); writeText(rng.state_, buf); } diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c05d1b8f979..6b9ca34c2d7 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -294,7 +294,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); - auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); + auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 0cf138c14f6..a70ff3c6c53 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -112,7 +112,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto hash = subquery.getTreeHash(/*ignore_aliases=*/ true); const auto scalar_query_hash_str = toString(hash); - std::unique_ptr interpreter = 
nullptr; + std::unique_ptr interpreter; bool hit = false; bool is_local = false; diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 18e9ba4a64b..3f4a03c3497 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 07d23be78a7..ee774994145 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -35,7 +35,7 @@ InterpreterDeleteQuery::InterpreterDeleteQuery(const ASTPtr & query_ptr_, Contex BlockIO InterpreterDeleteQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const ASTDeleteQuery & delete_query = query_ptr->as(); auto table_id = getContext()->resolveStorageID(delete_query, Context::ResolveOrdinary); diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 2661fd9058c..7a273d4969b 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterDropFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTDropFunctionQuery & drop_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c47e3bdc49f..75baefeffba 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -412,8 +412,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.is_subquery) { if (context->getSettingsRef().enable_global_with_statement) - ApplyWithAliasVisitor().visit(query_ptr); - ApplyWithSubqueryVisitor().visit(query_ptr); + ApplyWithAliasVisitor::visit(query_ptr); + ApplyWithSubqueryVisitor::visit(query_ptr); } query_info.query = query_ptr->clone(); @@ -609,7 +609,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + StorageView::replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -629,7 +629,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { /// Restore original view name. Save rewritten subquery for future usage in StorageView. 
- query_info.view_query = view->restoreViewName(getSelectQuery(), view_table); + query_info.view_query = StorageView::restoreViewName(getSelectQuery(), view_table); view = nullptr; } @@ -2434,7 +2434,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, *num_rows); + AggregateFunctionCount::set(place, *num_rows); auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5588fc55a64..914b3c3037d 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1587,7 +1587,7 @@ void TreeRewriter::normalize( /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names) - FunctionNameNormalizer().visit(query.get()); + FunctionNameNormalizer::visit(query.get()); if (settings.optimize_move_to_prewhere) { diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index b5c3e00e299..4e1a2bcf5ee 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -73,7 +73,7 @@ std::optional evaluateConstantExpressionImpl(c /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names) - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto syntax_result = TreeRewriter(context, no_throw).analyze(ast, source_columns); if (!syntax_result) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 326dd683343..656b6cdaa6e 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -169,7 +169,7 @@ public: { const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); + in_function_second_argument_node_name = PlannerContext::createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index e9c3795176a..adab31adb40 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -305,7 +305,7 @@ bool applyTrivialCountIfPossible( AggregateDataPtr place = state.data(); agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, num_rows.value()); + AggregateFunctionCount::set(place, num_rows.value()); auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction()); column->insertFrom(place); diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index ec1fc539884..1039cf0e97a 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -121,7 +121,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea /// 
Take local task from queue if has one. if (!queue.empty() && !context.hasAsyncTasks() - && context.num_scheduled_local_tasks < context.max_scheduled_local_tasks) + && context.num_scheduled_local_tasks < ExecutionThreadContext::max_scheduled_local_tasks) { ++context.num_scheduled_local_tasks; context.setTask(queue.front()); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f4607cad040..bee42c3ddde 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1408,8 +1408,8 @@ static void buildIndexes( if (metadata_snapshot->hasPartitionKey()) { const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_expression_actions = MergeTreeData::getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); indexes->minmax_idx_condition.emplace(filter_actions_dag, context, minmax_columns_names, minmax_expression_actions); indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp index b7057501064..d6c7b8ddc0f 100644 --- a/src/Server/HTTP/ReadHeaders.cpp +++ b/src/Server/HTTP/ReadHeaders.cpp @@ -77,7 +77,7 @@ void readHeaders( skipToNextLineOrEOF(in); Poco::trimRightInPlace(value); - headers.add(name, headers.decodeWord(value)); + headers.add(name, Poco::Net::MessageHeader::decodeWord(value)); ++fields; } } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index eae5e1a8a47..281fc72dfc4 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1143,7 +1143,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context { auto minmax_columns = metadata_copy.getColumnsRequiredForPartitionKey(); auto partition_key = metadata_copy.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata_copy.primary_key.expression_list_ast->children; metadata_copy.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( metadata_copy.columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 91d58540c94..0434213c558 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -177,7 +177,7 @@ private: }; std::shared_ptr task; - std::unique_ptr directory_watch = nullptr; + std::unique_ptr directory_watch; void loadFiles(); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index d63b40e2b11..2a697fa5654 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -172,7 +172,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio ParserExpression parser; ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getKeyFromAST(ast, columns, context); } diff --git 
a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 570175f6614..8da46b39801 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -79,8 +79,8 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); hyperrectangle.reserve(minmax_idx_size); @@ -112,8 +112,8 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); return store(minmax_column_names, minmax_column_types, part_storage, out_checksums); } @@ -204,7 +204,7 @@ void IMergeTreeDataPart::MinMaxIndex::appendFiles(const MergeTreeData & data, St { auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); size_t minmax_idx_size = minmax_column_names.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -1213,7 +1213,7 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c return; if (!parent_part) - partition.appendFiles(storage, files); + MergeTreePartition::appendFiles(storage, files); if (!parent_part) minmax_idx->appendFiles(storage, files); @@ -2061,7 +2061,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!isEmpty() && !parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) { if (!checksums.files.contains("minmax_" + escapeForFileName(col_name) + ".idx")) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No minmax idx file checksum for column {}", col_name); @@ -2101,7 +2101,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) check_file_not_empty("minmax_" + escapeForFileName(col_name) + ".idx"); } } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2d57ea40c9c..a779311c22b 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1964,8 +1964,8 @@ KeyCondition::Description KeyCondition::getDescription() const /// This means that logical NOT is applied to leaf. 
bool negate = false; - std::unique_ptr left = nullptr; - std::unique_ptr right = nullptr; + std::unique_ptr left; + std::unique_ptr right; }; /// The algorithm is the same as in KeyCondition::checkInHyperrectangle diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8faed72b198..e5ace0e5969 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5362,7 +5362,7 @@ void MergeTreeData::restoreDataFromBackup(RestorerFromBackup & restorer, const S return; if (!restorer.isNonEmptyTableAllowed() && getTotalActiveSizeInBytes() && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); restorePartsFromBackup(restorer, data_path_in_backup, partitions); } @@ -6687,7 +6687,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - agg_count->set(place, value.get()); + AggregateFunctionCount::set(place, value.get()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6471f510291..bcc936c5739 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -513,11 +513,11 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( { chassert(minmax_idx_condition && partition_pruner); const auto & partition_key = metadata_snapshot->getPartitionKey(); - minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); + minmax_columns_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); throw Exception(ErrorCodes::INDEX_NOT_USED, "Neither MinMax index by columns ({}) nor partition expr is used and setting 'force_index_by_date' is set", fmt::join(minmax_columns_names, ", ")); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cadd94867ec..64d4b1fd7ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -426,7 +426,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); - minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); + minmax_idx->update(block, MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); MergeTreePartition partition(block_with_partition.partition); @@ -656,7 +656,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->getDataPartStorage()); + MergeTreeData::reserveSpace(expected_size, parent_part->getDataPartStorage()); part_type = data.choosePartFormatOnDisk(expected_size, 
block.rows()).part_type; auto new_data_part = parent_part->getProjectionPartBuilder(part_name, is_temp).withPartType(part_type).build(); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 3ead766cba9..82c015f33ed 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -195,7 +195,7 @@ void MergeTreeSink::finishDelayedChunk() { auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageMergeTree::incrementInsertedPartsProfileEvent(part->getType()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. storage.background_operations_assignee.trigger(); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 7536eb45903..3415b08cebb 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -116,7 +116,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. - reserved_space = storage.reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); + reserved_space = StorageReplicatedMergeTree::reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); future_mutated_part->updatePath(storage, reserved_space.get()); table_lock_holder = storage.lockForShare( diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 90e1cb0606e..a971c4fda1c 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -980,13 +980,13 @@ struct MutationContext QueryPipelineBuilder mutating_pipeline_builder; QueryPipeline mutating_pipeline; // in - std::unique_ptr mutating_executor{nullptr}; + std::unique_ptr mutating_executor; ProgressCallback progress_callback; Block updated_header; std::unique_ptr interpreter; - UInt64 watch_prev_elapsed{0}; - std::unique_ptr stage_progress{nullptr}; + UInt64 watch_prev_elapsed = 0; + std::unique_ptr stage_progress; MutationCommands commands_for_part; MutationCommands for_interpreter; @@ -998,12 +998,12 @@ struct MutationContext NameSet materialized_statistics; MergeTreeData::MutableDataPartPtr new_data_part; - IMergedBlockOutputStreamPtr out{nullptr}; + IMergedBlockOutputStreamPtr out; String mrk_extension; std::vector projections_to_build; - IMergeTreeDataPart::MinMaxIndexPtr minmax_idx{nullptr}; + IMergeTreeDataPart::MinMaxIndexPtr minmax_idx; std::set indices_to_recalc; std::set stats_to_recalc; @@ -1283,7 +1283,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) - ctx->minmax_idx->update(cur_block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); + ctx->minmax_idx->update(cur_block, MergeTreeData::getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); ctx->out->write(cur_block); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 7fcf6b971bb..e1f28c2a951 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -289,7 +289,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 0ca7a4d74d9..268f2d66c96 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -33,7 +33,7 @@ static String formattedASTNormalized(const ASTPtr & ast) if (!ast) return ""; auto ast_normalized = ast->clone(); - FunctionNameNormalizer().visit(ast_normalized.get()); + FunctionNameNormalizer::visit(ast_normalized.get()); WriteBufferFromOwnString buf; formatAST(*ast_normalized, buf, false, true); return buf.str(); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a3c17923d8..d552a4b6fa5 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -585,7 +585,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); @@ -694,7 +694,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0d220f2fd5d..f747bbf6b28 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1741,7 +1741,7 @@ public: void initialize() { - std::unique_ptr naked_buffer = nullptr; + std::unique_ptr naked_buffer; if (use_table_fd) { naked_buffer = std::make_unique(table_fd, DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 588429284f0..945ee4f369f 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -286,7 +286,7 @@ private: std::unique_ptr reader; std::shared_ptr archive_reader; - std::unique_ptr file_enumerator = nullptr; + std::unique_ptr 
file_enumerator; ColumnsDescription columns_description; NamesAndTypesList requested_columns; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aad4fc36a1b..c9f451b6bb1 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -436,7 +436,7 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) { - reserved_space = storage.tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); + reserved_space = StorageMergeTree::tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); } else { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ca061db4ec..c4b84a0ae8c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -10512,7 +10512,7 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest } auto backup = restorer.getBackup(); if (!empty && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); } restorePartsFromBackup(restorer, data_path_in_backup, partitions); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf..6cda0fca60b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -207,7 +207,7 @@ public: , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) , file_progress_callback(file_progress_callback_) { - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) + if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); From 9626506585666a220b5d82fa276fac45e4bf3ccf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 19:18:31 +0000 Subject: [PATCH 143/150] More fixes --- src/Coordination/FourLetterCommand.cpp | 2 +- .../UserDefinedSQLFunctionFactory.cpp | 2 +- src/Functions/decodeHTMLComponent.cpp | 2 +- src/Processors/Formats/Impl/NativeFormat.cpp | 2 +- src/Processors/Merges/Algorithms/Graphite.cpp | 2 +- src/Processors/QueryPlan/JoinStep.cpp | 2 +- src/Storages/Distributed/DistributedSink.cpp | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 2 +- src/Storages/MergeTree/MergeTreeMarksLoader.cpp | 2 +- src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 +- src/Storages/MergeTree/MergeTreeSettings.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- src/Storages/StorageFuzzJSON.cpp | 16 ++++++++-------- src/Storages/TTLDescription.cpp | 2 +- 19 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index d7fa5abe742..25254e10441 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -592,7 +592,7 @@ String RecalculateCommand::run() String CleanResourcesCommand::run() { - keeper_dispatcher.cleanResources(); + 
KeeperDispatcher::cleanResources(); return "ok"; } diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e37e4a23b63..e22cd6d0022 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -86,7 +86,7 @@ namespace auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } } diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 2cd95127266..cb6ba0b07e2 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -108,7 +108,7 @@ namespace // null terminate the sequence seq.push_back('\0'); // lookup the html sequence in the perfect hashmap. - const auto * res = hash.Lookup(seq.data(), strlen(seq.data())); + const auto * res = HTMLCharacterHash::Lookup(seq.data(), strlen(seq.data())); // reset so that it's reused in the next iteration seq.clear(); if (res) diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 73ffc02bbc1..a7a49ab6a8c 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -82,7 +82,7 @@ public: std::string getContentType() const override { - return writer.getContentType(); + return NativeWriter::getContentType(); } protected: diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 817961b709c..a75c2b87720 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -76,7 +76,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param if (params.patterns_typed) { std::string_view path_view = path; - if (path_view.find("?"sv) == path_view.npos) + if (path_view.find("?"sv) == std::string::npos) return params.patterns_plain; else return params.patterns_tagged; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 1931b1eb3a1..8fe2515e323 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -31,7 +31,7 @@ std::vector> describeJoinActions(const JoinPtr & join) description.emplace_back("ASOF inequality", toString(table_join.getAsofInequality())); if (!table_join.getClauses().empty()) - description.emplace_back("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); + description.emplace_back("Clauses", TableJoin::formatClauses(table_join.getClauses(), true /*short_format*/)); return description; } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ddbcc6d473f..b89a8d7bcfd 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -613,7 +613,7 @@ IColumn::Selector DistributedSink::createSelector(const Block & source_block) co const auto & key_column = current_block_with_sharding_key_expr.getByName(storage.getShardingKeyColumnName()); - return storage.createSelector(cluster, key_column); + return StorageDistributed::createSelector(cluster, key_column); } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index a779311c22b..2d57ea40c9c 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1964,8 +1964,8 @@ KeyCondition::Description KeyCondition::getDescription() const /// This means that logical NOT is applied to leaf. bool negate = false; - std::unique_ptr left; - std::unique_ptr right; + std::unique_ptr left = nullptr; + std::unique_ptr right = nullptr; }; /// The algorithm is the same as in KeyCondition::checkInHyperrectangle diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 3d1c5db07b5..e8d55f75b08 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -426,7 +426,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); - storage.incrementMergedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementMergedPartsProfileEvent(part->getType()); return true; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index c218acce903..866a63911c3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -149,7 +149,7 @@ void MergePlainMergeTreeTask::finish() ThreadFuzzer::maybeInjectMemoryLimitException(); write_part_log({}); - storage.incrementMergedPartsProfileEvent(new_part->getType()); + StorageMergeTree::incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); if (auto txn_ = txn_holder.getTransaction()) diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index f506230b5ea..7ab90dac5b0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -590,7 +590,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( for (const auto & f : value_field.get()) { - if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) + if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; auto converted = convertFieldToType(f, *actual_type); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 6798f97e494..1e9a320fa95 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -210,7 +210,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() if (mark_cache) { - auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); + auto key = MarkCache::hash(fs::path(data_part_storage->getFullPath()) / mrk_path); if (save_marks_in_cache) { auto callback = [this] { return loadMarksImpl(); }; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index d32bc6d1826..1db70162bff 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -158,7 +158,7 @@ bool MergeTreePartsMover::selectPartsForMove( { auto destination = data->getDestinationForMoveTTL(*ttl_entry); if (destination && !data->isPartInTTLDestination(*ttl_entry, *part)) - reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); + reservation = 
MergeTreeData::tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); } if (reservation) /// Found reservation by TTL rule. diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index b42da22239e..5d6f08d3c53 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -230,7 +230,7 @@ void MergeTreeColumnSettings::validate(const SettingsChanges & changes) "Setting {} is unknown or not supported at column level, supported settings: {}", change.name, fmt::join(allowed_column_level_settings, ", ")); - merge_tree_settings.checkCanSet(change.name, change.value); + MergeTreeSettings::checkCanSet(change.name, change.value); } } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 82c015f33ed..b7dede3cb00 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -63,7 +63,7 @@ void MergeTreeSink::consume(Chunk chunk) if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; DelayedPartitions partitions; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e1f28c2a951..8913e9f7e27 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -449,7 +449,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementInsertedPartsProfileEvent(part->getType()); } catch (...) 
{ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 268f2d66c96..287a4d20543 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -43,7 +43,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr { if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - auto minmax_idx_column_names = data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); + auto minmax_idx_column_names = MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); date_column = minmax_idx_column_names[data.minmax_idx_date_column_pos]; } diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 87790dd2fdc..918f54b1672 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -364,7 +364,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration if (val.fixed) val.fixed = generateRandomFixedValue(config, rnd); - else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < config.value_number_limit) + else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < StorageFuzzJSON::Configuration::value_number_limit) { if (val.array->empty()) val.array->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ false, depth)); @@ -377,7 +377,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration } ++node_count; } - else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < config.value_number_limit) + else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < StorageFuzzJSON::Configuration::value_number_limit) { val.object->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ true, depth)); ++node_count; @@ -619,11 +619,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration { configuration.max_output_length = collection.get("max_output_length"); - if (configuration.max_output_length < 2 || configuration.max_output_length > configuration.output_length_limit) + if (configuration.max_output_length < 2 || configuration.max_output_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_output_length' argument must be within the interval [2, {}.]", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); } if (collection.has("max_nesting_level")) @@ -638,11 +638,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if (collection.has("max_string_value_length")) { auto max_string_value_length = collection.get("max_string_value_length"); - if (max_string_value_length > configuration.output_length_limit) + if (max_string_value_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_string_value_length' argument must be at most {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_string_value_length = std::min(max_string_value_length, configuration.max_output_length); } @@ -650,11 +650,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if 
(collection.has("max_key_length")) { auto max_key_length = collection.get("max_key_length"); - if (max_key_length > configuration.output_length_limit) + if (max_key_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_key_length' argument must be less or equal than {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_key_length = std::min(max_key_length, configuration.max_output_length); configuration.min_key_length = std::min(configuration.min_key_length, configuration.max_key_length); } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 3d1ce76dff1..6e7ea32ee59 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -426,7 +426,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns ParserTTLExpressionList parser; ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getTTLForTableFromAST(ast, columns, context, primary_key, context->getSettingsRef().allow_suspicious_ttl_expressions); } From 2578ceddadf0271eac6c48b059bd46b873a5ae5d Mon Sep 17 00:00:00 2001 From: Joseph Redfern Date: Wed, 3 Apr 2024 21:09:47 +0100 Subject: [PATCH 144/150] Correct typo in "use_invironment_credentials" configuration option `use_environment_credentials` was incorrectly specified as `use_invironment_credentials` --- docs/en/operations/storing-data.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9ffbb64c1ed..2c642dd2f0b 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -36,7 +36,7 @@ E.g.
configuration option s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` @@ -47,7 +47,7 @@ is equal to configuration (from `24.1`): s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` @@ -56,7 +56,7 @@ Configuration s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` @@ -67,7 +67,7 @@ is equal to s3 plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` @@ -79,7 +79,7 @@ Example of full storage configuration will look like: s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> @@ -105,7 +105,7 @@ Starting with 24.1 clickhouse version, it can also look like: s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> @@ -324,7 +324,7 @@ Configuration: s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` @@ -337,7 +337,7 @@ Configuration: azure plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - <use_invironment_credentials>1</use_invironment_credentials> + <use_environment_credentials>1</use_environment_credentials> ``` From d905b83369931400bbd87faeef138fc68ac17455 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 20:10:18 +0000 Subject: [PATCH 145/150] Fix something bad --- src/Dictionaries/FlatDictionary.cpp | 3 ++- src/Functions/decodeHTMLComponent.cpp | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index e3b1e8a84e2..7509af31fac 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -412,7 +412,8 @@ void FlatDictionary::blockToAttributes(const Block & block) { const auto keys_column = block.safeGetByPosition(0).column; - DictionaryKeysExtractor keys_extractor({ keys_column }, DictionaryKeysArenaHolder::getComplexKeyArena()); + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); /// NOLINT(readability-static-accessed-through-instance) size_t keys_size = keys_extractor.getKeysSize(); static constexpr size_t key_offset = 1; diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index cb6ba0b07e2..4db3c43f946 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -70,8 +70,7 @@ namespace const char * src_pos = src; const char * src_end = src + src_size; char * dst_pos = dst; - // perfect hashmap to lookup html character references - HTMLCharacterHash hash; + // to hold char seq for lookup, reuse it std::vector seq; while (true) From dda1a0b9f0d09777fb7d0cc79ca26d8a58f16476 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 20:44:02 +0000 Subject: [PATCH 146/150] Also enable modernize-use-override --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 13c1b116ead..c98bee71d1a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -96,7 +96,6 @@ Checks: [ '-modernize-use-default-member-init', '-modernize-use-emplace', '-modernize-use-nodiscard', - '-modernize-use-override', '-modernize-use-trailing-return-type', '-performance-inefficient-string-concatenation', From 3c61a7f4bfc30fa23c8c9a0e90cddb12b5617cda Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 3 Apr 2024 23:34:31 +0200 Subject: [PATCH 147/150] Update storing-data.md From b91d446630a6054abcbc01251fba3abf032a62b6 Mon Sep
17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 10:57:29 +0200 Subject: [PATCH 148/150] Ping CI From d5229da1ec26918f8904eff0d91eabc8db89f1db Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Apr 2024 09:09:03 +0000 Subject: [PATCH 149/150] Fix tidy build --- src/Functions/GatherUtils/Sources.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 01b63338559..4e3009a695d 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -144,7 +144,7 @@ struct NumericArraySource : public ArraySourceImpl> #pragma clang diagnostic ignored "-Wsuggest-override" #pragma clang diagnostic ignored "-Wsuggest-destructor-override" -/// NOLINTBEGIN(hicpp-use-override) +/// NOLINTBEGIN(hicpp-use-override, modernize-use-override) template struct ConstSource : public Base @@ -233,7 +233,7 @@ struct ConstSource : public Base } }; -/// NOLINTEND(hicpp-use-override) +/// NOLINTEND(hicpp-use-override, modernize-use-override) #pragma clang diagnostic pop From a0925e6bc4efcbb0d9c3e8e8475810a566c63fa1 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 4 Apr 2024 14:19:19 +0200 Subject: [PATCH 150/150] empty commit
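Taken together, the mechanical changes in PATCH 143 and PATCH 145 silence clang-tidy's readability-static-accessed-through-instance check (static members should be accessed through the class name, not through an object), and PATCH 146 additionally enables modernize-use-override. A minimal, self-contained sketch of what these two checks enforce; this is hypothetical illustration code, not taken from the ClickHouse tree:

#include <iostream>

struct Cache
{
    /// A static member function: it belongs to the class, not to any instance.
    /// (The multiplier is an arbitrary hash constant chosen for the example.)
    static unsigned hash(unsigned x) { return x * 2654435761u; }
};

struct Base
{
    virtual ~Base() = default;
    virtual void next() { }
};

struct Derived : Base
{
    /// modernize-use-override: overriding functions are marked `override`
    /// rather than repeating `virtual`.
    void next() override { }
};

int main()
{
    Cache cache;
    std::cout << cache.hash(42) << '\n';  /// flagged: static member accessed through an instance
    std::cout << Cache::hash(42) << '\n'; /// the form the series rewrites such calls to
    Derived d;
    d.next();
    return 0;
}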