From 9fc9fa78503b3c657b2e482eb7e99f7e51a78107 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 19 Feb 2024 02:17:00 +0000 Subject: [PATCH 01/44] client side --- src/Access/Common/AccessFlags.cpp | 9 ++++++++- src/Access/Common/AccessFlags.h | 4 ++++ src/Access/Common/AccessType.h | 2 ++ src/Storages/System/StorageSystemPrivileges.cpp | 2 ++ .../queries/0_stateless/01271_show_privileges.reference | 1 + 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/Access/Common/AccessFlags.cpp b/src/Access/Common/AccessFlags.cpp index 8612fc2309e..42cf34e167a 100644 --- a/src/Access/Common/AccessFlags.cpp +++ b/src/Access/Common/AccessFlags.cpp @@ -102,6 +102,7 @@ namespace const Flags & getTableFlags() const { return all_flags_for_target[TABLE]; } const Flags & getColumnFlags() const { return all_flags_for_target[COLUMN]; } const Flags & getDictionaryFlags() const { return all_flags_for_target[DICTIONARY]; } + const Flags & getTableEngineFlags() const { return all_flags_for_target[TABLE_ENGINE]; } const Flags & getNamedCollectionFlags() const { return all_flags_for_target[NAMED_COLLECTION]; } const Flags & getAllFlagsGrantableOnGlobalLevel() const { return getAllFlags(); } const Flags & getAllFlagsGrantableOnGlobalWithParameterLevel() const { return getGlobalWithParameterFlags(); } @@ -120,6 +121,7 @@ namespace VIEW = TABLE, COLUMN, DICTIONARY, + TABLE_ENGINE, NAMED_COLLECTION, }; @@ -300,7 +302,7 @@ namespace collectAllFlags(child.get()); all_flags_grantable_on_table_level = all_flags_for_target[TABLE] | all_flags_for_target[DICTIONARY] | all_flags_for_target[COLUMN]; - all_flags_grantable_on_global_with_parameter_level = all_flags_for_target[NAMED_COLLECTION]; + all_flags_grantable_on_global_with_parameter_level = all_flags_for_target[TABLE_ENGINE] | all_flags_for_target[NAMED_COLLECTION]; all_flags_grantable_on_database_level = all_flags_for_target[DATABASE] | all_flags_grantable_on_table_level; } @@ -383,6 +385,10 @@ AccessFlags::ParameterType AccessFlags::getParameterType() const if (isEmpty() || !AccessFlags::allGlobalWithParameterFlags().contains(*this)) return AccessFlags::NONE; + /// All flags refer to TABLE ENGINE access type. + if (AccessFlags::allTableEngineFlags().contains(*this)) + return AccessFlags::TABLE_ENGINE; + /// All flags refer to NAMED COLLECTION access type. 
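The ordering of these containment checks matters: a flag set that mixes parameter types must fall through every check so the MIXED_ACCESS_PARAMETER_TYPES branch can reject it. A self-contained sketch of the same resolution pattern — toy 8-bit flags and illustrative bit assignments, not the real AccessFlags internals:

```cpp
#include <bitset>
#include <iostream>
#include <stdexcept>

// Toy model: each bit is one access flag. The real AccessFlags wraps a much
// larger bitset, but the subset test and the check ordering are the same.
using Flags = std::bitset<8>;

const Flags table_engine_flags{0b00000011};     // pretend bits 0-1 are TABLE ENGINE flags
const Flags named_collection_flags{0b00001100}; // pretend bits 2-3 are NAMED COLLECTION flags

bool contains(const Flags & outer, const Flags & inner) { return (inner & ~outer).none(); }

enum class ParameterType { NONE, TABLE_ENGINE, NAMED_COLLECTION };

ParameterType getParameterType(const Flags & flags)
{
    if (flags.none())
        return ParameterType::NONE;
    if (contains(table_engine_flags, flags))
        return ParameterType::TABLE_ENGINE;
    if (contains(named_collection_flags, flags))
        return ParameterType::NAMED_COLLECTION;
    // A set spanning several parameter types is rejected, mirroring MIXED_ACCESS_PARAMETER_TYPES.
    throw std::runtime_error("mixed parameter types");
}

int main()
{
    std::cout << static_cast<int>(getParameterType(Flags{0b00000001})) << '\n'; // 1 (TABLE_ENGINE)
    std::cout << static_cast<int>(getParameterType(Flags{0b00000100})) << '\n'; // 2 (NAMED_COLLECTION)
}
```

The named-collection check that the patch resumes below follows the same shape.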
if (AccessFlags::allNamedCollectionFlags().contains(*this)) return AccessFlags::NAMED_COLLECTION; @@ -404,6 +410,7 @@ AccessFlags AccessFlags::allDatabaseFlags() { return Helper::instance().getDatab AccessFlags AccessFlags::allTableFlags() { return Helper::instance().getTableFlags(); } AccessFlags AccessFlags::allColumnFlags() { return Helper::instance().getColumnFlags(); } AccessFlags AccessFlags::allDictionaryFlags() { return Helper::instance().getDictionaryFlags(); } +AccessFlags AccessFlags::allTableEngineFlags() { return Helper::instance().getTableEngineFlags(); } AccessFlags AccessFlags::allNamedCollectionFlags() { return Helper::instance().getNamedCollectionFlags(); } AccessFlags AccessFlags::allFlagsGrantableOnGlobalLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalLevel(); } AccessFlags AccessFlags::allFlagsGrantableOnGlobalWithParameterLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalWithParameterLevel(); } diff --git a/src/Access/Common/AccessFlags.h b/src/Access/Common/AccessFlags.h index c9672da7d92..0b2a3b8eb8e 100644 --- a/src/Access/Common/AccessFlags.h +++ b/src/Access/Common/AccessFlags.h @@ -56,6 +56,7 @@ public: enum ParameterType { NONE, + TABLE_ENGINE, NAMED_COLLECTION, }; ParameterType getParameterType() const; @@ -100,6 +101,9 @@ public: /// Returns all the flags related to a dictionary. static AccessFlags allDictionaryFlags(); + /// Returns all the flags related to a table engine. + static AccessFlags allTableEngineFlags(); + /// Returns all the flags related to a named collection. static AccessFlags allNamedCollectionFlags(); diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index b305b6fca86..222284480a0 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -151,6 +151,8 @@ enum class AccessType M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ + M(TABLE_ENGINE, "TABLE ENGINE", TABLE_ENGINE, ALL) \ + \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index f45f3c6ed01..115fc7fd9d0 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -28,6 +28,7 @@ namespace DICTIONARY, VIEW, COLUMN, + TABLE_ENGINE, NAMED_COLLECTION, }; @@ -40,6 +41,7 @@ namespace enum_values.emplace_back("DICTIONARY", static_cast<Int8>(DICTIONARY)); enum_values.emplace_back("VIEW", static_cast<Int8>(VIEW)); enum_values.emplace_back("COLUMN", static_cast<Int8>(COLUMN)); + enum_values.emplace_back("TABLE_ENGINE", static_cast<Int8>(TABLE_ENGINE)); enum_values.emplace_back("NAMED_COLLECTION", static_cast<Int8>(NAMED_COLLECTION)); return enum_values; } diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 6a7e4748130..4bdb894b0cd 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -101,6 +101,7 @@ SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECT SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS']
NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ['NAMED COLLECTION USAGE','USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL +TABLE ENGINE ['TABLE ENGINE'] TABLE_ENGINE ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE From 2a88f61c963dc7009ed58886d4b31ec91df34890 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 19 Feb 2024 21:39:34 +0000 Subject: [PATCH 02/44] check permission in the server side --- src/Access/Common/AccessFlags.cpp | 7 ++++++- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Access/Common/AccessFlags.cpp b/src/Access/Common/AccessFlags.cpp index 42cf34e167a..8047b7813e5 100644 --- a/src/Access/Common/AccessFlags.cpp +++ b/src/Access/Common/AccessFlags.cpp @@ -369,11 +369,16 @@ std::unordered_map<AccessFlags::ParameterType, AccessFlags> AccessFlags::splitIn { std::unordered_map<ParameterType, AccessFlags> result; + auto table_engine_flags = AccessFlags::allTableEngineFlags() & *this; + if (table_engine_flags) + result.emplace(ParameterType::TABLE_ENGINE, table_engine_flags); + auto named_collection_flags = AccessFlags::allNamedCollectionFlags() & *this; if (named_collection_flags) result.emplace(ParameterType::NAMED_COLLECTION, named_collection_flags); - auto other_flags = (~AccessFlags::allNamedCollectionFlags()) & *this; + auto other_flags = (~AccessFlags::allTableEngineFlags()) & (~AccessFlags::allNamedCollectionFlags()) & *this; if (other_flags) result.emplace(ParameterType::NONE, other_flags); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index a7594503992..e3449a027c3 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -50,7 +50,7 @@ TEST(AccessRights, Union) "GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, " "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, " "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " - "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " + "SHOW ROW POLICIES, TABLE ENGINE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c491ee30321..2343fcd7da1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1891,6 +1891,7 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); if (source_access_type != AccessType::NONE) required_access.emplace_back(source_access_type); + required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); } return required_access; From afa7a95c8c0ffaebfb974dce05e3a6c6616c089d Mon Sep 17
00:00:00 2001 From: jsc0218 Date: Tue, 20 Feb 2024 02:11:31 +0000 Subject: [PATCH 03/44] add backward compatibility --- docs/en/sql-reference/statements/grant.md | 10 ++++++++ programs/server/config.xml | 4 ++++ src/Access/AccessControl.cpp | 1 + src/Access/AccessControl.h | 4 ++++ src/Access/Common/AccessType.h | 2 +- src/Access/ContextAccess.cpp | 3 +++ 6 files changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index e6073f3523a..6106ff88de8 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -201,6 +201,7 @@ Hierarchy of privileges: - `S3` - [dictGet](#grant-dictget) - [displaySecretsInShowAndSelect](#grant-display-secrets) +- [TABLE ENGINE](#grant-table-engine) Examples of how this hierarchy is treated: @@ -495,6 +496,15 @@ and [`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) are turned on. +### TABLE ENGINE + +Allows using a specified table engine when creating a table. Applies to [table engines](../../engines/table-engines/index.md). + +**Examples** + +- `GRANT TABLE ENGINE ON * TO john` +- `GRANT TABLE ENGINE ON TinyLog TO john` + ### ALL Grants all the privileges on regulated entity to a user account or a role. diff --git a/programs/server/config.xml index 6a40818332b..cb7fba90c52 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -742,6 +742,10 @@ It also enables 'changeable_in_readonly' constraint type --> <settings_constraints_replace_previous>true</settings_constraints_replace_previous> + + <!-- Require the TABLE ENGINE grant to create a table with a specific table engine --> + <table_engines_require_grant>false</table_engines_require_grant> + <role_cache_expiration_time_seconds>600</role_cache_expiration_time_seconds> diff --git a/src/Access/AccessControl.cpp index d02af01126a..da047d1cb1d 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -285,6 +285,7 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", false)); setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", false)); setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false)); + setTableEnginesRequireGrant(config_.getBool("access_control_improvements.table_engines_require_grant", false)); addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_); diff --git a/src/Access/AccessControl.h index 904f77faf90..60b6d49cd1f 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -182,6 +182,9 @@ public: void setSettingsConstraintsReplacePrevious(bool enable) { settings_constraints_replace_previous = enable; } bool doesSettingsConstraintsReplacePrevious() const { return settings_constraints_replace_previous; } + void setTableEnginesRequireGrant(bool enable) { table_engines_require_grant = enable; } + bool doesTableEnginesRequireGrant() const { return table_engines_require_grant; } + std::shared_ptr<const ContextAccess> getContextAccess(const ContextAccessParams & params) const; std::shared_ptr<const EnabledRoles> getEnabledRoles( @@ -258,6 +261,7 @@ private: std::atomic_bool select_from_system_db_requires_grant = false; std::atomic_bool select_from_information_schema_requires_grant = false; std::atomic_bool settings_constraints_replace_previous = false; + std::atomic_bool table_engines_require_grant = false; std::atomic_int bcrypt_workfactor = 12;
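The new table_engines_require_grant toggle follows the established access_control_improvements pattern: an atomic boolean that configuration loading sets once and every access check reads. A minimal standalone model of that getter/setter pair (class name is illustrative, not the real AccessControl):

```cpp
#include <atomic>
#include <iostream>

// Reduced model of the AccessControl pattern: each "improvement" toggle is an
// atomic_bool that config loading sets once and every access check reads.
class AccessControlModel
{
public:
    void setTableEnginesRequireGrant(bool enable) { table_engines_require_grant = enable; }
    bool doesTableEnginesRequireGrant() const { return table_engines_require_grant; }

private:
    std::atomic_bool table_engines_require_grant = false; // off by default for backward compatibility
};

int main()
{
    AccessControlModel access_control;
    std::cout << access_control.doesTableEnginesRequireGrant() << '\n'; // 0
    access_control.setTableEnginesRequireGrant(true); // as if table_engines_require_grant=true were configured
    std::cout << access_control.doesTableEnginesRequireGrant() << '\n'; // 1
}
```

Keeping the default off means existing deployments see no behavior change until they opt in; the remaining class members continue below.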
std::atomic<AuthenticationType> default_password_type = AuthenticationType::SHA256_PASSWORD; }; diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 222284480a0..93c5bd13030 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -12,7 +12,7 @@ enum class AccessType /// Macro M should be defined as M(name, aliases, node_type, parent_group_name) /// where name is identifier with underscores (instead of spaces); /// aliases is a string containing comma-separated list; -/// node_type either specifies access type's level (GLOBAL/NAMED_COLLECTION/DATABASE/TABLE/DICTIONARY/VIEW/COLUMNS), +/// node_type either specifies access type's level (GLOBAL/NAMED_COLLECTION/TABLE_ENGINE/DATABASE/TABLE/DICTIONARY/VIEW/COLUMNS), /// or specifies that the access type is a GROUP of other access types; /// parent_group_name is the name of the group containing this access type (or NONE if there is no such group). /// NOTE A parent group must be declared AFTER all its children. diff --git a/src/Access/ContextAccess.cpp index 0943e797e3f..3de431a8f8c 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -547,6 +547,9 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (flags & AccessType::CLUSTER && !access_control->doesOnClusterQueriesRequireClusterGrant()) flags &= ~AccessType::CLUSTER; + if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) + flags &= ~AccessType::TABLE_ENGINE; + if (!flags) return true; From fa6bf25800a6de571001d176b3cbc0e58cdc8473 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 21 Feb 2024 03:51:38 +0000 Subject: [PATCH 04/44] add test --- src/Access/tests/gtest_access_rights_ops.cpp | 4 +- .../test_grant_and_revoke/configs/config.xml | 5 +++ .../integration/test_grant_and_revoke/test.py | 38 +++++++++++++++--- 3 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_grant_and_revoke/configs/config.xml diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index e3449a027c3..793e27a9de5 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -50,10 +50,10 @@ TEST(AccessRights, Union) "GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, " "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, " "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " - "SHOW ROW POLICIES, TABLE ENGINE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " + "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1, GRANT TABLE ENGINE ON db1"); } diff --git a/tests/integration/test_grant_and_revoke/configs/config.xml new file mode 100644 index 00000000000..4eed5ee0c8a --- /dev/null +++ b/tests/integration/test_grant_and_revoke/configs/config.xml
@@ -0,0 +1,5 @@ +<clickhouse> + <access_control_improvements> + <table_engines_require_grant>true</table_engines_require_grant> + </access_control_improvements> +</clickhouse> diff --git a/tests/integration/test_grant_and_revoke/test.py index a86a1208f49..bc5b27640dc 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -5,9 +5,8 @@ from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", - user_configs=[ - "configs/users.d/users.xml", - ], + main_configs=["configs/config.xml"], + user_configs=["configs/users.d/users.xml"], ) @@ -719,3 +718,36 @@ def test_current_grants_override(): "REVOKE SELECT ON test.* FROM B", ] ) + + +def test_table_engine_grant_and_revoke(): + instance.query("DROP USER IF EXISTS A") + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON test.table1 TO A") + assert "Not enough privileges" in instance.query_and_get_error( + "CREATE TABLE test.table1(a Integer) engine=TinyLog", user="A" + ) + + instance.query("GRANT TABLE ENGINE ON TinyLog TO A") + + assert "Not enough privileges" not in instance.query( + "CREATE TABLE test.table1(a Integer) engine=TinyLog", user="A" + ) + + assert instance.query("SHOW GRANTS FOR A") == TSV( + [ + "GRANT TABLE ENGINE ON TinyLog TO A", + "GRANT CREATE TABLE ON test.table1 TO A", + ] + ) + + instance.query("REVOKE TABLE ENGINE ON TinyLog FROM A") + + assert "Not enough privileges" in instance.query_and_get_error( + "CREATE TABLE test.table1(a Integer) engine=TinyLog", user="A" + ) + + instance.query("REVOKE CREATE TABLE ON test.table1 FROM A") + instance.query("DROP TABLE test.table1") + + assert instance.query("SHOW GRANTS FOR A") == TSV([]) From 24342012ffceb2936c8f44e111ee1c10eb13a89a Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 28 Feb 2024 18:11:33 +0000 Subject: [PATCH 05/44] fix --- src/Access/Common/AccessFlags.cpp | 16 ++++++++-------- src/Access/Common/AccessFlags.h | 6 +++--- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- src/Storages/System/StorageSystemPrivileges.cpp | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Access/Common/AccessFlags.cpp b/src/Access/Common/AccessFlags.cpp index 9f30a42ce2e..a76d161bf36 100644 --- a/src/Access/Common/AccessFlags.cpp +++ b/src/Access/Common/AccessFlags.cpp @@ -103,8 +103,8 @@ namespace const Flags & getColumnFlags() const { return all_flags_for_target[COLUMN]; } const Flags & getDictionaryFlags() const { return all_flags_for_target[DICTIONARY]; } const Flags & getTableEngineFlags() const { return all_flags_for_target[TABLE_ENGINE]; } - const Flags & getNamedCollectionFlags() const { return all_flags_for_target[NAMED_COLLECTION]; } const Flags & getUserNameFlags() const { return all_flags_for_target[USER_NAME]; } + const Flags & getNamedCollectionFlags() const { return all_flags_for_target[NAMED_COLLECTION]; } const Flags & getAllFlagsGrantableOnGlobalLevel() const { return getAllFlags(); } const Flags & getAllFlagsGrantableOnGlobalWithParameterLevel() const { return getGlobalWithParameterFlags(); } const Flags & getAllFlagsGrantableOnDatabaseLevel() const { return all_flags_grantable_on_database_level; } @@ -122,9 +122,9 @@ namespace VIEW = TABLE, COLUMN, DICTIONARY, - TABLE_ENGINE, NAMED_COLLECTION, USER_NAME, + TABLE_ENGINE, }; struct Node; @@ -355,7 +355,7 @@ namespace std::unordered_map<std::string_view, Flags> keyword_to_flags_map; std::vector<Flags> access_type_to_flags_mapping; Flags all_flags; - Flags all_flags_for_target[static_cast<size_t>(USER_NAME) + 1]; + Flags all_flags_for_target[static_cast<size_t>(TABLE_ENGINE) + 1]; Flags
all_flags_grantable_on_database_level; Flags all_flags_grantable_on_table_level; Flags all_flags_grantable_on_global_with_parameter_level; @@ -395,10 +395,6 @@ AccessFlags::ParameterType AccessFlags::getParameterType() const if (isEmpty() || !AccessFlags::allGlobalWithParameterFlags().contains(*this)) return AccessFlags::NONE; - /// All flags refer to TABLE ENGINE access type. - if (AccessFlags::allTableEngineFlags().contains(*this)) - return AccessFlags::TABLE_ENGINE; - /// All flags refer to NAMED COLLECTION access type. if (AccessFlags::allNamedCollectionFlags().contains(*this)) return AccessFlags::NAMED_COLLECTION; @@ -406,6 +402,10 @@ AccessFlags::ParameterType AccessFlags::getParameterType() const if (AccessFlags::allUserNameFlags().contains(*this)) return AccessFlags::USER_NAME; + /// All flags refer to TABLE ENGINE access type. + if (AccessFlags::allTableEngineFlags().contains(*this)) + return AccessFlags::TABLE_ENGINE; + throw Exception(ErrorCodes::MIXED_ACCESS_PARAMETER_TYPES, "Having mixed parameter types: {}", toString()); } @@ -423,9 +423,9 @@ AccessFlags AccessFlags::allDatabaseFlags() { return Helper::instance().getDatab AccessFlags AccessFlags::allTableFlags() { return Helper::instance().getTableFlags(); } AccessFlags AccessFlags::allColumnFlags() { return Helper::instance().getColumnFlags(); } AccessFlags AccessFlags::allDictionaryFlags() { return Helper::instance().getDictionaryFlags(); } -AccessFlags AccessFlags::allTableEngineFlags() { return Helper::instance().getTableEngineFlags(); } AccessFlags AccessFlags::allNamedCollectionFlags() { return Helper::instance().getNamedCollectionFlags(); } AccessFlags AccessFlags::allUserNameFlags() { return Helper::instance().getUserNameFlags(); } +AccessFlags AccessFlags::allTableEngineFlags() { return Helper::instance().getTableEngineFlags(); } AccessFlags AccessFlags::allFlagsGrantableOnGlobalLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalLevel(); } AccessFlags AccessFlags::allFlagsGrantableOnGlobalWithParameterLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalWithParameterLevel(); } AccessFlags AccessFlags::allFlagsGrantableOnDatabaseLevel() { return Helper::instance().getAllFlagsGrantableOnDatabaseLevel(); } diff --git a/src/Access/Common/AccessFlags.h b/src/Access/Common/AccessFlags.h index 9204c9e370b..e2c0611be52 100644 --- a/src/Access/Common/AccessFlags.h +++ b/src/Access/Common/AccessFlags.h @@ -102,15 +102,15 @@ public: /// Returns all the flags related to a dictionary. static AccessFlags allDictionaryFlags(); - /// Returns all the flags related to a table engine. - static AccessFlags allTableEngineFlags(); - /// Returns all the flags related to a named collection. static AccessFlags allNamedCollectionFlags(); /// Returns all the flags related to a user. static AccessFlags allUserNameFlags(); + /// Returns all the flags related to a table engine. + static AccessFlags allTableEngineFlags(); + /// Returns all the flags which could be granted on the global level. /// The same as allFlags(). 
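Appending TABLE_ENGINE at the end of the Target enum (rather than in the middle, as patch 01 originally did) keeps existing enumerator values stable, but the per-target array above must then be sized by the new last enumerator. A toy version of that invariant, with an illustrative subset of the real targets:

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// Toy version of Helper::all_flags_for_target: an array indexed by the Target
// enum must be sized by the last enumerator, so appending TABLE_ENGINE at the
// end also requires bumping the size expression from USER_NAME to TABLE_ENGINE.
enum Target
{
    DATABASE = 0,
    TABLE,
    COLUMN,
    DICTIONARY,
    NAMED_COLLECTION,
    USER_NAME,
    TABLE_ENGINE, // appended last: existing enumerator values stay stable
};

std::array<unsigned, static_cast<size_t>(TABLE_ENGINE) + 1> all_flags_for_target{};

int main()
{
    all_flags_for_target[TABLE_ENGINE] = 0xFF; // flags collected for the new target
    std::cout << all_flags_for_target.size() << '\n'; // 7
}
```

The getParameterType reordering in the hunks that follow mirrors this same enum order.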
static AccessFlags allFlagsGrantableOnGlobalLevel(); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 1711952624e..66ff0238e67 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -54,7 +54,7 @@ TEST(AccessRights, Union) "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, " - "GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1, GRANT TABLE ENGINE ON db1"); + "GRANT TABLE ENGINE ON db1, GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index a353a6ff104..13088cdd1dc 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -28,9 +28,9 @@ namespace DICTIONARY, VIEW, COLUMN, - TABLE_ENGINE, NAMED_COLLECTION, USER_NAME, + TABLE_ENGINE, }; DataTypeEnum8::Values getLevelEnumValues() { enum_values.emplace_back("DATABASE", static_cast<Int8>(DATABASE)); enum_values.emplace_back("TABLE", static_cast<Int8>(TABLE)); enum_values.emplace_back("DICTIONARY", static_cast<Int8>(DICTIONARY)); enum_values.emplace_back("VIEW", static_cast<Int8>(VIEW)); enum_values.emplace_back("COLUMN", static_cast<Int8>(COLUMN)); - enum_values.emplace_back("TABLE_ENGINE", static_cast<Int8>(TABLE_ENGINE)); enum_values.emplace_back("NAMED_COLLECTION", static_cast<Int8>(NAMED_COLLECTION)); enum_values.emplace_back("USER_NAME", static_cast<Int8>(USER_NAME)); + enum_values.emplace_back("TABLE_ENGINE", static_cast<Int8>(TABLE_ENGINE)); return enum_values; } } From 96d4c9146b4eac56225bd85bdd12a8eb3f8a72e0 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 29 Feb 2024 03:35:47 +0000 Subject: [PATCH 06/44] solve redundance with sources grant --- src/Interpreters/InterpreterCreateQuery.cpp | 8 ++++-- .../integration/test_grant_and_revoke/test.py | 25 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 130c02ae4ee..47f419d2a1c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -724,8 +724,10 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.storage && create.storage->engine) { auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); - if (source_access_type != AccessType::NONE) + const auto & access_control = getContext()->getAccessControl(); + if (source_access_type != AccessType::NONE && !access_control.doesTableEnginesRequireGrant()) getContext()->checkAccess(source_access_type); + getContext()->checkAccess(AccessType::TABLE_ENGINE, create.storage->engine->name); } TableProperties properties; @@ -1849,7 +1851,9 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const if (create.storage && create.storage->engine) { auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); - if (source_access_type != AccessType::NONE) + const auto & access_control = getContext()->getAccessControl(); + /// We just need to check GRANT TABLE ENGINE for sources if grant of table engine is enabled.
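For backward compatibility the server-side enforcement stays dormant unless the config enables it: checkAccessImplHelper (patch 03 above) simply masks the TABLE_ENGINE bit out of the requested flags, exactly as it already does for CLUSTER. A condensed, runnable model of that masking with toy flag values — the InterpreterCreateQuery hunk resumes right after this note:

```cpp
#include <cstdint>
#include <iostream>

// Condensed model of ContextAccess::checkAccessImplHelper: a disabled
// improvement is implemented by masking its bit out of the requested flags,
// so the corresponding grant silently becomes a no-op requirement.
enum Access : uint32_t
{
    CLUSTER      = 1u << 0,
    TABLE_ENGINE = 1u << 1,
};

bool checkAccess(uint32_t flags, bool table_engines_require_grant)
{
    if ((flags & TABLE_ENGINE) && !table_engines_require_grant)
        flags &= ~static_cast<uint32_t>(TABLE_ENGINE);

    if (!flags)
        return true; // nothing left to check: access is allowed

    return false; // remaining flags would be matched against granted rights here
}

int main()
{
    std::cout << checkAccess(TABLE_ENGINE, false) << '\n'; // 1: check skipped entirely
    std::cout << checkAccess(TABLE_ENGINE, true) << '\n';  // 0: a real grant is required
}
```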
+ if (source_access_type != AccessType::NONE && !access_control.doesTableEnginesRequireGrant()) required_access.emplace_back(source_access_type); required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); } return required_access; diff --git a/tests/integration/test_grant_and_revoke/test.py index bc5b27640dc..f1b9f8e7e14 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -370,6 +370,7 @@ def test_implicit_create_temporary_table_grant(): ) instance.query("GRANT CREATE TABLE ON test.* TO A") + instance.query("GRANT TABLE ENGINE ON Memory TO A") instance.query("CREATE TEMPORARY TABLE tmp(name String)", user="A") instance.query("REVOKE CREATE TABLE ON *.* FROM A") @@ -730,9 +731,7 @@ def test_table_engine_grant_and_revoke(): instance.query("GRANT TABLE ENGINE ON TinyLog TO A") - assert "Not enough privileges" not in instance.query( - "CREATE TABLE test.table1(a Integer) engine=TinyLog", user="A" - ) + instance.query("CREATE TABLE test.table1(a Integer) engine=TinyLog", user="A") assert instance.query("SHOW GRANTS FOR A") == TSV( [ @@ -751,3 +750,23 @@ def test_table_engine_grant_and_revoke(): instance.query("REVOKE CREATE TABLE ON test.table1 FROM A") instance.query("DROP TABLE test.table1") assert instance.query("SHOW GRANTS FOR A") == TSV([]) + + +def test_table_engine_and_source_grant(): + instance.query("DROP USER IF EXISTS A") + instance.query("CREATE USER A") + + instance.query("GRANT CREATE TABLE ON test.table1 TO A") + instance.query("GRANT TABLE ENGINE ON PostgreSQL TO A") + # We don't need the following statement as GRANT TABLE ENGINE covers it already. + # instance.query("GRANT POSTGRES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") From c5dc35e5c416f8473fbeb4a035447ea4af1592b4 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 14 Mar 2024 01:31:45 +0000 Subject: [PATCH 07/44] try enable grant of table engine in test --- programs/server/config.xml | 2 +- tests/config/config.d/enable_access_control_improvements.xml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/server/config.xml index 28b25ed190e..4c59cd9d220 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -744,7 +744,7 @@ - <table_engines_require_grant>false</table_engines_require_grant> + <table_engines_require_grant>true</table_engines_require_grant> diff --git a/tests/config/config.d/enable_access_control_improvements.xml index 564b656a0ad..350c6fc4f44 100644 --- a/tests/config/config.d/enable_access_control_improvements.xml +++ b/tests/config/config.d/enable_access_control_improvements.xml @@ -5,5 +5,6 @@ <select_from_system_db_requires_grant>true</select_from_system_db_requires_grant> <select_from_information_schema_requires_grant>true</select_from_information_schema_requires_grant> <settings_constraints_replace_previous>true</settings_constraints_replace_previous> + <table_engines_require_grant>true</table_engines_require_grant> </access_control_improvements> </clickhouse> From 1676cf6dd3ea84d5f4c0d1c2643263fdd71f039f Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 16 Mar 2024 22:18:42 +0000 Subject: [PATCH 08/44] fix test --- .../test_backup_restore_new/test.py | 1 + .../configs/user_default.xml | 1 + tests/integration/test_distributed_ddl/test.py | 18 +++++++++++++++--- .../test_grant_and_revoke/configs/config.xml | 2 +- .../integration/test_settings_profile/test.py | 1 + tests/integration/test_storage_s3/test.py | 1 + .../0_stateless/02184_table_engine_access.sh | 2 +- .../02232_allow_only_replicated_engine.sh | 3 +++ ...database_replicated_no_arguments_for_rmt.sh | 1 + .../02561_temporary_table_grants.sh | 11 +++++++---- 10 files changed, 32 insertions(+), 9 deletions(-) diff --git
a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 34ffdf7a8df..2e714c29771 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1036,6 +1036,7 @@ def test_required_privileges(): ) instance.query("GRANT INSERT, CREATE ON test.table2 TO u1") + instance.query("GRANT TABLE ENGINE ON MergeTree TO u1") instance.query( f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}", user="u1" ) diff --git a/tests/integration/test_dictionaries_ddl/configs/user_default.xml b/tests/integration/test_dictionaries_ddl/configs/user_default.xml index bf004102329..6851f1c3a70 100644 --- a/tests/integration/test_dictionaries_ddl/configs/user_default.xml +++ b/tests/integration/test_dictionaries_ddl/configs/user_default.xml @@ -4,6 +4,7 @@ default test + Log diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index 7cee60a7f35..09db5fc6a0e 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ b/tests/integration/test_distributed_ddl/test.py @@ -291,20 +291,32 @@ def test_allowed_databases(test_cluster): instance.query("CREATE DATABASE IF NOT EXISTS db2 ON CLUSTER cluster") instance.query( - "CREATE TABLE db1.t1 ON CLUSTER cluster (i Int8) ENGINE = Memory", + "CREATE TABLE IF NOT EXISTS db1.t1 ON CLUSTER cluster (i Int8) ENGINE = Memory" + ) + instance.query( + "CREATE TABLE IF NOT EXISTS db2.t2 ON CLUSTER cluster (i Int8) ENGINE = Memory" + ) + instance.query( + "CREATE TABLE IF NOT EXISTS t3 ON CLUSTER cluster (i Int8) ENGINE = Memory" + ) + + instance.query( + "SELECT * FROM db1.t1", settings={"user": "restricted_user"}, ) with pytest.raises(Exception): instance.query( - "CREATE TABLE db2.t2 ON CLUSTER cluster (i Int8) ENGINE = Memory", + "SELECT * FROM db2.t2", settings={"user": "restricted_user"}, ) + with pytest.raises(Exception): instance.query( - "CREATE TABLE t3 ON CLUSTER cluster (i Int8) ENGINE = Memory", + "SELECT * FROM t3", settings={"user": "restricted_user"}, ) + with pytest.raises(Exception): instance.query( "DROP DATABASE db2 ON CLUSTER cluster", settings={"user": "restricted_user"} diff --git a/tests/integration/test_grant_and_revoke/configs/config.xml b/tests/integration/test_grant_and_revoke/configs/config.xml index 4eed5ee0c8a..fa009296dd3 100644 --- a/tests/integration/test_grant_and_revoke/configs/config.xml +++ b/tests/integration/test_grant_and_revoke/configs/config.xml @@ -1,5 +1,5 @@ - + true diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 61237af08c5..e5c0a072ff9 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -621,6 +621,7 @@ def test_allow_ddl(): ) instance.query("GRANT CREATE ON tbl TO robin") + instance.query("GRANT TABLE ENGINE ON Log TO robin") instance.query("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin") instance.query("DROP TABLE tbl") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dbbe670e8ca..fa05e2a90a1 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -989,6 +989,7 @@ def test_predefined_connection_configuration(started_cluster): instance.query("GRANT CREATE ON *.* TO user") instance.query("GRANT SOURCES ON *.* TO user") instance.query("GRANT SELECT ON *.* TO user") + instance.query("GRANT TABLE ENGINE ON S3 TO user") 
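These fixtures now grant TABLE ENGINE next to the legacy source privileges because the CREATE path records both kinds of requirements. A condensed model of that decision, simplified from the patch-06 hunks above to plain bools (the enum values and helper name are illustrative):

```cpp
#include <iostream>

// Condensed model of the patch-06 decision in InterpreterCreateQuery: with
// table_engines_require_grant enabled, TABLE ENGINE subsumes the legacy source
// grant (S3, URL, ...); with it disabled, the source grant is still demanded.
enum class AccessType { NONE, S3, TABLE_ENGINE };

struct RequiredAccess
{
    bool needs_source = false;
    bool needs_table_engine = false;
};

RequiredAccess requiredAccessForEngine(AccessType source_access_type, bool table_engines_require_grant)
{
    RequiredAccess required;
    if (source_access_type != AccessType::NONE && !table_engines_require_grant)
        required.needs_source = true;   // legacy path, e.g. GRANT S3 ON *.*
    required.needs_table_engine = true; // always recorded: GRANT TABLE ENGINE ON <engine>
    return required;
}

int main()
{
    auto required = requiredAccessForEngine(AccessType::S3, /*table_engines_require_grant=*/true);
    std::cout << required.needs_source << ' ' << required.needs_table_engine << '\n'; // 0 1
}
```

The remaining test updates continue below.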
instance.query(f"drop table if exists {name}", user="user") error = instance.query_and_get_error( diff --git a/tests/queries/0_stateless/02184_table_engine_access.sh b/tests/queries/0_stateless/02184_table_engine_access.sh index dbbf28e46d4..ddf1d9701a7 100755 --- a/tests/queries/0_stateless/02184_table_engine_access.sh +++ b/tests/queries/0_stateless/02184_table_engine_access.sh @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE url ENGINE=URL('https://clickhouse.com' $CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" 2>&1| grep -Fo "ACCESS_DENIED" | uniq -$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO user_test_02184;" +$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON URL TO user_test_02184;" $CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE t" $CLICKHOUSE_CLIENT --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index 193d5fdb6d5..19ac97068ae 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -9,6 +9,9 @@ ${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = Replic ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_only_replicated_engine=1" ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON MergeTree TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_memory (x UInt32) engine = Memory;" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index ee51640488e..a050f7b00d7 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -12,6 +12,7 @@ ${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = Replic ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_replicated_engine_arguments=0" ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = 
Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.sh b/tests/queries/0_stateless/02561_temporary_table_grants.sh index 33784f1d536..9e371a25d7c 100755 --- a/tests/queries/0_stateless/02561_temporary_table_grants.sh +++ b/tests/queries/0_stateless/02561_temporary_table_grants.sh @@ -13,23 +13,26 @@ $CLICKHOUSE_CLIENT --query "CREATE USER $user IDENTIFIED WITH PLAINTEXT_PASSWORD $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE TEMPORARY TABLE ON *.* TO $user" +$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON Memory TO $user" + $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_memory_02561(name String)" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant CREATE ARBITRARY TEMPORARY TABLE" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT --query "GRANT CREATE ARBITRARY TEMPORARY TABLE ON *.* TO $user" +$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON MergeTree TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_merge_tree_02561(name String) ENGINE = MergeTree() ORDER BY name" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant FILE" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant TABLE ENGINE ON File" > /dev/null && echo "OK" -$CLICKHOUSE_CLIENT --query "GRANT FILE ON *.* TO $user" +$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON File TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" -$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant URL" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. 
To execute this query, it's necessary to have the grant TABLE ENGINE ON URL" > /dev/null && echo "OK" -$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO $user" +$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON URL TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" From b2b9706ba2408b3fea5c2f2782e60ef0cb26127c Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 2 Apr 2024 16:07:18 +0800 Subject: [PATCH 09/44] Add query progress to table zookeeper --- src/Storages/System/StorageSystemZooKeeper.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index d1bf86ba8ef..604e29df0ec 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -622,6 +622,20 @@ Chunk SystemZooKeeperSource::generate() ZooKeeperRetriesControl("", nullptr, retries_seetings, query_status).retryLoop( [&]() { get_responses = get_zookeeper()->tryGet(paths_to_get); }); + /// Add children count to query total rows. We can not get total rows in advance, + /// because it is too heavy to get row count for non exact paths. + /// Please be aware that there might be minor setbacks in the query progress, + /// but overall it should reflect the advancement of the query. + size_t children_count = 0; + for (size_t i = 0, size = get_tasks.size(); i < size; ++i) + { + auto & res = get_responses[i]; + if (res.error == Coordination::Error::ZNONODE) + continue; /// Node was deleted meanwhile. + children_count += res.stat.numChildren; + } + addTotalRowsApprox(children_count); + for (size_t i = 0, size = get_tasks.size(); i < size; ++i) { auto & res = get_responses[i]; From 02eca8b0be6e239b084b22a54e84e76d21afd3f9 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 3 Apr 2024 02:35:49 +0000 Subject: [PATCH 10/44] avoid grant twice for source access types and table engines --- src/Access/ContextAccess.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 3de431a8f8c..499656e805b 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -204,6 +204,35 @@ namespace res.grant(AccessType::SELECT, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); } + /// There is overlap between AccessType sources and table engines, so the following code avoids user granting twice. 
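Before the actual hunk, the idea in brief: the pairs are walked once after implicit rights are computed, and a grant on either side of a pair is widened to cover both sides, so pre-existing "GRANT S3"-style dumps keep working once TABLE ENGINE is enforced. A toy, string-keyed version of the widening loop (keys are illustrative):

```cpp
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

// Toy, string-keyed version of the widening loop that follows: a grant on
// either half of a (source, engine) pair is expanded to cover both halves.
int main()
{
    const std::vector<std::pair<std::string, std::string>> source_and_table_engines = {
        {"FILE", "File"}, {"URL", "URL"}, {"S3", "S3"}, {"POSTGRES", "PostgreSQL"},
    };

    std::unordered_set<std::string> granted = {"S3"}; // user only holds the legacy source grant

    for (const auto & [source, engine] : source_and_table_engines)
        if (granted.count(source) || granted.count("TABLE ENGINE " + engine))
        {
            granted.insert(source);
            granted.insert("TABLE ENGINE " + engine);
        }

    for (const auto & grant : granted)
        std::cout << grant << '\n'; // prints S3 and TABLE ENGINE S3 (iteration order unspecified)
}
```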
+ std::vector<std::tuple<AccessType, std::string>> source_and_table_engines = { + {AccessType::FILE, "File"}, + {AccessType::URL, "URL"}, + {AccessType::REMOTE, "Distributed"}, + {AccessType::MONGO, "MongoDB"}, + {AccessType::REDIS, "Redis"}, + {AccessType::MYSQL, "MySQL"}, + {AccessType::POSTGRES, "PostgreSQL"}, + {AccessType::SQLITE, "SQLite"}, + {AccessType::ODBC, "ODBC"}, + {AccessType::JDBC, "JDBC"}, + {AccessType::HDFS, "HDFS"}, + {AccessType::S3, "S3"}, + {AccessType::HIVE, "Hive"}, + {AccessType::AZURE, "AzureBlobStorage"} + }; + + for (const auto & source_and_table_engine : source_and_table_engines) + { + const auto & source = std::get<0>(source_and_table_engine); + const auto & table_engine = std::get<1>(source_and_table_engine); + if (res.isGranted(source) || res.isGranted(AccessType::TABLE_ENGINE, table_engine)) + { + res.grant(source); + res.grant(AccessType::TABLE_ENGINE, table_engine); + } + } + return res; } From a1e3a4a58c95b763ad7687caf085d2e0eb12f001 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 4 Apr 2024 02:02:58 +0000 Subject: [PATCH 11/44] fix test --- .../integration/test_grant_and_revoke/test.py | 28 ++++++++++++++++--- .../0_stateless/02184_table_engine_access.sh | 2 +- .../02561_temporary_table_grants.sh | 4 +-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py index 5169f738add..b3a82d3f7a5 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -754,15 +754,35 @@ def test_table_engine_grant_and_revoke(): def test_table_engine_and_source_grant(): instance.query("DROP USER IF EXISTS A") instance.query("CREATE USER A") - instance.query("GRANT CREATE TABLE ON test.table1 TO A") + instance.query("GRANT CREATE TABLE ON test.table1 TO A") instance.query("GRANT TABLE ENGINE ON PostgreSQL TO A") - # We don't need the following statement as GRANT TABLE ENGINE covers it already.
- # instance.query("GRANT POSTGRES ON *.* TO A") instance.query( """ - CREATE TABLE test.table1(a Integer) + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("DROP TABLE test.table1") + + instance.query("REVOKE TABLE ENGINE ON PostgreSQL FROM A") + + assert "Not enough privileges" in instance.query_and_get_error( + """ + CREATE TABLE test.table1(a Integer) + engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); + """, + user="A", + ) + + instance.query("GRANT SOURCES ON *.* TO A") + + instance.query( + """ + CREATE TABLE test.table1(a Integer) engine=PostgreSQL('localhost:5432', 'dummy', 'dummy', 'dummy', 'dummy'); """, user="A", diff --git a/tests/queries/0_stateless/02184_table_engine_access.sh b/tests/queries/0_stateless/02184_table_engine_access.sh index ddf1d9701a7..dbbf28e46d4 100755 --- a/tests/queries/0_stateless/02184_table_engine_access.sh +++ b/tests/queries/0_stateless/02184_table_engine_access.sh @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE url ENGINE=URL('https://clickhouse.com' $CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" 2>&1| grep -Fo "ACCESS_DENIED" | uniq -$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON URL TO user_test_02184;" +$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO user_test_02184;" $CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" $CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE t" $CLICKHOUSE_CLIENT --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/02561_temporary_table_grants.sh b/tests/queries/0_stateless/02561_temporary_table_grants.sh index 9e371a25d7c..6bd6383d310 100755 --- a/tests/queries/0_stateless/02561_temporary_table_grants.sh +++ b/tests/queries/0_stateless/02561_temporary_table_grants.sh @@ -26,13 +26,13 @@ $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" 2>&1 | grep -F "Not enough privileges. To execute this query, it's necessary to have the grant TABLE ENGINE ON File" > /dev/null && echo "OK" -$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON File TO $user" +$CLICKHOUSE_CLIENT --query "GRANT FILE ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_file_02561(name String) ENGINE = File(TabSeparated)" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" 2>&1 | grep -F "Not enough privileges. 
To execute this query, it's necessary to have the grant TABLE ENGINE ON URL" > /dev/null && echo "OK" -$CLICKHOUSE_CLIENT --query "GRANT TABLE ENGINE ON URL TO $user" +$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO $user" $CLICKHOUSE_CLIENT --user $user --password hello --query "CREATE TEMPORARY TABLE table_url_02561(name String) ENGINE = URL('http://127.0.0.1:8123?query=select+12', 'RawBLOB')" From 74453c384fc5755dc13ba1539b5ba4ea079c9dc5 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Fri, 12 Apr 2024 16:25:23 +0200 Subject: [PATCH 12/44] Add missing `tanh` function --- .../sql-reference/functions/math-functions.md | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index fc659891b5c..e59878f901b 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -557,6 +557,37 @@ Result: │ 0 │ └──────────┘ ``` +## tanh + +Returns the [hyperbolic tangent](https://www.mathworks.com/help/matlab/ref/tanh.html). + +**Syntax** + +``` sql +tanh(x) +``` + +**Arguments** + +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Returned value** + +- Values from the interval: `-1 < tanh(x) < 1`. + +Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). + +**Example** + +``` sql +SELECT tanh(0); +``` + +Result: + +```result +0 +``` ## atanh From 47c653f87cae3b05afa3f301cd5f868674dc2f04 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Apr 2024 15:38:50 +0000 Subject: [PATCH 13/44] better retries in azure sdk --- programs/server/Server.cpp | 39 +++++++++++ src/Core/ServerSettings.h | 1 + .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 70 +++++++++++++------ .../AzureBlobStorage/AzureBlobStorageAuth.h | 5 +- 4 files changed, 89 insertions(+), 26 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a048bebc45b..f07c2469358 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -138,6 +138,7 @@ #if USE_AZURE_BLOB_STORAGE # include +# include <azure/core/diagnostics/logger.hpp> #endif #include @@ -612,6 +613,43 @@ static void sanityChecks(Server & server) } } +static void initializeAzureSDKLogger(const ServerSettings & server_settings, int server_logs_level) +{ +#if USE_AZURE_BLOB_STORAGE + if (!server_settings.enable_azure_sdk_logging) + return; + + using AzureLogsLevel = Azure::Core::Diagnostics::Logger::Level; + + static const std::unordered_map<AzureLogsLevel, std::pair<Poco::Message::Priority, LogsLevel>> azure_to_server_mapping = + { + {AzureLogsLevel::Error, {Poco::Message::PRIO_DEBUG, LogsLevel::debug}}, + {AzureLogsLevel::Warning, {Poco::Message::PRIO_DEBUG, LogsLevel::debug}}, + {AzureLogsLevel::Informational, {Poco::Message::PRIO_TRACE, LogsLevel::trace}}, + {AzureLogsLevel::Verbose, {Poco::Message::PRIO_TEST, LogsLevel::test}}, + }; + + static const std::map<Poco::Message::Priority, AzureLogsLevel> server_to_azure_mapping = + { + {Poco::Message::PRIO_DEBUG, AzureLogsLevel::Warning}, + {Poco::Message::PRIO_TRACE, AzureLogsLevel::Informational}, + {Poco::Message::PRIO_TEST, AzureLogsLevel::Verbose}, + }; + + static const LoggerPtr azure_sdk_logger = getLogger("AzureSDK"); + + auto it = server_to_azure_mapping.lower_bound(static_cast<Poco::Message::Priority>(server_logs_level)); + chassert(it != server_to_azure_mapping.end()); + Azure::Core::Diagnostics::Logger::SetLevel(it->second); + + Azure::Core::Diagnostics::Logger::SetListener([](AzureLogsLevel level, const
std::string & message) + { + auto [poco_level, db_level] = azure_to_server_mapping.at(level); + LOG_IMPL(azure_sdk_logger, db_level, poco_level, fmt::runtime(message)); + }); +#endif +} + int Server::main(const std::vector<std::string> & /*args*/) try { @@ -1858,6 +1896,7 @@ try /// Build loggers before tables startup to make log messages from tables /// attach available in system.text_log buildLoggers(config(), logger()); + initializeAzureSDKLogger(server_settings, logger().getLevel()); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper); attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA)); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 46e2dc649a6..57d74143038 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -139,6 +139,7 @@ namespace DB M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index e7ee768876f..1639f1dd03e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -4,8 +4,8 @@ #include #include -#include #include +#include #include #include @@ -138,35 +138,34 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr template <class T> -std::unique_ptr<T> getClientWithConnectionString(const String & connection_str, const String & container_name) = delete; - +std::unique_ptr<T> getClientWithConnectionString(const String & connection_str, const String & container_name, const BlobClientOptions & client_options) = delete; template<> -std::unique_ptr<BlobServiceClient> getClientWithConnectionString( - const String & connection_str, const String & /*container_name*/) +std::unique_ptr<BlobServiceClient> getClientWithConnectionString(const String & connection_str, const String & /*container_name*/, const BlobClientOptions & client_options) { - return std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(connection_str)); + return std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(connection_str, client_options)); } - template<> -std::unique_ptr<BlobContainerClient> getClientWithConnectionString( - const String & connection_str, const String & container_name) +std::unique_ptr<BlobContainerClient> getClientWithConnectionString(const String &
connection_str, const String & container_name, const BlobClientOptions & client_options) { - return std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(connection_str, container_name)); + return std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(connection_str, container_name, client_options)); } - template <class T> std::unique_ptr<T> getAzureBlobStorageClientWithAuth( - const String & url, const String & container_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + const String & url, + const String & container_name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Azure::Storage::Blobs::BlobClientOptions & client_options) { std::string connection_str; if (config.has(config_prefix + ".connection_string")) connection_str = config.getString(config_prefix + ".connection_string"); if (!connection_str.empty()) - return getClientWithConnectionString<T>(connection_str, container_name); + return getClientWithConnectionString<T>(connection_str, container_name, client_options); if (config.has(config_prefix + ".account_key") && config.has(config_prefix + ".account_name")) { @@ -174,38 +173,63 @@ std::unique_ptr<T> getAzureBlobStorageClientWithAuth( config.getString(config_prefix + ".account_name"), config.getString(config_prefix + ".account_key") ); - return std::make_unique<T>(url, storage_shared_key_credential); + return std::make_unique<T>(url, storage_shared_key_credential, client_options); } auto managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>(); - return std::make_unique<T>(url, managed_identity_credential); + return std::make_unique<T>(url, managed_identity_credential, client_options); } +Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + Azure::Core::Http::Policies::RetryOptions retry_options; + retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); + retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); + retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); -std::unique_ptr<BlobContainerClient> getAzureBlobContainerClient( - const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + using CurlOptions = Azure::Core::Http::CurlTransportOptions; + CurlOptions curl_options{.NoSignal = true}; + + if (config.has(config_prefix + ".curl_ip_resolve")) + { + auto value = config.getString(config_prefix + ".curl_ip_resolve"); + if (value == "ipv4") + curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V4; + else if (value == "ipv6") + curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}.
Expected one of 'ipv4' or 'ipv6'", value); + } + + Azure::Storage::Blobs::BlobClientOptions client_options; + client_options.Retry = retry_options; + client_options.Transport.Transport = std::make_shared(curl_options); + + return client_options; +} + +std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); auto container_name = endpoint.container_name; auto final_url = endpoint.getEndpoint(); + auto client_options = getAzureBlobClientOptions(config, config_prefix); if (endpoint.container_already_exists.value_or(false)) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); + return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); - auto blob_service_client = getAzureBlobStorageClientWithAuth( - endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix); + auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.getEndpointWithoutContainer(), container_name, config, config_prefix, client_options); try { - return std::make_unique( - blob_service_client->CreateBlobContainer(container_name).Value); + return std::make_unique(blob_service_client->CreateBlobContainer(container_name).Value); } catch (const Azure::Storage::StorageException & e) { /// If container_already_exists is not set (in config), ignore already exists error. /// (Conflict - The specified container already exists) if (!endpoint.container_already_exists.has_value() && e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); + return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix, client_options); throw; } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h index 20bf05d5ba6..84544465719 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.h @@ -45,12 +45,11 @@ struct AzureBlobStorageEndpoint } }; -std::unique_ptr getAzureBlobContainerClient( - const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +std::unique_ptr getAzureBlobContainerClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); -std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/); +std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); } From 03916cc13fcde44aa960b075f404a49ca6cc789c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Apr 2024 18:26:30 +0000 Subject: [PATCH 14/44] fix build and update submodule --- contrib/azure | 2 +- .../ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/azure b/contrib/azure index e71395e44f3..d29ae0f8e3a 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2 +Subproject commit d29ae0f8e3ae48f0e7a1771ef4f7eda41371b8a0 diff --git 
a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 1639f1dd03e..b9531eaeb8c 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -185,12 +185,12 @@ Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::U Azure::Core::Http::Policies::RetryOptions retry_options; retry_options.MaxRetries = config.getUInt(config_prefix + ".max_tries", 10); retry_options.RetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 10)); - retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_initial_backoff_ms", 1000)); + retry_options.MaxRetryDelay = std::chrono::milliseconds(config.getUInt(config_prefix + ".retry_max_backoff_ms", 1000)); using CurlOptions = Azure::Core::Http::CurlTransportOptions; CurlOptions curl_options{.NoSignal = true}; - if (config.has(config_prefix + ".ip_resolve")) + if (config.has(config_prefix + ".curl_ip_resolve")) { auto value = config.getString(config_prefix + ".curl_ip_resolve"); if (value == "ipv4") From 9e54a833edcd9aeaa1013f5558396b387c21a38a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Apr 2024 18:41:40 +0000 Subject: [PATCH 15/44] fix build --- programs/server/Server.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index f07c2469358..daaadd1b21f 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -613,7 +613,9 @@ static void sanityChecks(Server & server) } } -static void initializeAzureSDKLogger(const ServerSettings & server_settings, int server_logs_level) +static void initializeAzureSDKLogger( + [[ maybe_unused ]] const ServerSettings & server_settings, + [[ maybe_unused ]] int server_logs_level) { #if USE_AZURE_BLOB_STORAGE if (!server_settings.enable_azure_sdk_logging) From fe52022f55611e63162603b718baab54be2d11fc Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 13 Apr 2024 11:52:27 +0200 Subject: [PATCH 16/44] add `tanh` to aspell-dict add `tanh` to aspell-dict --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index c65a593c632..afbdf88ecad 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -949,6 +949,7 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific +tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation From 9cbfcf47630bf3c0944d7725c147f51e2452f512 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 15 Apr 2024 14:32:02 +0200 Subject: [PATCH 17/44] Do not fail job on failed get_job_id_url --- tests/ci/build_download_helper.py | 6 +++++- tests/ci/env_helper.py | 27 +++++++++++++++++++-------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0d89515d5d8..d540552f768 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -23,6 +23,10 @@ class DownloadException(Exception): pass +class APIException(Exception): + pass + + def get_with_retries( url: str, retries: int = DOWNLOAD_RETRIES_COUNT, @@ -109,7 +113,7 @@ def get_gh_api( logging.info("Exception '%s' while getting, retry %i", exc, try_cnt) time.sleep(sleep) - raise exc + raise APIException("Unable to request data from GH API") from exc def get_build_name_for_check(check_name: str) -> str: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index fa09d073177..bb3271cc5bb 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -5,7 +5,7 @@ import os from os import path as p from typing import Tuple -from build_download_helper import get_gh_api +from build_download_helper import APIException, get_gh_api module_dir = p.abspath(p.dirname(__file__)) git_root = p.abspath(p.join(module_dir, "..", "..")) @@ -42,23 +42,35 @@ _GITHUB_JOB_URL = "" _GITHUB_JOB_API_URL = "" -def GITHUB_JOB_ID() -> str: +def GITHUB_JOB_ID(safe: bool = True) -> str: global _GITHUB_JOB_ID global _GITHUB_JOB_URL global _GITHUB_JOB_API_URL if _GITHUB_JOB_ID: return _GITHUB_JOB_ID - _GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url(GITHUB_JOB) + try: + _GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url( + GITHUB_JOB + ) + except APIException as e: + logging.warning("Unable to retrieve the job info from GH API: %s", e) + if not safe: + raise e return _GITHUB_JOB_ID -def GITHUB_JOB_URL() -> str: - GITHUB_JOB_ID() +def GITHUB_JOB_URL(safe: bool = True) -> str: + try: + GITHUB_JOB_ID() + except APIException: + if safe: + logging.warning("Using run URL as a fallback to not fail the job") + return GITHUB_RUN_URL return _GITHUB_JOB_URL -def GITHUB_JOB_API_URL() -> str: - GITHUB_JOB_ID() +def GITHUB_JOB_API_URL(safe: bool = True) -> str: + GITHUB_JOB_ID(safe) return _GITHUB_JOB_API_URL @@ -93,7 +105,6 @@ def get_job_id_url(job_name: str) -> Tuple[str, str, str]: ): job_id = "0" - # FIXME: until it's here, we can't move to reusable workflows if not job_url: # This is a terrible workaround for the case of another broken part of # GitHub actions. 
For nested workflows it doesn't provide a proper job_name

From a91da66961bfb90db7849c636c498067b482d9b7 Mon Sep 17 00:00:00 2001
From: peter279k
Date: Tue, 16 Apr 2024 19:57:18 +0800
Subject: [PATCH 18/44] Add loading the data with a cleaning approach

---
 .../example-datasets/wikistat.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/docs/en/getting-started/example-datasets/wikistat.md b/docs/en/getting-started/example-datasets/wikistat.md
index d913ccd9b31..393ee64b50d 100644
--- a/docs/en/getting-started/example-datasets/wikistat.md
+++ b/docs/en/getting-started/example-datasets/wikistat.md
@@ -64,3 +64,22 @@ clickhouse-local --query "
     WHERE length(values) = 5 FORMAT Native
 " | clickhouse-client --query "INSERT INTO wikistat FORMAT Native"
 ```
+
+Or load the data, cleaning it on the fly:
+
+```
+INSERT INTO wikistat WITH
+    parseDateTimeBestEffort(extract(_file, '^pageviews-([\\d\\-]+)\\.gz$')) AS time,
+    splitByChar(' ', line) AS values,
+    splitByChar('.', values[1]) AS projects
+SELECT
+    time,
+    projects[1] AS project,
+    projects[2] AS subproject,
+    decodeURLComponent(values[2]) AS path,
+    CAST(values[3], 'UInt64') AS hits
+FROM s3(
+    'https://clickhouse-public-datasets.s3.amazonaws.com/wikistat/original/pageviews*.gz',
+    LineAsString)
+WHERE length(values) >= 3
+```

From eb540fcece0325fb8c9eac468acaf3b2d79d4ffd Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 16 Apr 2024 12:36:20 +0000
Subject: [PATCH 19/44] Mark DataTypeVariant as comparable

---
 src/DataTypes/DataTypeVariant.h | 1 +
 .../0_stateless/03096_variant_in_primary_key.reference | 4 ++++
 tests/queries/0_stateless/03096_variant_in_primary_key.sql | 7 +++++++
 3 files changed, 12 insertions(+)
 create mode 100644 tests/queries/0_stateless/03096_variant_in_primary_key.reference
 create mode 100644 tests/queries/0_stateless/03096_variant_in_primary_key.sql

diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h
index dadc85ac3b3..ab471d37b2f 100644
--- a/src/DataTypes/DataTypeVariant.h
+++ b/src/DataTypes/DataTypeVariant.h
@@ -42,6 +42,7 @@ public:
     bool equals(const IDataType & rhs) const override;

     bool isParametric() const override { return true; }
+    bool isComparable() const override { return true; }
     bool haveSubtypes() const override { return true; }
     bool textCanContainOnlyValidUTF8() const override;
     bool haveMaximumSizeOfValue() const override;

diff --git a/tests/queries/0_stateless/03096_variant_in_primary_key.reference b/tests/queries/0_stateless/03096_variant_in_primary_key.reference
new file mode 100644
index 00000000000..c701d7d3c26
--- /dev/null
+++ b/tests/queries/0_stateless/03096_variant_in_primary_key.reference
@@ -0,0 +1,4 @@
+1 str_1
+1 str_2
+1 1
+1 2

diff --git a/tests/queries/0_stateless/03096_variant_in_primary_key.sql b/tests/queries/0_stateless/03096_variant_in_primary_key.sql
new file mode 100644
index 00000000000..48fbc821bcc
--- /dev/null
+++ b/tests/queries/0_stateless/03096_variant_in_primary_key.sql
@@ -0,0 +1,7 @@
+set allow_experimental_variant_type=1;
+drop table if exists test;
+create table test (id UInt64, v Variant(UInt64, String)) engine=MergeTree order by (id, v);
+insert into test values (1, 1), (1, 'str_1'), (1, 2), (1, 'str_2');
+select * from test;
+drop table test;
+

From 80fa1ab89a3a2a493ecd166e3f50e74b5d2a6820 Mon Sep 17 00:00:00 2001
From: jsc0218
Date: Wed, 17 Apr 2024 00:31:15 +0000
Subject: [PATCH 20/44] fix

---
 src/Access/ContextAccess.cpp | 34 +++++++++++++------
 src/Interpreters/InterpreterCreateQuery.cpp | 13 -------
.../configs/users.d/restricted_user.xml | 1 + .../integration/test_distributed_ddl/test.py | 16 ++------- tests/integration/test_storage_s3/test.py | 1 - .../02232_allow_only_replicated_engine.sh | 4 +-- 6 files changed, 29 insertions(+), 40 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 499656e805b..2736d13e751 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -205,7 +205,7 @@ namespace } /// There is overlap between AccessType sources and table engines, so the following code avoids user granting twice. - std::vector> source_and_table_engines = { + static const std::vector> source_and_table_engines = { {AccessType::FILE, "File"}, {AccessType::URL, "URL"}, {AccessType::REMOTE, "Distributed"}, @@ -222,14 +222,31 @@ namespace {AccessType::AZURE, "AzureBlobStorage"} }; - for (const auto & source_and_table_engine : source_and_table_engines) + /// Sync SOURCE and TABLE_ENGINE, so only need to check TABLE_ENGINE later. + if (access_control.doesTableEnginesRequireGrant()) { - const auto & source = std::get<0>(source_and_table_engine); - const auto & table_engine = std::get<1>(source_and_table_engine); - if (res.isGranted(source) || res.isGranted(AccessType::TABLE_ENGINE, table_engine)) + for (const auto & source_and_table_engine : source_and_table_engines) { - res.grant(source); - res.grant(AccessType::TABLE_ENGINE, table_engine); + const auto & source = std::get<0>(source_and_table_engine); + if (res.isGranted(source)) + { + const auto & table_engine = std::get<1>(source_and_table_engine); + res.grant(AccessType::TABLE_ENGINE, table_engine); + } + } + } + else + { + /// Add TABLE_ENGINE on * and then remove TABLE_ENGINE on particular engines. + res.grant(AccessType::TABLE_ENGINE); + for (const auto & source_and_table_engine : source_and_table_engines) + { + const auto & source = std::get<0>(source_and_table_engine); + if (!res.isGranted(source)) + { + const auto & table_engine = std::get<1>(source_and_table_engine); + res.revoke(AccessType::TABLE_ENGINE, table_engine); + } } } @@ -576,9 +593,6 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (flags & AccessType::CLUSTER && !access_control->doesOnClusterQueriesRequireClusterGrant()) flags &= ~AccessType::CLUSTER; - if (flags & AccessType::TABLE_ENGINE && !access_control->doesTableEnginesRequireGrant()) - flags &= ~AccessType::TABLE_ENGINE; - if (!flags) return true; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 4a95df4f661..acf00b5184e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -723,13 +723,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// We have to check access rights again (in case engine was changed). 
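[Editorial note, not part of the patch — the hunk resumes right after this note: the block removed just below folds the old per-source check into the single `TABLE_ENGINE` access type, which `ContextAccess` now pre-populates from the granted sources. A minimal sketch of the resulting user-facing grammar; the user name is hypothetical, and the engine names follow the 02232 test updated further down:]

```sql
-- Sketch only: assumes an admin session with the right to grant.
CREATE USER IF NOT EXISTS engine_user_demo;
GRANT CREATE TABLE ON default.* TO engine_user_demo;
GRANT TABLE ENGINE ON Memory, TABLE ENGINE ON MergeTree TO engine_user_demo;
-- As engine_user_demo this is now allowed, because the engine itself is granted:
-- CREATE TABLE default.t_demo (x UInt32) ENGINE = MergeTree ORDER BY x;
```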
if (create.storage && create.storage->engine) - { - auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); - const auto & access_control = getContext()->getAccessControl(); - if (source_access_type != AccessType::NONE && !access_control.doesTableEnginesRequireGrant()) - getContext()->checkAccess(source_access_type); getContext()->checkAccess(AccessType::TABLE_ENGINE, create.storage->engine->name); - } TableProperties properties; TableLockHolder as_storage_lock; @@ -1835,14 +1829,7 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const required_access.emplace_back(AccessType::SELECT | AccessType::INSERT, create.to_table_id.database_name, create.to_table_id.table_name); if (create.storage && create.storage->engine) - { - auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); - const auto & access_control = getContext()->getAccessControl(); - /// We just need to check GRANT TABLE ENGINE for sources if grant of table engine is enabled. - if (source_access_type != AccessType::NONE && !access_control.doesTableEnginesRequireGrant()) - required_access.emplace_back(source_access_type); required_access.emplace_back(AccessType::TABLE_ENGINE, create.storage->engine->name); - } return required_access; } diff --git a/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml b/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml index 196249466a5..562c4553458 100644 --- a/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml +++ b/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml @@ -10,6 +10,7 @@ db1 + Memory diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index 09db5fc6a0e..f08c6265b82 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ b/tests/integration/test_distributed_ddl/test.py @@ -291,29 +291,19 @@ def test_allowed_databases(test_cluster): instance.query("CREATE DATABASE IF NOT EXISTS db2 ON CLUSTER cluster") instance.query( - "CREATE TABLE IF NOT EXISTS db1.t1 ON CLUSTER cluster (i Int8) ENGINE = Memory" - ) - instance.query( - "CREATE TABLE IF NOT EXISTS db2.t2 ON CLUSTER cluster (i Int8) ENGINE = Memory" - ) - instance.query( - "CREATE TABLE IF NOT EXISTS t3 ON CLUSTER cluster (i Int8) ENGINE = Memory" - ) - - instance.query( - "SELECT * FROM db1.t1", + "CREATE TABLE db1.t1 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user": "restricted_user"}, ) with pytest.raises(Exception): instance.query( - "SELECT * FROM db2.t2", + "CREATE TABLE db2.t2 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user": "restricted_user"}, ) with pytest.raises(Exception): instance.query( - "SELECT * FROM t3", + "CREATE TABLE t3 ON CLUSTER cluster (i Int8) ENGINE = Memory", settings={"user": "restricted_user"}, ) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 72fd49a7ccc..6d5b84a8143 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -989,7 +989,6 @@ def test_predefined_connection_configuration(started_cluster): instance.query("GRANT CREATE ON *.* TO user") instance.query("GRANT SOURCES ON *.* TO user") instance.query("GRANT SELECT ON *.* TO user") - instance.query("GRANT TABLE ENGINE ON S3 TO user") instance.query(f"drop table if exists {name}", user="user") error = instance.query_and_get_error( diff --git 
a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index 19ac97068ae..791102b9cbd 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -9,9 +9,7 @@ ${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = Replic ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_only_replicated_engine=1" ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" -${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory TO user_${CLICKHOUSE_DATABASE}" -${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON MergeTree TO user_${CLICKHOUSE_DATABASE}" -${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" +${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory, TABLE ENGINE ON MergeTree, TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_memory (x UInt32) engine = Memory;" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" From 1c0d104b482e01e9a99d6c1a92cf4f1c34cec51f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 17 Apr 2024 01:31:40 +0100 Subject: [PATCH 21/44] Fix argMin/argMax combinator state --- ...gregateFunctionCombinatorsArgMinArgMax.cpp | 4 ++-- .../03127_argMin_combinator_state.reference | 12 ++++++++++ .../03127_argMin_combinator_state.sql | 22 +++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03127_argMin_combinator_state.reference create mode 100644 tests/queries/0_stateless/03127_argMin_combinator_state.sql diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorsArgMinArgMax.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorsArgMinArgMax.cpp index 71c2bab6f6b..a1716f18725 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorsArgMinArgMax.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionCombinatorsArgMinArgMax.cpp @@ -68,9 +68,9 @@ public: String getName() const override { if constexpr (isMin) - return "ArgMin"; + return nested_function->getName() + "ArgMin"; else - return "ArgMax"; + return nested_function->getName() + "ArgMax"; } bool isState() const override { return nested_function->isState(); } diff --git a/tests/queries/0_stateless/03127_argMin_combinator_state.reference b/tests/queries/0_stateless/03127_argMin_combinator_state.reference new file mode 100644 index 00000000000..33482fd5fbf --- /dev/null +++ b/tests/queries/0_stateless/03127_argMin_combinator_state.reference @@ -0,0 +1,12 @@ +AggregateFunction(sumArgMin, UInt64, UInt64) +54 +0 45 +1 46 +2 47 +3 48 +4 49 +5 50 +6 51 +7 52 +8 53 +9 54 diff --git a/tests/queries/0_stateless/03127_argMin_combinator_state.sql 
b/tests/queries/0_stateless/03127_argMin_combinator_state.sql new file mode 100644 index 00000000000..2eb209ed510 --- /dev/null +++ b/tests/queries/0_stateless/03127_argMin_combinator_state.sql @@ -0,0 +1,22 @@ +SELECT toTypeName(sumArgMinState(number, number)) FROM numbers(1); +SELECT sumArgMinState(number, number) AS a FROM numbers(3) FORMAT Null; + +DROP TABLE IF EXISTS argmax_comb; +CREATE TABLE argmax_comb( + id UInt64, + state AggregateFunction(avgArgMax, Float64, UInt64) + ) + ENGINE=MergeTree() ORDER BY tuple(); +INSERT INTO argmax_comb + SELECT + CAST(number % 10, 'UInt64') AS id, + avgArgMaxState(CAST(number, 'Float64'), id) + FROM numbers(100) + GROUP BY id; +SELECT avgArgMaxMerge(state) FROM argmax_comb; +SELECT + id, + avgArgMaxMerge(state) +FROM argmax_comb +GROUP BY id +ORDER BY id ASC; \ No newline at end of file From b9925ecce0f50fe813a1ab0b41c1b256711d4a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 17 Apr 2024 11:40:20 +0100 Subject: [PATCH 22/44] Add test with projections --- ...128_argMin_combinator_projection.reference | 0 .../03128_argMin_combinator_projection.sql | 73 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 tests/queries/0_stateless/03128_argMin_combinator_projection.reference create mode 100644 tests/queries/0_stateless/03128_argMin_combinator_projection.sql diff --git a/tests/queries/0_stateless/03128_argMin_combinator_projection.reference b/tests/queries/0_stateless/03128_argMin_combinator_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03128_argMin_combinator_projection.sql b/tests/queries/0_stateless/03128_argMin_combinator_projection.sql new file mode 100644 index 00000000000..f0a7f025c95 --- /dev/null +++ b/tests/queries/0_stateless/03128_argMin_combinator_projection.sql @@ -0,0 +1,73 @@ +DROP TABLE IF EXISTS combinator_argMin_table_r1 SYNC; +DROP TABLE IF EXISTS combinator_argMin_table_r2 SYNC; + +CREATE TABLE combinator_argMin_table_r1 +( + `id` Int32, + `value` Int32, + `agg_time` DateTime, + PROJECTION first_items + ( + SELECT + id, + minArgMin(agg_time, value), + maxArgMax(agg_time, value) + GROUP BY id + ) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_03128/combinator_argMin_table', 'r1') +ORDER BY (id); + +INSERT INTO combinator_argMin_table_r1 + SELECT + number % 10 as id, + number as value, + '01-01-2024 00:00:00' + INTERVAL number DAY + FROM + numbers(100); + +INSERT INTO combinator_argMin_table_r1 + SELECT + number % 10 as id, + number * 10 as value, + '01-01-2024 00:00:00' + INTERVAL number DAY + FROM + numbers(100); + +SELECT + id, + minArgMin(agg_time, value), + maxArgMax(agg_time, value) +FROM combinator_argMin_table_r1 +GROUP BY id +ORDER BY id +SETTINGS force_optimize_projection=1; + +-- We check replication by creating another replica +CREATE TABLE combinator_argMin_table_r2 +( + `id` Int32, + `value` Int32, + `agg_time` DateTime, + PROJECTION first_items + ( + SELECT + id, + minArgMin(agg_time, value), + maxArgMax(agg_time, value) + GROUP BY id + ) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_03128/combinator_argMin_table', 'r2') +ORDER BY (id); + +SYSTEM SYNC REPLICA combinator_argMin_table_r2; + +SELECT + id, + minArgMin(agg_time, value), + maxArgMax(agg_time, value) +FROM combinator_argMin_table_r2 +GROUP BY id +ORDER BY id +SETTINGS force_optimize_projection=1; \ No newline at end of file From c83fc0b2805767cf08885e1730bae19432f5d25f Mon Sep 17 00:00:00 2001 
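[Editorial note, not part of the patches: the `getName()` fix in PATCH 21 makes the serialized state type carry the nested function name — `AggregateFunction(sumArgMin, UInt64, UInt64)` in the test above — so `-State` results can round-trip through `-Merge`, including inside the projections tested in PATCH 22. A minimal sketch reusing functions from the 03127 test; the result is illustrative, not verified here:]

```sql
-- Sketch only: create a combinator state and merge it back.
SELECT avgArgMaxMerge(s)
FROM
(
    SELECT avgArgMaxState(CAST(number, 'Float64'), number) AS s
    FROM numbers(10)
);
```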
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 17 Apr 2024 13:05:27 +0100 Subject: [PATCH 23/44] Missing ref --- ...128_argMin_combinator_projection.reference | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/queries/0_stateless/03128_argMin_combinator_projection.reference b/tests/queries/0_stateless/03128_argMin_combinator_projection.reference index e69de29bb2d..ec9160308f2 100644 --- a/tests/queries/0_stateless/03128_argMin_combinator_projection.reference +++ b/tests/queries/0_stateless/03128_argMin_combinator_projection.reference @@ -0,0 +1,20 @@ +0 2024-01-01 00:00:00 2024-03-31 00:00:00 +1 2024-01-02 00:00:00 2024-04-01 00:00:00 +2 2024-01-03 00:00:00 2024-04-02 00:00:00 +3 2024-01-04 00:00:00 2024-04-03 00:00:00 +4 2024-01-05 00:00:00 2024-04-04 00:00:00 +5 2024-01-06 00:00:00 2024-04-05 00:00:00 +6 2024-01-07 00:00:00 2024-04-06 00:00:00 +7 2024-01-08 00:00:00 2024-04-07 00:00:00 +8 2024-01-09 00:00:00 2024-04-08 00:00:00 +9 2024-01-10 00:00:00 2024-04-09 00:00:00 +0 2024-01-01 00:00:00 2024-03-31 00:00:00 +1 2024-01-02 00:00:00 2024-04-01 00:00:00 +2 2024-01-03 00:00:00 2024-04-02 00:00:00 +3 2024-01-04 00:00:00 2024-04-03 00:00:00 +4 2024-01-05 00:00:00 2024-04-04 00:00:00 +5 2024-01-06 00:00:00 2024-04-05 00:00:00 +6 2024-01-07 00:00:00 2024-04-06 00:00:00 +7 2024-01-08 00:00:00 2024-04-07 00:00:00 +8 2024-01-09 00:00:00 2024-04-08 00:00:00 +9 2024-01-10 00:00:00 2024-04-09 00:00:00 From 861f42ea376eb1c38b0ec1a9a4aff8810cda46d0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Apr 2024 13:14:55 +0000 Subject: [PATCH 24/44] fix azure submodule --- contrib/azure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/azure b/contrib/azure index e71395e44f3..ad2d3d42356 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2 +Subproject commit ad2d3d423565b8a8e7b7ec863eae9318a8283878 From 952f23a4164614b7233318d50d4c68aa1a150da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 17 Apr 2024 14:48:48 +0000 Subject: [PATCH 25/44] Unify a batch of tests --- tests/queries/0_stateless/00121_drop_column_zookeeper.sql | 3 ++- tests/queries/0_stateless/00155_long_merges.sh | 2 +- .../0_stateless/00215_primary_key_order_zookeeper_long.sql | 3 ++- ...0226_zookeeper_deduplication_and_unexpected_parts_long.sql | 3 ++- .../00236_replicated_drop_on_non_leader_zookeeper_long.sql | 3 ++- .../00446_clear_column_in_partition_concurrent_zookeeper.sh | 3 ++- .../00446_clear_column_in_partition_zookeeper_long.sql | 3 ++- .../00502_custom_partitioning_replicated_zookeeper_long.sql | 3 ++- .../00509_extended_storage_definition_syntax_zookeeper.sql | 4 +++- .../00516_deduplication_after_drop_partition_zookeeper.sql | 1 - 10 files changed, 18 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/00121_drop_column_zookeeper.sql b/tests/queries/0_stateless/00121_drop_column_zookeeper.sql index ed1f654f847..915551aa84f 100644 --- a/tests/queries/0_stateless/00121_drop_column_zookeeper.sql +++ b/tests/queries/0_stateless/00121_drop_column_zookeeper.sql @@ -1,5 +1,6 @@ --- Tags: zookeeper, no-replicated-database +-- Tags: zookeeper, no-replicated-database, no-shared-merge-tree -- Tag no-replicated-database: Old syntax is not allowed +-- no-shared-merge-tree: implemented replacement DROP TABLE IF EXISTS alter_00121 SYNC; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00155_long_merges.sh 
b/tests/queries/0_stateless/00155_long_merges.sh index 8ecca0aeb42..7c0bdd217c9 100755 --- a/tests/queries/0_stateless/00155_long_merges.sh +++ b/tests/queries/0_stateless/00155_long_merges.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-debug +# Tags: long, no-debug, no-asan, no-tsan, no-msan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00215_primary_key_order_zookeeper_long.sql b/tests/queries/0_stateless/00215_primary_key_order_zookeeper_long.sql index f031ae7f54f..218f8de919d 100644 --- a/tests/queries/0_stateless/00215_primary_key_order_zookeeper_long.sql +++ b/tests/queries/0_stateless/00215_primary_key_order_zookeeper_long.sql @@ -1,5 +1,6 @@ --- Tags: long, zookeeper, no-replicated-database +-- Tags: long, zookeeper, no-replicated-database, no-shared-merge-tree -- Tag no-replicated-database: Old syntax is not allowed +-- no-shared-merge-tree: implemented replacement DROP TABLE IF EXISTS primary_key; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00226_zookeeper_deduplication_and_unexpected_parts_long.sql b/tests/queries/0_stateless/00226_zookeeper_deduplication_and_unexpected_parts_long.sql index 727c793efb0..8968f83de17 100644 --- a/tests/queries/0_stateless/00226_zookeeper_deduplication_and_unexpected_parts_long.sql +++ b/tests/queries/0_stateless/00226_zookeeper_deduplication_and_unexpected_parts_long.sql @@ -1,5 +1,6 @@ --- Tags: long, zookeeper, no-replicated-database +-- Tags: long, zookeeper, no-replicated-database, no-shared-merge-tree -- Tag no-replicated-database: Old syntax is not allowed +-- no-shared-merge-tree: implemented replacement DROP TABLE IF EXISTS deduplication; set allow_deprecated_syntax_for_merge_tree=1; diff --git a/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper_long.sql b/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper_long.sql index 78319c3edd4..aa5d7e10b4f 100644 --- a/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper_long.sql +++ b/tests/queries/0_stateless/00236_replicated_drop_on_non_leader_zookeeper_long.sql @@ -1,5 +1,6 @@ --- Tags: long, replica, no-replicated-database +-- Tags: long, replica, no-replicated-database, no-shared-merge-tree -- Tag no-replicated-database: Old syntax is not allowed +-- no-shared-merge-tree: implemented replacement SET replication_alter_partitions_sync = 2; diff --git a/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh b/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh index eee84aa7754..2d95c4e46c2 100755 --- a/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh +++ b/tests/queries/0_stateless/00446_clear_column_in_partition_concurrent_zookeeper.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Tags: zookeeper, no-replicated-database +# Tags: zookeeper, no-replicated-database, no-shared-merge-tree # Tag no-replicated-database: Old syntax is not allowed +# no-shared-merge-tree -- old syntax CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper_long.sql b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper_long.sql index c820cce11af..5427119f40c 100644 --- a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper_long.sql +++ 
b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper_long.sql
@@ -1,4 +1,5 @@
--- Tags: long, zookeeper
+-- Tags: long, zookeeper, no-shared-merge-tree
+-- no-shared-merge-tree: different synchronization, replaced with another test

 SELECT '===Ordinary case===';

diff --git a/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper_long.sql b/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper_long.sql
index a5e33bffb0d..06484f53dad 100644
--- a/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper_long.sql
+++ b/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper_long.sql
@@ -1,4 +1,5 @@
--- Tags: long, replica
+-- Tags: long, replica, no-shared-merge-tree
+-- no-shared-merge-tree: different synchronization

 SET replication_alter_partitions_sync = 2;
 SET insert_keeper_fault_injection_probability=0;

diff --git a/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql b/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql
index 3f322c8ce18..cd5fef883fe 100644
--- a/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql
+++ b/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql
@@ -1,4 +1,6 @@
--- Tags: zookeeper
+-- Tags: zookeeper, no-shared-merge-tree
+-- Tag no-parallel: leftovers
+-- no-shared-merge-tree: boring test, nothing new

 SET optimize_on_insert = 0;

diff --git a/tests/queries/0_stateless/00516_deduplication_after_drop_partition_zookeeper.sql b/tests/queries/0_stateless/00516_deduplication_after_drop_partition_zookeeper.sql
index 24e581bb201..fb996684d65 100644
--- a/tests/queries/0_stateless/00516_deduplication_after_drop_partition_zookeeper.sql
+++ b/tests/queries/0_stateless/00516_deduplication_after_drop_partition_zookeeper.sql
@@ -1,7 +1,6 @@
 -- Tags: zookeeper

 DROP TABLE IF EXISTS deduplication_by_partition;
-set allow_deprecated_syntax_for_merge_tree=1;
 CREATE TABLE deduplication_by_partition(d Date, x UInt32) ENGINE =
 ReplicatedMergeTree('/clickhouse/tables/{database}/test_00516/deduplication_by_partition', 'r1') order by x partition by toYYYYMM(d);

From 6b09b1f8977e294af9d23910a37ec27bb53059ac Mon Sep 17 00:00:00 2001
From: "Mikhail f.
Shiryaev" Date: Wed, 17 Apr 2024 14:56:00 +0200 Subject: [PATCH 26/44] Use GitHub in ci.py --- tests/ci/ci.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f11d62e9136..33a5059b537 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CIStages, Labels, JobNames +from ci_config import CI_CONFIG, Build, CIStages, JobNames, Labels from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -49,7 +49,7 @@ from env_helper import ( from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner -from github import Github +from github_helper import GitHub from pr_info import PRInfo from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper @@ -1504,7 +1504,7 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: # create GH status pr_info = PRInfo() - commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha) + commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) def _concurrent_create_status(job: str, batch: int, num_batches: int) -> None: job_status = ci_cache.get_successful(job, batch, num_batches) @@ -1994,7 +1994,7 @@ def main() -> int: else: # this is a test job - check if GH commit status or cache record is present commit = get_commit( - Github(get_best_robot_token(), per_page=100), pr_info.sha + GitHub(get_best_robot_token(), per_page=100), pr_info.sha ) # rerun helper check @@ -2110,7 +2110,7 @@ def main() -> int: additional_urls=additional_urls or None, ) commit = get_commit( - Github(get_best_robot_token(), per_page=100), pr_info.sha + GitHub(get_best_robot_token(), per_page=100), pr_info.sha ) post_commit_status( commit, From 5c4c2c16dd45be761817f34de8a0107d72c6da3c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 17 Apr 2024 22:23:41 +0200 Subject: [PATCH 27/44] Convert PRInfo.is_* to properties --- tests/ci/build_report_check.py | 2 +- tests/ci/ci.py | 36 ++++++++++++++++---------------- tests/ci/commit_status_helper.py | 4 ++-- tests/ci/docker_server.py | 9 ++++---- tests/ci/finish_check.py | 2 +- tests/ci/jepsen_check.py | 2 +- tests/ci/pr_info.py | 13 +++++++++--- tests/ci/run_check.py | 4 ++-- tests/ci/style_check.py | 2 +- 9 files changed, 41 insertions(+), 33 deletions(-) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 48640f15ac0..c1dd2910788 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -50,7 +50,7 @@ def main(): builds_for_check = CI_CONFIG.get_builds_for_report( build_check_name, - release=pr_info.is_release(), + release=pr_info.is_release, backport=pr_info.head_ref.startswith("backport/"), ) required_builds = len(builds_for_check) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 33a5059b537..24d6d95bd0d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -770,7 +770,7 @@ class CiOptions: res = CiOptions() pr_info = PRInfo() if ( - not pr_info.is_pr() and not debug_message + not pr_info.is_pr and not debug_message ): # if commit_message is provided it's test/debug scenario - do not return # CI options can be configured in PRs only # if debug_message is provided - it's a test @@ -1218,19 +1218,19 @@ def _mark_success_action( if job_config.run_always or job_config.run_by_label: print(f"Job [{job}] runs always or by label in CI - do not cache") else: - if pr_info.is_master(): + if pr_info.is_master: pass # delete method is disabled for ci_cache. need it? # pending enabled for master branch jobs only # ci_cache.delete_pending(job, batch, num_batches, release_branch=True) if job_status and job_status.is_ok(): ci_cache.push_successful( - job, batch, num_batches, job_status, pr_info.is_release_branch() + job, batch, num_batches, job_status, pr_info.is_release_branch ) print(f"Job [{job}] is ok") elif job_status and not job_status.is_ok(): ci_cache.push_failed( - job, batch, num_batches, job_status, pr_info.is_release_branch() + job, batch, num_batches, job_status, pr_info.is_release_branch ) print(f"Job [{job}] is failed with status [{job_status.status}]") else: @@ -1238,7 +1238,7 @@ def _mark_success_action( description="dummy description", status=ERROR, report_url="dummy url" ) ci_cache.push_failed( - job, batch, num_batches, job_status, pr_info.is_release_branch() + job, batch, num_batches, job_status, pr_info.is_release_branch ) print(f"No CommitStatusData for [{job}], push dummy failure to ci_cache") @@ -1354,9 +1354,9 @@ def _configure_jobs( batches_to_do: List[int] = [] add_to_skip = False - if job_config.pr_only and pr_info.is_release_branch(): + if job_config.pr_only and pr_info.is_release_branch: continue - if job_config.release_only and not pr_info.is_release_branch(): + if job_config.release_only and not pr_info.is_release_branch: continue # fill job randomization buckets (for jobs with configured @random_bucket property)) @@ -1379,7 +1379,7 @@ def _configure_jobs( job, batch, num_batches, - release_branch=pr_info.is_release_branch() + release_branch=pr_info.is_release_branch and job_config.required_on_release_branch, ): # ci cache is enabled and job is not in the cache - add @@ -1390,7 +1390,7 @@ def _configure_jobs( job, batch, num_batches, - release_branch=pr_info.is_release_branch() + release_branch=pr_info.is_release_branch and job_config.required_on_release_branch, ): if job in 
jobs_to_wait: @@ -1413,7 +1413,7 @@ def _configure_jobs( # treat job as being skipped only if it's controlled by digest jobs_to_skip.append(job) - if not pr_info.is_release_branch(): + if not pr_info.is_release_branch: # randomization bucket filtering (pick one random job from each bucket, for jobs with configured random_bucket property) for _, jobs in randomization_buckets.items(): jobs_to_remove_randomization = set() @@ -1435,7 +1435,7 @@ def _configure_jobs( jobs_to_do, jobs_to_skip, jobs_params ) - if pr_info.is_merge_queue(): + if pr_info.is_merge_queue: # FIXME: Quick support for MQ workflow which is only StyleCheck for now jobs_to_do = [JobNames.STYLE_CHECK] jobs_to_skip = [] @@ -1551,7 +1551,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: ] print(f"CI modifyers from commit message: [{res}]") res_2 = [] - if pr_info.is_pr(): + if pr_info.is_pr: matches = [match[-1] for match in re.findall(pattern, pr_info.body)] res_2 = [ match @@ -1626,7 +1626,7 @@ def _upload_build_artifacts( # Upload head master binaries static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name - if pr_info.is_master() and static_bin_name: + if pr_info.is_master and static_bin_name: # Full binary with debug info: s3_path_full = "/".join((pr_info.base_ref, static_bin_name, "clickhouse-full")) binary_full = Path(job_report.build_dir_for_upload) / "clickhouse" @@ -1908,11 +1908,11 @@ def main() -> int: if not args.skip_jobs: ci_cache = CiCache(s3, jobs_data["digests"]) - if pr_info.is_master(): + if pr_info.is_master: # wait for pending jobs to be finished, await_jobs is a long blocking call # wait pending jobs (for now only on release/master branches) ready_jobs_batches_dict = ci_cache.await_jobs( - jobs_data.get("jobs_to_wait", {}), pr_info.is_release_branch() + jobs_data.get("jobs_to_wait", {}), pr_info.is_release_branch ) jobs_to_do = jobs_data["jobs_to_do"] jobs_to_skip = jobs_data["jobs_to_skip"] @@ -1929,7 +1929,7 @@ def main() -> int: del jobs_params[job] # set planned jobs as pending in the CI cache if on the master - if pr_info.is_master(): + if pr_info.is_master: for job in jobs_data["jobs_to_do"]: config = CI_CONFIG.get_job_config(job) if config.run_always or config.run_by_label: @@ -1939,7 +1939,7 @@ def main() -> int: job, job_params["batches"], config.num_batches, - release_branch=pr_info.is_release_branch(), + release_branch=pr_info.is_release_branch, ) if "jobs_to_wait" in jobs_data: @@ -2121,7 +2121,7 @@ def main() -> int: pr_info, dump_to_file=True, ) - if not pr_info.is_merge_queue(): + if not pr_info.is_merge_queue: # in the merge queue mergeable status must be set only in FinishCheck (last job in wf) update_mergeable_check( commit, diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 56728c3d3ba..2ee526bdd39 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -149,7 +149,7 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: one, so the method does nothing for simple pushes and pull requests with `release`/`release-lts` labels""" - if pr_info.is_merge_queue(): + if pr_info.is_merge_queue: # skip report creation for the MQ return @@ -448,7 +448,7 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> ) # FIXME: For now, always set mergeable check in the Merge Queue. 
It's required to pass MQ - if not_run and not pr_info.is_merge_queue(): + if not_run and not pr_info.is_merge_queue: # Let's avoid unnecessary work return diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 230f3e56110..151cc5a4c02 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -362,7 +362,7 @@ def main(): del args.image_repo del args.push - if pr_info.is_master(): + if pr_info.is_master: push = True image = DockerImageData(image_path, image_repo, False) @@ -374,9 +374,10 @@ def main(): for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): if not args.bucket_prefix: - repo_urls[ - arch - ] = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}/{build_name}" + repo_urls[arch] = ( + f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" + f"{release_or_pr}/{pr_info.sha}/{build_name}" + ) else: repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" if args.allow_build_reuse: diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 617f4c9d88c..79926b33dc0 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -28,7 +28,7 @@ def main(): statuses = get_commit_filtered_statuses(commit) trigger_mergeable_check(commit, statuses) - if not pr_info.is_merge_queue(): + if not pr_info.is_merge_queue: statuses = [s for s in statuses if s.context == CI_STATUS_NAME] if not statuses: return diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 011ecff635e..6ed411a11ef 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -200,7 +200,7 @@ def main(): # always use latest docker_image = KEEPER_IMAGE_NAME if args.program == "keeper" else SERVER_IMAGE_NAME - if pr_info.is_scheduled() or pr_info.is_dispatched(): + if pr_info.is_scheduled or pr_info.is_dispatched: # get latest clcikhouse by the static link for latest master buit - get its version and provide permanent url for this version to the jepsen build_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/master/amd64/clickhouse" download_build_with_progress(build_url, Path(TEMP_PATH) / "clickhouse") diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 293004fc4f3..c61e62f334c 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -310,27 +310,34 @@ class PRInfo: if need_changed_files: self.fetch_changed_files() + @property def is_master(self) -> bool: return self.number == 0 and self.head_ref == "master" + @property def is_release(self) -> bool: return self.number == 0 and bool( re.match(r"^2[1-9]\.[1-9][0-9]*$", self.head_ref) ) + @property def is_release_branch(self) -> bool: return self.number == 0 + @property def is_pr(self): return self.event_type == EventType.PULL_REQUEST - def is_scheduled(self): + @property + def is_scheduled(self) -> bool: return self.event_type == EventType.SCHEDULE - def is_merge_queue(self): + @property + def is_merge_queue(self) -> bool: return self.event_type == EventType.MERGE_QUEUE - def is_dispatched(self): + @property + def is_dispatched(self) -> bool: return self.event_type == EventType.DISPATCH def compare_pr_url(self, pr_object: dict) -> str: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 435a5f726f2..262786d8228 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -8,6 +8,7 @@ from github import Github # isort: on +from cherry_pick import Labels from commit_status_helper import ( CI_STATUS_NAME, create_ci_report, @@ -26,7 +27,6 @@ from lambda_shared_package.lambda_shared.pr import ( ) from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS -from 
cherry_pick import Labels TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -202,7 +202,7 @@ def main(): ci_report_url = create_ci_report(pr_info, []) print("::notice ::Can run") - if not pr_info.is_merge_queue(): + if not pr_info.is_merge_queue: # we need clean CI status for MQ to merge (no pending statuses) post_commit_status( commit, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4580f007606..d49cd283e9f 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -132,7 +132,7 @@ def main(): pr_info = PRInfo() - if pr_info.is_merge_queue() and args.push: + if pr_info.is_merge_queue and args.push: print("Auto style fix will be disabled for Merge Queue workflow") args.push = False From 666b269d679fab8e8cf77a5a2266a89dc0f9d33b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 17 Apr 2024 22:37:27 +0200 Subject: [PATCH 28/44] Fix workflow dependencies after #62556 --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 476cdd57e18..c2e76de5e14 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -97,7 +97,7 @@ jobs: # for main CI chain # Builds_1: - needs: [RunConfig, FastTest] + needs: [RunConfig, StyleCheck, FastTest] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml From be0f0f4909029e8e90db895983af7ab4f8ec839b Mon Sep 17 00:00:00 2001 From: Igor Markelov Date: Wed, 17 Apr 2024 21:56:21 +0000 Subject: [PATCH 29/44] Fix typo in exception explanation --- src/Columns/IColumnUnique.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index a455ddb155a..f71f19a5da6 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -154,7 +154,7 @@ public: void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getPermutation is not supported for ColumnUnique."); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method updatePermutation is not supported for ColumnUnique."); } std::vector scatter(IColumn::ColumnIndex, const IColumn::Selector &) const override From 6e5afb8b89c67ff96df5f0bb22417688195c4777 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 18 Apr 2024 02:00:28 +0000 Subject: [PATCH 30/44] grant all table engines in allow_database --- src/Access/UsersConfigAccessStorage.cpp | 1 + tests/integration/test_dictionaries_ddl/configs/user_default.xml | 1 - .../test_distributed_ddl/configs/users.d/restricted_user.xml | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index b4b843fc77e..4144ecae15f 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -371,6 +371,7 @@ namespace if (databases) { user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); + user->access.grantWithGrantOption(AccessType::TABLE_ENGINE); user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); for (const String & database : *databases) 
user->access.grantWithGrantOption(AccessFlags::allFlags(), database); diff --git a/tests/integration/test_dictionaries_ddl/configs/user_default.xml b/tests/integration/test_dictionaries_ddl/configs/user_default.xml index 6851f1c3a70..bf004102329 100644 --- a/tests/integration/test_dictionaries_ddl/configs/user_default.xml +++ b/tests/integration/test_dictionaries_ddl/configs/user_default.xml @@ -4,7 +4,6 @@ default test - Log diff --git a/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml b/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml index 562c4553458..196249466a5 100644 --- a/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml +++ b/tests/integration/test_distributed_ddl/configs/users.d/restricted_user.xml @@ -10,7 +10,6 @@ db1 - Memory From 6afdce0b5959ee46b339a4d13dceeca75fcff2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 18 Apr 2024 11:06:37 +0100 Subject: [PATCH 31/44] Update 00509_extended_storage_definition_syntax_zookeeper.sql Remove old comment --- .../00509_extended_storage_definition_syntax_zookeeper.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql b/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql index cd5fef883fe..c0b70f87de8 100644 --- a/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql +++ b/tests/queries/0_stateless/00509_extended_storage_definition_syntax_zookeeper.sql @@ -1,5 +1,4 @@ -- Tags: zookeeper, no-shared-merge-tree --- Tag no-parallel: leftovers -- no-shared-merge-tree: boring test, nothing new SET optimize_on_insert = 0; From 98d47e4ef5f542743c4c26283440c1d66ad27997 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Apr 2024 12:12:14 +0200 Subject: [PATCH 32/44] Update src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp Co-authored-by: SmitaRKulkarni --- .../ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index b9531eaeb8c..557a59dea66 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -198,7 +198,7 @@ Azure::Storage::Blobs::BlobClientOptions getAzureBlobClientOptions(const Poco::U else if (value == "ipv6") curl_options.IPResolve = CurlOptions::CURL_IPRESOLVE_V6; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Undexpected value for option 'curl_ip_resolve': {}. Expected one of 'ipv4' or 'ipv6'", value); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value for option 'curl_ip_resolve': {}. 
Expected one of 'ipv4' or 'ipv6'", value); }

 Azure::Storage::Blobs::BlobClientOptions client_options;

From 69cc3c9e8e24de14132cabb2c9c36abb48ed476c Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Thu, 18 Apr 2024 13:02:55 +0200
Subject: [PATCH 33/44] Analyzer: Fix exception message

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index f45ca4bc31c..5faf8dd97c0 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -5174,8 +5174,8 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod
         throw Exception(ErrorCodes::BAD_ARGUMENTS,
             "Lambda {} expect {} arguments. Actual {}. In scope {}",
             lambda_to_resolve.formatASTForErrorMessage(),
-            arguments_size,
             lambda_arguments_nodes_size,
+            arguments_size,
             scope.scope_node->formatASTForErrorMessage());

     /// Initialize aliases in lambda scope

From d6a7fa315e8692603478b37ed4351d1209d1e86c Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 18 Apr 2024 14:44:38 +0000
Subject: [PATCH 34/44] fix crash in function mergeTreeIndex

---
 .../MergeTree/MergeTreeReaderCompact.cpp | 1 -
 src/Storages/StorageMergeTreeIndex.cpp | 16 ++++++++++++++--
 .../03128_merge_tree_index_lazy_load.reference | 8 ++++++++
 .../03128_merge_tree_index_lazy_load.sql | 16 ++++++++++++++++
 4 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference
 create mode 100644 tests/queries/0_stateless/03128_merge_tree_index_lazy_load.sql

diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
index a22bff6b8d2..53acfd539fb 100644
--- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp
@@ -204,7 +204,6 @@ void MergeTreeReaderCompact::readPrefix(
         else
             serialization = getSerializationInPart(name_and_type);

-        deserialize_settings.getter = buffer_getter;
         serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]);
     }

diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp
index 329275f4605..9ecefc5a3dc 100644
--- a/src/Storages/StorageMergeTreeIndex.cpp
+++ b/src/Storages/StorageMergeTreeIndex.cpp
@@ -68,8 +68,8 @@ protected:
         const auto & part_name_column = StorageMergeTreeIndex::part_name_column;
         const auto & mark_number_column = StorageMergeTreeIndex::mark_number_column;
         const auto & rows_in_granule_column = StorageMergeTreeIndex::rows_in_granule_column;
-
         const auto & index = part->getIndex();
+
         Columns result_columns(num_columns);
         for (size_t pos = 0; pos < num_columns; ++pos)
         {
@@ -79,7 +79,19 @@ protected:
             if (index_header.has(column_name))
             {
                 size_t index_position = index_header.getPositionByName(column_name);
-                result_columns[pos] = index[index_position];
+
+                /// Some of the columns from the suffix of the primary index may not be loaded
+                /// according to the setting 'primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns'.
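[Editorial note, not part of the patch — the hunk continues right below: when `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` truncates the in-memory primary index, positions at or past `index.size()` have no loaded data, and the new `else` branch substitutes a default-valued column instead of reading out of bounds. A minimal sketch of the observable effect, assuming the `t_index_lazy_load` table from the 03128 test added below; after a DETACH/ATTACH the suffix column `b` is expected to read as `0`:]

```sql
-- Sketch only: with a truncated index, mergeTreeIndex() returns defaults
-- for the unloaded suffix column instead of crashing.
SELECT mark_number, a, b
FROM mergeTreeIndex(currentDatabase(), t_index_lazy_load)
ORDER BY mark_number;
```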
+                        if (index_position < index.size())
+                        {
+                            result_columns[pos] = index[index_position];
+                        }
+                        else
+                        {
+                            const auto & index_type = index_header.getByPosition(index_position).type;
+                            auto index_column = index_type->createColumnConstWithDefaultValue(num_rows);
+                            result_columns[pos] = index_column->convertToFullColumnIfConst();
+                        }
                     }
                     else if (column_name == part_name_column.name)
                     {
diff --git a/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference
new file mode 100644
index 00000000000..022457178ec
--- /dev/null
+++ b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference
@@ -0,0 +1,8 @@
+0 0 0
+1 4 4
+2 8 8
+3 9 9
+0 0 0
+1 4 0
+2 8 0
+3 9 0
diff --git a/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.sql b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.sql
new file mode 100644
index 00000000000..19f00e7dcad
--- /dev/null
+++ b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.sql
@@ -0,0 +1,16 @@
+DROP TABLE IF EXISTS t_index_lazy_load;
+
+CREATE TABLE t_index_lazy_load (a UInt64, b UInt64)
+ENGINE = MergeTree ORDER BY (a, b)
+SETTINGS index_granularity = 4, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.5;
+
+INSERT INTO t_index_lazy_load SELECT number, number FROM numbers(10);
+
+SELECT mark_number, a, b FROM mergeTreeIndex(currentDatabase(), t_index_lazy_load) ORDER BY mark_number;
+
+DETACH TABLE t_index_lazy_load;
+ATTACH TABLE t_index_lazy_load;
+
+SELECT mark_number, a, b FROM mergeTreeIndex(currentDatabase(), t_index_lazy_load) ORDER BY mark_number;
+
+DROP TABLE t_index_lazy_load;

From 8caa2dfb222b22d76268c3a40b07b6c78571aade Mon Sep 17 00:00:00 2001
From: Peter
Date: Thu, 18 Apr 2024 23:48:48 +0800
Subject: [PATCH 35/44] Be consistent between creating and inserting SQL

---
 docs/en/getting-started/example-datasets/wikistat.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/docs/en/getting-started/example-datasets/wikistat.md b/docs/en/getting-started/example-datasets/wikistat.md
index 393ee64b50d..befcf7aecdd 100644
--- a/docs/en/getting-started/example-datasets/wikistat.md
+++ b/docs/en/getting-started/example-datasets/wikistat.md
@@ -40,8 +40,7 @@ CREATE TABLE wikistat
     project LowCardinality(String),
     subproject LowCardinality(String),
     path String CODEC(ZSTD(3)),
-    hits UInt64 CODEC(ZSTD(3)),
-    size UInt64 CODEC(ZSTD(3))
+    hits UInt64 CODEC(ZSTD(3))
 )
 ENGINE = MergeTree
 ORDER BY (path, time);
@@ -58,8 +57,7 @@ clickhouse-local --query "
         values[1] AS project,
         values[2] AS subproject,
         values[3] AS path,
-        (values[4])::UInt64 AS hits,
-        (values[5])::UInt64 AS size
+        (values[4])::UInt64 AS hits
     FROM file('pageviews*.gz', LineAsString)
     WHERE length(values) = 5 FORMAT Native
 " | clickhouse-client --query "INSERT INTO wikistat FORMAT Native"

From 1dd97b4c150b2382991e4e4fa9fe9b69addafe70 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Thu, 18 Apr 2024 16:12:41 +0200
Subject: [PATCH 36/44] Fix the output for shellcheck

---
 utils/check-style/check_shell.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/check-style/check_shell.sh b/utils/check-style/check_shell.sh
index 94f06220f88..095aeed3d3d 100755
--- a/utils/check-style/check_shell.sh
+++ b/utils/check-style/check_shell.sh
@@ -5,7 +5,7 @@ cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_ou

 start_total=$(date +%s)
 start=$(date +%s)
-./shellcheck-run.sh |& tee /test_output/shellcheck.txt
+./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt
 runtime=$(($(date +%s)-start))
 echo "Check shellcheck. Done. $runtime seconds."

From 63d2f75f05114d5ae28e75802def75b06f8c8e4a Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Thu, 18 Apr 2024 16:25:33 +0200
Subject: [PATCH 37/44] Fix accumulated shellcheck issues

---
 docker/test/stateful/run.sh                               | 2 +-
 docker/test/stress/run.sh                                 | 2 +-
 .../02864_restore_table_with_broken_part.sh               | 5 +++--
 ..._from_old_backup_with_matview_inner_table_metadata.sh  | 5 +++--
 tests/queries/0_stateless/03032_async_backup_restore.sh   | 9 +++++----
 .../03032_dynamically_resize_filesystem_cache_2.sh        | 1 -
 tests/queries/0_stateless/03094_recursive_type_proto.sh   | 2 +-
 7 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index c2e9fdfe41d..6e19e57ed53 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -25,7 +25,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml

 cache_policy=""
-if [ $(($RANDOM%2)) -eq 1 ]; then
+if [ $((RANDOM % 2)) -eq 1 ]; then
     cache_policy="SLRU"
 else
     cache_policy="LRU"
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 81cc61c90bc..4caa9e48885 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -72,7 +72,7 @@ mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/c

 # Randomize cache policies.
cache_policy="" -if [ $(($RANDOM%2)) -eq 1 ]; then +if [ $((RANDOM % 2)) -eq 1 ]; then cache_policy="SLRU" else cache_policy="LRU" diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh index fe26784dab4..08313e2fd3b 100755 --- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -12,10 +12,11 @@ function install_test_backup() local test_backup_filename="$1" local test_backup_path="$CURDIR/backups/${test_backup_filename}" - local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") + local backups_disk_root + backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") if [ -z "${backups_disk_root}" ]; then - echo Disk \'${backups_disk_root}\' not found + echo "Disk '${backups_disk_root}' not found" exit 1 fi diff --git a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh index 3a3d0edc38f..8d987dbf1df 100755 --- a/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh +++ b/tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh @@ -12,10 +12,11 @@ function install_test_backup() local test_backup_filename="$1" local test_backup_path="$CURDIR/backups/${test_backup_filename}" - local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") + local backups_disk_root + backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") if [ -z "${backups_disk_root}" ]; then - echo Disk \'${backups_disk_root}\' not found + echo "Disk '${backups_disk_root}' not found" exit 1 fi diff --git a/tests/queries/0_stateless/03032_async_backup_restore.sh b/tests/queries/0_stateless/03032_async_backup_restore.sh index 81fe12bb0f1..14a8ef7f417 100755 --- a/tests/queries/0_stateless/03032_async_backup_restore.sh +++ b/tests/queries/0_stateless/03032_async_backup_restore.sh @@ -15,7 +15,7 @@ function start_async() { local command="$1" local first_column="s/^\([^\t]*\)\t.*/\1/" - echo $(${CLICKHOUSE_CLIENT} --query "$command" | sed "${first_column}") + ${CLICKHOUSE_CLIENT} --query "$command" | sed "${first_column}" } function wait_status() @@ -25,7 +25,8 @@ function wait_status() local timeout=60 local start=$EPOCHSECONDS while true; do - local current_status=$(${CLICKHOUSE_CLIENT} --query "SELECT status FROM system.backups WHERE id='${operation_id}'") + local current_status + current_status=$(${CLICKHOUSE_CLIENT} --query "SELECT status FROM system.backups WHERE id='${operation_id}'") if [ "${current_status}" == "${expected_status}" ]; then echo "${current_status}" break @@ -41,11 +42,11 @@ function wait_status() # Making a backup. backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" backup_operation_id=$(start_async "BACKUP TABLE tbl TO ${backup_name} ASYNC") -wait_status ${backup_operation_id} "BACKUP_CREATED" +wait_status "${backup_operation_id}" "BACKUP_CREATED" # Restoring from that backup. restore_operation_id=$(start_async "RESTORE TABLE tbl AS tbl2 FROM ${backup_name} ASYNC") -wait_status ${restore_operation_id} "RESTORED" +wait_status "${restore_operation_id}" "RESTORED" # Check the result of that restoration. 
 ${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl2"
diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh
index 79c43048b89..526c4f84030 100755
--- a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh
+++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh
@@ -19,7 +19,6 @@ prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesyst
 $CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated"

 config_path=/etc/clickhouse-server/config.d/storage_conf.xml
-config_path_tmp=$config_path.tmp

 new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'")
 sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path
diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh
index 98a1b54ff9e..9e26f8674cd 100755
--- a/tests/queries/0_stateless/03094_recursive_type_proto.sh
+++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh
@@ -5,5 +5,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh

-SCHEMADIR=$CURDIR/format_schemas
+SCHEMADIR="$CUR_DIR/format_schemas"
 $CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') SETTINGS format_schema='$SCHEMADIR/03094_recursive_type.proto:Struct'" |& grep -c CANNOT_PARSE_PROTOBUF_SCHEMA

From 2ad89ed96c276e2edb3e2d2d798ca1772df5227f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Thu, 18 Apr 2024 17:23:34 +0100
Subject: [PATCH 38/44] add language tags to parts of code

---
 docs/en/getting-started/example-datasets/wikistat.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/en/getting-started/example-datasets/wikistat.md b/docs/en/getting-started/example-datasets/wikistat.md
index befcf7aecdd..e7132d27e86 100644
--- a/docs/en/getting-started/example-datasets/wikistat.md
+++ b/docs/en/getting-started/example-datasets/wikistat.md
@@ -13,7 +13,7 @@ And the presentation: https://presentations.clickhouse.com/fosdem2023/
 Data source: https://dumps.wikimedia.org/other/pageviews/

 Getting the list of links:
-```
+``` shell
 for i in {2015..2023}; do
   for j in {01..12}; do
     echo "${i}-${j}" >&2
 done | sort | uniq | tee links.txt
 ```

 Downloading the data:
-```
+``` shell
 sed -r 's!pageviews-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz!https://dumps.wikimedia.org/other/pageviews/\1/\1-\2/\0!' \
     links.txt | xargs -P3 wget --continue
 ```
@@ -48,7 +48,7 @@ ORDER BY (path, time);

 Loading the data:

-```
+``` shell
 clickhouse-local --query "
   WITH replaceRegexpOne(_path, '^.+pageviews-(\\d{4})(\\d{2})(\\d{2})-(\\d{2})(\\d{2})(\\d{2}).gz$', '\1-\2-\3 \4-\5-\6')::DateTime AS time,
        extractGroups(line, '^([^ \\.]+)(\\.[^ ]+)? +([^ ]+) +(\\d+) +(\\d+)$') AS values
@@ -65,7 +65,7 @@ clickhouse-local --query "

 Or loading the cleaning data:

-```
+``` sql
 INSERT INTO wikistat
 WITH
     parseDateTimeBestEffort(extract(_file, '^pageviews-([\\d\\-]+)\\.gz$')) AS time,
     splitByChar(' ', line) AS values,

From ab46df2cad1ff41901ba946f06f5ec9134dfdcc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Thu, 18 Apr 2024 17:47:03 +0200
Subject: [PATCH 39/44] Fix the 03094_recursive_type_proto test

---
 tests/queries/0_stateless/03094_recursive_type_proto.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03094_recursive_type_proto.sh b/tests/queries/0_stateless/03094_recursive_type_proto.sh
index 9e26f8674cd..d867729f1a5 100755
--- a/tests/queries/0_stateless/03094_recursive_type_proto.sh
+++ b/tests/queries/0_stateless/03094_recursive_type_proto.sh
@@ -6,4 +6,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh

 SCHEMADIR="$CUR_DIR/format_schemas"
-$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') SETTINGS format_schema='$SCHEMADIR/03094_recursive_type.proto:Struct'" |& grep -c CANNOT_PARSE_PROTOBUF_SCHEMA
+$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') SETTINGS format_schema='$SCHEMADIR/03094_recursive_type.proto:Struct'" |& grep -c BAD_ARGUMENTS

From 20ae9fdf6b03c32e3ad0b409d39f6744e6671644 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 19 Apr 2024 10:58:47 +0200
Subject: [PATCH 40/44] Add forgotten "raise" on non-safe GITHUB_JOB_URL

---
 tests/ci/env_helper.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py
index bb3271cc5bb..155a1acaca5 100644
--- a/tests/ci/env_helper.py
+++ b/tests/ci/env_helper.py
@@ -66,6 +66,8 @@ def GITHUB_JOB_URL(safe: bool = True) -> str:
         if safe:
             logging.warning("Using run URL as a fallback to not fail the job")
             return GITHUB_RUN_URL
+        raise
+
     return _GITHUB_JOB_URL

From 58bf18999d46b9562ccf48db62490475d6c7d004 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 19 Apr 2024 11:43:33 +0200
Subject: [PATCH 41/44] Add isort configuration to not spoil imports

---
 pyproject.toml                           |  4 ++++
 tests/ci/artifacts_helper.py             |  3 ---
 tests/ci/build_download_helper.py        |  3 ---
 tests/ci/commit_status_helper.py         |  3 ---
 tests/ci/compatibility_check.py          |  3 ---
 tests/ci/docker_images_check.py          | 11 +++++------
 tests/ci/docker_manifests_merge.py       |  3 ---
 tests/ci/finish_check.py                 |  3 ---
 tests/ci/merge_pr.py                     |  3 ---
 tests/ci/performance_comparison_check.py |  3 ---
 tests/ci/pr_info.py                      |  4 ----
 tests/ci/run_check.py                    |  3 ---
 tests/sqllogic/runner.py                 |  3 ---
 tests/sqllogic/test_parser.py            | 15 ++++++++-------
 14 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3d05abd9ec2..279d077a695 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,10 @@ max-branches=50
 max-nested-blocks=10
 max-statements=200

+[tool.isort]
+profile = "black"
+src_paths = ["src", "tests/ci", "tests/sqllogic"]
+
 [tool.pylint.FORMAT]
 #ignore-long-lines = (# )??$

diff --git a/tests/ci/artifacts_helper.py b/tests/ci/artifacts_helper.py
index 5feca927a96..37abf0bdefb 100644
--- a/tests/ci/artifacts_helper.py
+++ b/tests/ci/artifacts_helper.py
@@ -10,11 +10,8 @@ from pathlib import Path
 from shutil import copy2
 from typing import List, Optional, Union

-# isort: off
 from github.Commit import Commit

-# isort: on
-
 from build_download_helper import download_build_with_progress
 from commit_status_helper import post_commit_status
 from compress_files import SUFFIX, compress_fast, decompress_fast
diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py
index 0d89515d5d8..3f33b58c774 100644
--- a/tests/ci/build_download_helper.py
+++ b/tests/ci/build_download_helper.py
@@ -8,11 +8,8 @@ import time
 from pathlib import Path
 from typing import Any, Callable, List, Optional, Union

-# isort: off
 import requests

-# isort: on
-
 import get_robot_token as grt  # we need an updated ROBOT_TOKEN
 from ci_config import CI_CONFIG
diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py
index 2ee526bdd39..98d355742d6 100644
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@@ -9,7 +9,6 @@ from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Dict, List, Optional, Union

-# isort: off
 from github import Github
 from github.Commit import Commit
 from github.CommitStatus import CommitStatus
@@ -18,8 +17,6 @@ from github.GithubObject import NotSet
 from github.IssueComment import IssueComment
 from github.Repository import Repository

-# isort: on
-
 from ci_config import CHECK_DESCRIPTIONS, REQUIRED_CHECKS, CheckDescription
 from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH
 from pr_info import SKIP_MERGEABLE_CHECK_LABEL, PRInfo
diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py
index 5e980660749..e7fee827320 100644
--- a/tests/ci/compatibility_check.py
+++ b/tests/ci/compatibility_check.py
@@ -8,11 +8,8 @@ import sys
 from pathlib import Path
 from typing import List, Tuple

-# isort: off
 from pip._vendor.packaging.version import Version

-# isort: on
-
 from build_download_helper import download_builds_filter
 from docker_images_helper import DockerImage, get_docker_image, pull_image
 from env_helper import REPORT_PATH, TEMP_PATH
diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py
index b04a3975545..786a529e0a9 100644
--- a/tests/ci/docker_images_check.py
+++ b/tests/ci/docker_images_check.py
@@ -8,11 +8,8 @@ import time
 from pathlib import Path
 from typing import List, Optional, Tuple

-# isort: off
 from github import Github

-# isort: on
-
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
 from commit_status_helper import format_description, get_commit, post_commit_status
 from docker_images_helper import DockerImageData, docker_login, get_images_oredered_list
@@ -225,9 +222,11 @@ def main():
         parent_version = (
             None
             if not image.parent
-            else image_tags[image.parent]
-            if not args.suffix
-            else f"{image_tags[image.parent]}-{args.suffix}"
+            else (
+                image_tags[image.parent]
+                if not args.suffix
+                else f"{image_tags[image.parent]}-{args.suffix}"
+            )
         )

         res = process_single_image(
diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py
index 3c122545735..6c6a88330ea 100644
--- a/tests/ci/docker_manifests_merge.py
+++ b/tests/ci/docker_manifests_merge.py
@@ -8,11 +8,8 @@ import subprocess
 import sys
 from typing import List, Tuple

-# isort: off
 from github import Github

-# isort: on
-
 from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
 from commit_status_helper import format_description, get_commit, post_commit_status
 from docker_images_helper import docker_login, get_images_oredered_list
diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py
index 79926b33dc0..a433cf25aa0 100644
--- a/tests/ci/finish_check.py
+++ b/tests/ci/finish_check.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
 import logging

-# isort: off
 from github import Github

-# isort: on
-
 from commit_status_helper import (
     CI_STATUS_NAME,
     get_commit,
diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py
index cc92fe4f42c..450ece62d4b 100644
--- a/tests/ci/merge_pr.py
+++ b/tests/ci/merge_pr.py
@@ -9,13 +9,10 @@ from os import getenv
 from pprint import pformat
 from typing import Dict, List

-# isort: off
 from github.PaginatedList import PaginatedList
 from github.PullRequestReview import PullRequestReview
 from github.WorkflowRun import WorkflowRun

-# isort: on
-
 from commit_status_helper import get_commit_filtered_statuses
 from get_robot_token import get_best_robot_token
 from github_helper import GitHub, NamedUser, PullRequest, Repository
diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py
index c238fbae603..0c779b515bd 100644
--- a/tests/ci/performance_comparison_check.py
+++ b/tests/ci/performance_comparison_check.py
@@ -9,11 +9,8 @@ import sys
 import traceback
 from pathlib import Path

-# isort: off
 from github import Github

-# isort: on
-
 from build_download_helper import download_builds_filter
 from ci_config import CI_CONFIG
 from clickhouse_helper import get_instance_id, get_instance_type
diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py
index c61e62f334c..f134487c0a2 100644
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@@ -6,12 +6,8 @@ import re
 from typing import Dict, List, Set, Union
 from urllib.parse import quote

-# isort: off
-# for some reason this line moves to the end
 from unidiff import PatchSet  # type: ignore

-# isort: on
-
 from build_download_helper import get_gh_api
 from env_helper import (
     GITHUB_EVENT_PATH,
diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index 262786d8228..204f7b0ece1 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -3,11 +3,8 @@ import logging
 import sys
 from typing import Tuple

-# isort: off
 from github import Github

-# isort: on
-
 from cherry_pick import Labels
 from commit_status_helper import (
     CI_STATUS_NAME,
diff --git a/tests/sqllogic/runner.py b/tests/sqllogic/runner.py
index 2e8e098a099..c4eb0574481 100755
--- a/tests/sqllogic/runner.py
+++ b/tests/sqllogic/runner.py
@@ -10,11 +10,8 @@ import multiprocessing
 import os
 from functools import reduce

-# isort: off
 from deepdiff import DeepDiff  # pylint:disable=import-error; for style check

-# isort: on
-
 from connection import Engines, default_clickhouse_odbc_conn_str, setup_connection
 from test_runner import RequestType, Status, TestRunner
diff --git a/tests/sqllogic/test_parser.py b/tests/sqllogic/test_parser.py
index 648fa9f6bf6..c0abcaecd25 100755
--- a/tests/sqllogic/test_parser.py
+++ b/tests/sqllogic/test_parser.py
@@ -7,14 +7,10 @@ from functools import reduce
 from hashlib import md5
 from itertools import chain

-# isort: off
 # pylint:disable=import-error; for style check
 import sqlglot
 from sqlglot.expressions import ColumnDef, PrimaryKeyColumnConstraint

-# pylint:enable=import-error; for style check
-# isort: on
-
 from exceptions import (
     DataResultDiffer,
     Error,
@@ -23,6 +19,9 @@ from exceptions import (
     QueryExecutionError,
 )

+# pylint:enable=import-error; for style check
+
+
 logger = logging.getLogger("parser")
 logger.setLevel(logging.DEBUG)
@@ -474,9 +473,11 @@ class QueryResult:
                     f"values_count: {self.values_count}" if self.values_count else "",
                     f"data_hash: {self.data_hash}" if self.data_hash else "",
                     f"exception: {self.exception}" if self.exception else "",
-                    f"hash_threshold: {self.hash_threshold}"
-                    if self.hash_threshold
-                    else "",
+                    (
+                        f"hash_threshold: {self.hash_threshold}"
+                        if self.hash_threshold
+                        else ""
+                    ),
                 ]
                 if x
             )

From eb23de35618366a0c262556e324b195f3e891e9a Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 19 Apr 2024 11:45:12 +0200
Subject: [PATCH 42/44] Fail the build reports on non-success status to
 restart on the next attempt

---
 tests/ci/ci.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 24d6d95bd0d..3d712e9d662 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -2121,6 +2121,8 @@ def main() -> int:
                 pr_info,
                 dump_to_file=True,
             )
+            if job_report.status != SUCCESS:
+                exit_code = 1

             if not pr_info.is_merge_queue:
                 # in the merge queue mergeable status must be set only in FinishCheck (last job in wf)
                 update_mergeable_check(

From c9e41251a66b2f22a4b19ed2b5ac0d0e4c18265c Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Fri, 19 Apr 2024 14:27:10 +0200
Subject: [PATCH 43/44] Fail build_report jobs on non-success statuses

---
 tests/ci/build_report_check.py | 3 ++-
 tests/ci/ci.py                 | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py
index c1dd2910788..cc8e226e495 100644
--- a/tests/ci/build_report_check.py
+++ b/tests/ci/build_report_check.py
@@ -139,7 +139,8 @@ def main():
         additional_files=[report_path],
     ).dump()

-    if summary_status == ERROR:
+    # We should fail the report job to rerun it in the following attempts
+    if summary_status != SUCCESS:
         sys.exit(1)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 3d712e9d662..24d6d95bd0d 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -2121,8 +2121,6 @@ def main() -> int:
                 pr_info,
                 dump_to_file=True,
             )
-            if job_report.status != SUCCESS:
-                exit_code = 1

             if not pr_info.is_merge_queue:
                 # in the merge queue mergeable status must be set only in FinishCheck (last job in wf)
                 update_mergeable_check(

From e5ca964808b48c61f7201b0552b2eb5af9fcc631 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Sat, 20 Apr 2024 10:42:27 +0000
Subject: [PATCH 44/44] Add client.id to librdkafka logs

---
 src/Storages/Kafka/StorageKafka.cpp | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp
index 7845ccb02b0..03a30d47d91 100644
--- a/src/Storages/Kafka/StorageKafka.cpp
+++ b/src/Storages/Kafka/StorageKafka.cpp
@@ -915,11 +915,22 @@ void StorageKafka::updateGlobalConfiguration(cppkafka::Configuration & kafka_con
 #endif // USE_KRB5

     // No need to add any prefix, messages can be distinguished
-    kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
-    {
-        auto [poco_level, client_logs_level] = parseSyslogLevel(level);
-        LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message);
-    });
+    kafka_config.set_log_callback(
+        [this](cppkafka::KafkaHandleBase & handle, int level, const std::string & facility, const std::string & message)
+        {
+            auto [poco_level, client_logs_level] = parseSyslogLevel(level);
+            const auto & kafka_object_config = handle.get_configuration();
+            const std::string client_id_key{"client.id"};
+            chassert(kafka_object_config.has_property(client_id_key) && "Kafka configuration doesn't have expected client.id set");
+            LOG_IMPL(
+                log,
+                client_logs_level,
+                poco_level,
+                "[client.id:{}] [rdk:{}] {}",
+                kafka_object_config.get(client_id_key),
+                facility,
+                message);
+        });

     /// NOTE: statistics should be consumed, otherwise it creates too much
    /// entries in the queue, that leads to memory leak and slow shutdown.