From dacdbe469eba03d3fdb3bf28c9d1457b0d9d83d1 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 30 Mar 2023 18:15:08 -0400 Subject: [PATCH 01/71] GRANT CURRENT GRANTS implementation --- .../Access/InterpreterGrantQuery.cpp | 7 +++++++ src/Parsers/Access/ASTGrantQuery.cpp | 2 ++ src/Parsers/Access/ASTGrantQuery.h | 2 ++ src/Parsers/Access/ParserGrantQuery.cpp | 16 +++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index f7e170965e2..65810977a10 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -366,6 +366,13 @@ BlockIO InterpreterGrantQuery::execute() AccessRightsElements elements_to_grant, elements_to_revoke; collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke); + if (query.current_grants) + { + AccessRights new_rights(elements_to_grant); + new_rights.makeIntersection(*current_user_access->getAccessRightsWithImplicit()); + elements_to_grant = new_rights.getElements(); + } + std::vector roles_to_grant; RolesOrUsersSet roles_to_revoke; collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke); diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index 1c86f175fad..331f1798d77 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -147,6 +147,8 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F "ASTGrantQuery can contain either roles or access rights elements " "to grant or revoke, not both of them"); } + else if (current_grants) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " CURRENT GRANTS" << (settings.hilite ? hilite_none : ""); else formatElementsWithoutOptions(access_rights_elements, settings); diff --git a/src/Parsers/Access/ASTGrantQuery.h b/src/Parsers/Access/ASTGrantQuery.h index 8c7df3cd57e..2ccbac3dac8 100644 --- a/src/Parsers/Access/ASTGrantQuery.h +++ b/src/Parsers/Access/ASTGrantQuery.h @@ -26,6 +26,8 @@ public: bool admin_option = false; bool replace_access = false; bool replace_granted_roles = false; + bool current_grants = false; + std::shared_ptr grantees; String getID(char) const override; diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 28a1846df74..863c5435352 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -43,7 +43,6 @@ namespace { if (!str.empty()) str += " "; - std::string_view word{pos->begin, pos->size()}; str += std::string_view(pos->begin, pos->size()); ++pos; } @@ -284,8 +283,18 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) AccessRightsElements elements; std::shared_ptr roles; - if (!parseElementsWithoutOptions(pos, expected, elements) && !parseRoles(pos, expected, is_revoke, attach_mode, roles)) - return false; + + bool current_grants = false; + if (!is_revoke && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) + { + current_grants = true; + elements.emplace_back(AccessType::ALL); + } + else + { + if (!parseElementsWithoutOptions(pos, expected, elements) && !parseRoles(pos, expected, is_revoke, attach_mode, roles)) + return false; + } if (cluster.empty()) parseOnCluster(pos, expected, cluster); @@ -353,6 +362,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->admin_option = admin_option; query->replace_access = replace_access; query->replace_granted_roles = replace_role; + query->current_grants = current_grants; return true; } From f2790a15ffd18d55111f4f1bf7eb4844960eab5f Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 31 Mar 2023 17:35:32 -0400 Subject: [PATCH 02/71] CURRENT GRANTS(...) implementation --- src/Parsers/Access/ParserGrantQuery.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 863c5435352..cd24dc610ed 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -288,7 +288,16 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!is_revoke && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) { current_grants = true; - elements.emplace_back(AccessType::ALL); + if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + if (!parseElementsWithoutOptions(pos, expected, elements) && !parseRoles(pos, expected, is_revoke, attach_mode, roles)) + return false; + + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + } + else + elements.emplace_back(AccessType::ALL); } else { From 8441b0a3e1c3fe66a9dd434d707fb385897fd06e Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 4 Apr 2023 20:43:15 -0400 Subject: [PATCH 03/71] Fix comments, tests, docs --- docs/en/sql-reference/statements/grant.md | 12 +++++++++++ docs/ru/sql-reference/statements/grant.md | 13 ++++++++++++ .../Access/InterpreterGrantQuery.cpp | 2 +- src/Parsers/Access/ParserGrantQuery.cpp | 8 ++++---- .../integration/test_grant_and_revoke/test.py | 20 +++++++++++++++++++ 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 1d9b2c9ea30..9bfc659a179 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -36,6 +36,18 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if is not specified it appends roles. +## Grant Current Grants Syntax +``` sql +GRANT CURRENT GRANTS[(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*})] TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. + +Using the `CURRENT GRANTS` statement allows you to give all specified privileges to the given user or role. +If none of the privileges were specified, then the given user or role will receive all available privileges for `CURRENT_USER`. + ## Usage To use `GRANT`, your account must have the `GRANT OPTION` privilege. You can grant privileges only inside the scope of your account privileges. diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 73c63850750..ba902688483 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -37,6 +37,19 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US `WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли. `WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые новые роли. +## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax} + +```sql +GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Тип привилегии +- `role` — Роль пользователя ClickHouse. +- `user` — Пользователь ClickHouse. + +Использование выражения `CURRENT GRANTS` позволяет присвоить все указанные и доступные для присвоения привилегии. +Если список привелегий не задан, то указанный пользователь или роль получат все доступные привилегии для `CURRENT_USER`. + ## Использование {#grant-usage} Для использования `GRANT` пользователь должен иметь привилегию `GRANT OPTION`. Пользователь может выдавать привилегии только внутри области действий назначенных ему самому привилегий. diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 65810977a10..acca4f628f6 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -369,7 +369,7 @@ BlockIO InterpreterGrantQuery::execute() if (query.current_grants) { AccessRights new_rights(elements_to_grant); - new_rights.makeIntersection(*current_user_access->getAccessRightsWithImplicit()); + new_rights.makeIntersection(*current_user_access->getAccessRights()); elements_to_grant = new_rights.getElements(); } diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index cd24dc610ed..b8e0d7f7030 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -285,7 +285,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::shared_ptr roles; bool current_grants = false; - if (!is_revoke && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) + if (!is_revoke && cluster.empty() && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) { current_grants = true; if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) @@ -305,14 +305,14 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - if (cluster.empty()) + if (cluster.empty() && !current_grants) parseOnCluster(pos, expected, cluster); std::shared_ptr grantees; if (!parseToGrantees(pos, expected, is_revoke, grantees)) return false; - if (cluster.empty()) + if (cluster.empty() && !current_grants) parseOnCluster(pos, expected, cluster); if (!is_revoke) @@ -326,7 +326,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) is_replace = true; } - if (cluster.empty()) + if (cluster.empty() && !current_grants) parseOnCluster(pos, expected, cluster); if (grant_option && roles) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 4ad046fe5d2..a2cfa1f0889 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -585,3 +585,23 @@ def test_grant_with_replace_option(): assert instance.query("SHOW GRANTS FOR B") == TSV( ["GRANT INSERT ON test.table TO B"] ) + + +def test_grant_current_grants(): + instance.query("CREATE USER A") + instance.query("GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION") + assert instance.query("SHOW GRANTS FOR A") == TSV( + ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION"] + ) + + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO B"] + ) + + instance.query("CREATE USER C") + instance.query("GRANT CURRENT GRANTS(CREATE ON test.*) TO C", user="A") + assert instance.query("SHOW GRANTS FOR C") == TSV( + ["GRANT CREATE TABLE, CREATE VIEW ON test.* TO C"] + ) From 3c89d3f49df4381c86599d5ae5ffe5b151cec65a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 5 Apr 2023 01:00:27 +0000 Subject: [PATCH 04/71] Automatic style fix --- tests/integration/test_grant_and_revoke/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index a2cfa1f0889..fc5d179a973 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -589,7 +589,9 @@ def test_grant_with_replace_option(): def test_grant_current_grants(): instance.query("CREATE USER A") - instance.query("GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION") + instance.query( + "GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION" + ) assert instance.query("SHOW GRANTS FOR A") == TSV( ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION"] ) From 144ee7aed48ddf026efd03dddcc076c1314864ea Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 5 Apr 2023 22:52:31 -0400 Subject: [PATCH 05/71] Fix logic, additional tests --- .../Access/InterpreterGrantQuery.cpp | 76 ++++++++++++++++--- src/Parsers/Access/ASTGrantQuery.h | 1 + src/Parsers/Access/ParserGrantQuery.cpp | 13 ++-- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index acca4f628f6..5d06cb8fe4c 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -330,6 +330,67 @@ namespace updateGrantedAccessRightsAndRolesTemplate(*role, elements_to_grant, elements_to_revoke, roles_to_grant, roles_to_revoke, admin_option); } + template + void grantCurrentGrantsTemplate( + T & grantee, + const AccessRightsElements & elements_to_grant, + const AccessRightsElements & elements_to_revoke, + std::shared_ptr current_user_access, + bool grant_option) + { + if (!elements_to_revoke.empty()) + grantee.access.revoke(elements_to_revoke); + + /// We need to collect all current user's grant and filter them by grant option. + AccessRightsElements current_user_grantable_elements; + auto available_grant_elements = current_user_access->getAccessRights()->getElements(); + std::copy_if( + available_grant_elements.begin(), + available_grant_elements.end(), + std::back_inserter(current_user_grantable_elements), + [](AccessRightsElement x) { return x.grant_option || x.is_partial_revoke; }); + + AccessRights current_user_rights; + for (auto & element : current_user_grantable_elements) + { + if (element.is_partial_revoke) + current_user_rights.revoke(element); + else + current_user_rights.grant(element); + } + + /// If elements_to_grant was not specified it will grant all available for user grants. + /// Otherwise, we will intersect available grants with given set. + if (elements_to_grant.empty()) + { + if (!grant_option) + current_user_rights.revokeGrantOption(AccessType::ALL); + + grantee.access.makeUnion(current_user_rights); + } + else + { + AccessRights new_rights(elements_to_grant); + new_rights.makeIntersection(current_user_rights); + + grantee.access.makeUnion(new_rights); + } + } + + /// Grants current user's grants with grant options to specified user. + void grantCurrentGrants( + IAccessEntity & grantee, + const AccessRightsElements & elements_to_grant, + const AccessRightsElements & elements_to_revoke, + std::shared_ptr current_user_access, + bool grant_option) + { + if (auto * user = typeid_cast(&grantee)) + grantCurrentGrantsTemplate(*user, elements_to_grant, elements_to_revoke, current_user_access, grant_option); + else if (auto * role = typeid_cast(&grantee)) + grantCurrentGrantsTemplate(*role, elements_to_grant, elements_to_revoke, current_user_access, grant_option); + } + /// Updates grants of a specified user or role. void updateFromQuery(IAccessEntity & grantee, const ASTGrantQuery & query) { @@ -366,13 +427,6 @@ BlockIO InterpreterGrantQuery::execute() AccessRightsElements elements_to_grant, elements_to_revoke; collectAccessRightsElementsToGrantOrRevoke(query, elements_to_grant, elements_to_revoke); - if (query.current_grants) - { - AccessRights new_rights(elements_to_grant); - new_rights.makeIntersection(*current_user_access->getAccessRights()); - elements_to_grant = new_rights.getElements(); - } - std::vector roles_to_grant; RolesOrUsersSet roles_to_revoke; collectRolesToGrantOrRevoke(access_control, query, roles_to_grant, roles_to_revoke); @@ -393,7 +447,8 @@ BlockIO InterpreterGrantQuery::execute() elements_to_grant.replaceEmptyDatabase(current_database); elements_to_revoke.replaceEmptyDatabase(current_database); bool need_check_grantees_are_allowed = true; - checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke); + if (!query.current_grants) + checkGrantOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, elements_to_grant, elements_to_revoke); /// Check if the current user has corresponding roles granted with admin option. checkAdminOption(access_control, *current_user_access, grantees, need_check_grantees_are_allowed, roles_to_grant, roles_to_revoke, query.admin_option); @@ -405,7 +460,10 @@ BlockIO InterpreterGrantQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); - updateGrantedAccessRightsAndRoles(*clone, elements_to_grant, elements_to_revoke, roles_to_grant, roles_to_revoke, query.admin_option); + if (query.current_grants) + grantCurrentGrants(*clone, elements_to_grant, elements_to_revoke, current_user_access, query.with_grant_option); + else + updateGrantedAccessRightsAndRoles(*clone, elements_to_grant, elements_to_revoke, roles_to_grant, roles_to_revoke, query.admin_option); return clone; }; diff --git a/src/Parsers/Access/ASTGrantQuery.h b/src/Parsers/Access/ASTGrantQuery.h index 2ccbac3dac8..365214d1b01 100644 --- a/src/Parsers/Access/ASTGrantQuery.h +++ b/src/Parsers/Access/ASTGrantQuery.h @@ -27,6 +27,7 @@ public: bool replace_access = false; bool replace_granted_roles = false; bool current_grants = false; + bool with_grant_option = false; std::shared_ptr grantees; diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index b8e0d7f7030..ce1ed8a4a96 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -285,19 +285,17 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) std::shared_ptr roles; bool current_grants = false; - if (!is_revoke && cluster.empty() && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) + if (!is_revoke && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) { current_grants = true; if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { - if (!parseElementsWithoutOptions(pos, expected, elements) && !parseRoles(pos, expected, is_revoke, attach_mode, roles)) + if (!parseElementsWithoutOptions(pos, expected, elements)) return false; if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) return false; } - else - elements.emplace_back(AccessType::ALL); } else { @@ -305,14 +303,14 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - if (cluster.empty() && !current_grants) + if (cluster.empty()) parseOnCluster(pos, expected, cluster); std::shared_ptr grantees; if (!parseToGrantees(pos, expected, is_revoke, grantees)) return false; - if (cluster.empty() && !current_grants) + if (cluster.empty()) parseOnCluster(pos, expected, cluster); if (!is_revoke) @@ -326,7 +324,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) is_replace = true; } - if (cluster.empty() && !current_grants) + if (cluster.empty()) parseOnCluster(pos, expected, cluster); if (grant_option && roles) @@ -372,6 +370,7 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->replace_access = replace_access; query->replace_granted_roles = replace_role; query->current_grants = current_grants; + query->with_grant_option = grant_option; return true; } From a3c5acb106d12fc47b602656faa261f77d0d5fae Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 5 Apr 2023 22:52:48 -0400 Subject: [PATCH 06/71] Additional tests --- .../integration/test_grant_and_revoke/test.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index fc5d179a973..668cd20c554 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -607,3 +607,56 @@ def test_grant_current_grants(): assert instance.query("SHOW GRANTS FOR C") == TSV( ["GRANT CREATE TABLE, CREATE VIEW ON test.* TO C"] ) + + +def test_grant_current_grants_with_partial_revoke(): + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON *.* TO A") + instance.query("GRANT SELECT ON *.* TO A WITH GRANT OPTION") + instance.query("REVOKE SELECT ON test.* FROM A") + instance.query("GRANT SELECT ON test.table TO A WITH GRANT OPTION") + assert instance.query("SHOW GRANTS FOR A") == TSV( + [ + "GRANT CREATE TABLE ON *.* TO A", + "GRANT SELECT ON *.* TO A WITH GRANT OPTION", + "REVOKE SELECT ON test.* FROM A", + "GRANT SELECT ON test.table TO A WITH GRANT OPTION", + ] + ) + + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT ON test.table TO B", + ] + ) + + +def test_current_grants_override(): + instance.query("CREATE USER A") + instance.query("GRANT SELECT ON *.* TO A WITH GRANT OPTION") + instance.query("REVOKE SELECT ON test.* FROM A") + assert instance.query("SHOW GRANTS FOR A") == TSV( + [ + "GRANT SELECT ON *.* TO A WITH GRANT OPTION", + "REVOKE SELECT ON test.* FROM A", + ] + ) + + instance.query("CREATE USER B") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT ON test.table TO B"] + ) + + instance.query("GRANT CURRENT GRANTS TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT ON test.table TO B", + ] + ) From d1fa9596b885b541f9b639d53c6eb879af08736e Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 6 Apr 2023 10:56:35 -0400 Subject: [PATCH 07/71] Prevent execution on cluster --- src/Interpreters/Access/InterpreterGrantQuery.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 5d06cb8fe4c..6bbc5b7963d 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -17,6 +17,7 @@ namespace DB { namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; } @@ -434,6 +435,9 @@ BlockIO InterpreterGrantQuery::execute() /// Executing on cluster. if (!query.cluster.empty()) { + if (query.current_grants) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "GRANT CURRENT GRANTS can't be executed on cluster."); + auto required_access = getRequiredAccessForExecutingOnCluster(elements_to_grant, elements_to_revoke); checkAdminOptionForExecutingOnCluster(*current_user_access, roles_to_grant, roles_to_revoke); current_user_access->checkGranteesAreAllowed(grantees); From 078b8f53990072ad797488a1f66f57390f0cf14c Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 11 Apr 2023 19:55:59 -0400 Subject: [PATCH 08/71] Small refactor, additional tests --- docs/ru/sql-reference/statements/grant.md | 2 +- .../Access/InterpreterGrantQuery.cpp | 49 ++++++---------- src/Parsers/Access/ASTGrantQuery.h | 1 - src/Parsers/Access/ParserGrantQuery.cpp | 6 +- .../integration/test_grant_and_revoke/test.py | 56 ++++++++++++++++++- 5 files changed, 76 insertions(+), 38 deletions(-) diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index ba902688483..8c291b9f878 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -40,7 +40,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US ## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax} ```sql -GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +GRANT CURRENT GRANTS[(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*})] TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] ``` - `privilege` — Тип привилегии diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 6bbc5b7963d..f527e2928f2 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -334,14 +334,22 @@ namespace template void grantCurrentGrantsTemplate( T & grantee, - const AccessRightsElements & elements_to_grant, - const AccessRightsElements & elements_to_revoke, - std::shared_ptr current_user_access, - bool grant_option) + const AccessRights & rights_to_grant, + const AccessRightsElements & elements_to_revoke) { if (!elements_to_revoke.empty()) grantee.access.revoke(elements_to_revoke); + grantee.access.makeUnion(rights_to_grant); + } + + /// Grants current user's grants with grant options to specified user. + void grantCurrentGrants( + IAccessEntity & grantee, + const AccessRightsElements & elements_to_grant, + const AccessRightsElements & elements_to_revoke, + std::shared_ptr current_user_access) + { /// We need to collect all current user's grant and filter them by grant option. AccessRightsElements current_user_grantable_elements; auto available_grant_elements = current_user_access->getAccessRights()->getElements(); @@ -360,36 +368,13 @@ namespace current_user_rights.grant(element); } - /// If elements_to_grant was not specified it will grant all available for user grants. - /// Otherwise, we will intersect available grants with given set. - if (elements_to_grant.empty()) - { - if (!grant_option) - current_user_rights.revokeGrantOption(AccessType::ALL); + AccessRights new_rights(elements_to_grant); + new_rights.makeIntersection(current_user_rights); - grantee.access.makeUnion(current_user_rights); - } - else - { - AccessRights new_rights(elements_to_grant); - new_rights.makeIntersection(current_user_rights); - - grantee.access.makeUnion(new_rights); - } - } - - /// Grants current user's grants with grant options to specified user. - void grantCurrentGrants( - IAccessEntity & grantee, - const AccessRightsElements & elements_to_grant, - const AccessRightsElements & elements_to_revoke, - std::shared_ptr current_user_access, - bool grant_option) - { if (auto * user = typeid_cast(&grantee)) - grantCurrentGrantsTemplate(*user, elements_to_grant, elements_to_revoke, current_user_access, grant_option); + grantCurrentGrantsTemplate(*user, new_rights, elements_to_revoke); else if (auto * role = typeid_cast(&grantee)) - grantCurrentGrantsTemplate(*role, elements_to_grant, elements_to_revoke, current_user_access, grant_option); + grantCurrentGrantsTemplate(*role, new_rights, elements_to_revoke); } /// Updates grants of a specified user or role. @@ -465,7 +450,7 @@ BlockIO InterpreterGrantQuery::execute() { auto clone = entity->clone(); if (query.current_grants) - grantCurrentGrants(*clone, elements_to_grant, elements_to_revoke, current_user_access, query.with_grant_option); + grantCurrentGrants(*clone, elements_to_grant, elements_to_revoke, current_user_access); else updateGrantedAccessRightsAndRoles(*clone, elements_to_grant, elements_to_revoke, roles_to_grant, roles_to_revoke, query.admin_option); return clone; diff --git a/src/Parsers/Access/ASTGrantQuery.h b/src/Parsers/Access/ASTGrantQuery.h index 365214d1b01..2ccbac3dac8 100644 --- a/src/Parsers/Access/ASTGrantQuery.h +++ b/src/Parsers/Access/ASTGrantQuery.h @@ -27,7 +27,6 @@ public: bool replace_access = false; bool replace_granted_roles = false; bool current_grants = false; - bool with_grant_option = false; std::shared_ptr grantees; diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index ce1ed8a4a96..928326e6cb0 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -296,6 +296,11 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) return false; } + + /// If no elements were specified it will grant all available for user grants. + /// Using `.size() == 0` because `.empty()` is overridden and returns true for NONE elements. + if (elements.size() == 0) // NOLINT + elements.emplace_back(AccessType::ALL); } else { @@ -370,7 +375,6 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->replace_access = replace_access; query->replace_granted_roles = replace_role; query->current_grants = current_grants; - query->with_grant_option = grant_option; return true; } diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 668cd20c554..bc61a6a8cdb 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -20,6 +20,9 @@ def start_cluster(): instance.query( "CREATE TABLE test.table(x UInt32, y UInt32) ENGINE = MergeTree ORDER BY tuple()" ) + instance.query( + "CREATE TABLE test.table2(x UInt32, y UInt32) ENGINE = MergeTree ORDER BY tuple()" + ) instance.query("INSERT INTO test.table VALUES (1,5), (2,10)") yield cluster @@ -608,19 +611,29 @@ def test_grant_current_grants(): ["GRANT CREATE TABLE, CREATE VIEW ON test.* TO C"] ) + instance.query("DROP USER IF EXISTS C") + instance.query("CREATE USER C") + instance.query("GRANT CURRENT GRANTS(NONE ON *.*) TO C", user="A") + assert instance.query("SHOW GRANTS FOR C") == TSV([]) + def test_grant_current_grants_with_partial_revoke(): instance.query("CREATE USER A") instance.query("GRANT CREATE TABLE ON *.* TO A") + instance.query("REVOKE CREATE TABLE ON test.* FROM A") + instance.query("GRANT CREATE TABLE ON test.table TO A WITH GRANT OPTION") instance.query("GRANT SELECT ON *.* TO A WITH GRANT OPTION") instance.query("REVOKE SELECT ON test.* FROM A") instance.query("GRANT SELECT ON test.table TO A WITH GRANT OPTION") + instance.query("GRANT SELECT ON test.table2 TO A") + assert instance.query("SHOW GRANTS FOR A") == TSV( [ "GRANT CREATE TABLE ON *.* TO A", "GRANT SELECT ON *.* TO A WITH GRANT OPTION", - "REVOKE SELECT ON test.* FROM A", - "GRANT SELECT ON test.table TO A WITH GRANT OPTION", + "REVOKE SELECT, CREATE TABLE ON test.* FROM A", + "GRANT SELECT, CREATE TABLE ON test.table TO A WITH GRANT OPTION", + "GRANT SELECT ON test.table2 TO A", ] ) @@ -630,7 +643,29 @@ def test_grant_current_grants_with_partial_revoke(): [ "GRANT SELECT ON *.* TO B", "REVOKE SELECT ON test.* FROM B", - "GRANT SELECT ON test.table TO B", + "GRANT SELECT, CREATE TABLE ON test.table TO B", + ] + ) + + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS TO B WITH GRANT OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B WITH GRANT OPTION", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT, CREATE TABLE ON test.table TO B WITH GRANT OPTION", + ] + ) + + instance.query("DROP USER IF EXISTS C") + instance.query("CREATE USER C") + instance.query("GRANT SELECT ON test.* TO B") + instance.query("GRANT CURRENT GRANTS TO C", user="B") + assert instance.query("SHOW GRANTS FOR C") == TSV( + [ + "GRANT SELECT ON *.* TO C", + "GRANT CREATE TABLE ON test.table TO C", ] ) @@ -660,3 +695,18 @@ def test_current_grants_override(): "GRANT SELECT ON test.table TO B", ] ) + + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT ON test.table TO B"] + ) + + instance.query("GRANT CURRENT GRANTS TO B WITH REPLACE OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B", + "REVOKE SELECT ON test.* FROM B", + ] + ) From f8e4d6bb41eae52d2dcc0da9284fffedc8d1c134 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 12 Apr 2023 15:18:28 +0000 Subject: [PATCH 09/71] Add CI run with new analyzer enabled --- .github/workflows/pull_request.yml | 35 ++++++++++++++++++++++++++++++ tests/ci/functional_test_check.py | 2 ++ tests/config/install.sh | 4 ++++ tests/config/users.d/analyzer.xml | 7 ++++++ 4 files changed, 48 insertions(+) create mode 100644 tests/config/users.d/analyzer.xml diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 6fccc0542b7..38a7a8713ae 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1308,6 +1308,40 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -4755,6 +4789,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated2 - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestReleaseAnalyzer - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 8e55c084f21..f10cd634605 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -53,6 +53,8 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): result.append("USE_PARALLEL_REPLICAS=1") if "s3 storage" in check_name: result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + if "analyzer" in check_name: + result.append("USE_NEW_ANALYZER=1") if run_by_hash_total != 0: result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") diff --git a/tests/config/install.sh b/tests/config/install.sh index 77e8a8460ad..efa5a9c086e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -79,6 +79,10 @@ ln -sf $SRC_PATH/users.d/marks.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/insert_keeper_retries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/ +if [[ -n "$USE_NEW_ANALYZER" ]] && [[ "$USE_NEW_ANALYZER" -eq 1 ]]; then + ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/ +fi + # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), # but socket is owned by HTTPSessionPool, so it's not closed. diff --git a/tests/config/users.d/analyzer.xml b/tests/config/users.d/analyzer.xml new file mode 100644 index 00000000000..aa374364ef0 --- /dev/null +++ b/tests/config/users.d/analyzer.xml @@ -0,0 +1,7 @@ + + + + 1 + + + From 360fc596945d4b78de47545b71a543790331042b Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 12 Apr 2023 16:46:30 +0000 Subject: [PATCH 10/71] Handle known broken tests --- docker/test/stateless-analyzer/Dockerfile | 7 + .../test/stateless-analyzer/broken_tests.txt | 170 +++++++++++++ docker/test/stateless-analyzer/run.sh | 230 ++++++++++++++++++ .../util/process_functional_tests_result.py | 30 ++- tests/ci/functional_test_check.py | 2 + 5 files changed, 431 insertions(+), 8 deletions(-) create mode 100644 docker/test/stateless-analyzer/Dockerfile create mode 100644 docker/test/stateless-analyzer/broken_tests.txt create mode 100755 docker/test/stateless-analyzer/run.sh diff --git a/docker/test/stateless-analyzer/Dockerfile b/docker/test/stateless-analyzer/Dockerfile new file mode 100644 index 00000000000..9cbcc69c795 --- /dev/null +++ b/docker/test/stateless-analyzer/Dockerfile @@ -0,0 +1,7 @@ +ARG FROM_TAG=latest +FROM clickhouse/stateless-test:$FROM_TAG + +COPY run.sh / +COPY broken_tests.txt / + +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless-analyzer/broken_tests.txt b/docker/test/stateless-analyzer/broken_tests.txt new file mode 100644 index 00000000000..cca1c556a98 --- /dev/null +++ b/docker/test/stateless-analyzer/broken_tests.txt @@ -0,0 +1,170 @@ +00223_shard_distributed_aggregation_memory_efficient +00562_in_subquery_merge_tree +00593_union_all_assert_columns_removed +00597_push_down_predicate_long +00673_subquery_prepared_set_performance +00700_decimal_compare +00717_merge_and_distributed +00725_memory_tracking +00754_distributed_optimize_skip_select_on_unused_shards +00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere +00814_replicated_minimalistic_part_header_zookeeper +00838_unique_index +00897_flatten +00927_asof_joins +00940_order_by_read_in_order_query_plan +00945_bloom_filter_index +00952_input_function +00953_zookeeper_suetin_deduplication_bug +00979_set_index_not +00981_in_subquery_with_tuple +01049_join_low_card_bug_long +01062_pm_all_join_with_block_continuation +01064_incremental_streaming_from_2_src_with_feedback +01071_force_optimize_skip_unused_shards +01072_optimize_skip_unused_shards_const_expr_eval +01083_expressions_in_engine_arguments +01086_odbc_roundtrip +01142_join_lc_and_nullable_in_key +01142_merge_join_lc_and_nullable_in_key +01152_cross_replication +01155_rename_move_materialized_view +01173_transaction_control_queries +01211_optimize_skip_unused_shards_type_mismatch +01213_optimize_skip_unused_shards_DISTINCT +01214_test_storage_merge_aliases_with_where +01231_distributed_aggregation_memory_efficient_mix_levels +01232_extremes +01244_optimize_distributed_group_by_sharding_key +01247_optimize_distributed_group_by_sharding_key_dist_on_dist +01268_mv_scalars +01268_shard_avgweighted +01270_optimize_skip_unused_shards_low_cardinality +01319_optimize_skip_unused_shards_nesting +01353_low_cardinality_join_types +01357_version_collapsing_attach_detach_zookeeper +01396_inactive_replica_cleanup_nodes_zookeeper +01414_mutations_and_errors_zookeeper +01428_nullable_asof_join +01455_shard_leaf_max_rows_bytes_to_read +01476_right_full_join_switch +01477_lc_in_merge_join_left_key +01487_distributed_in_not_default_db +01495_subqueries_in_with_statement +01504_rocksdb +01505_trivial_count_with_partition_predicate +01527_dist_sharding_key_dictGet_reload +01528_allow_nondeterministic_optimize_skip_unused_shards +01540_verbatim_partition_pruning +01560_merge_distributed_join +01563_distributed_query_finish +01576_alias_column_rewrite +01582_distinct_optimization +01583_const_column_in_set_index +01584_distributed_buffer_cannot_find_column +01585_use_index_for_global_in +01585_use_index_for_global_in_with_null +01586_columns_pruning +01586_replicated_mutations_empty_partition +01593_concurrent_alter_mutations_kill_many_replicas_long +01600_detach_permanently +01615_random_one_shard_insertion +01622_constraints_simple_optimization +01624_soft_constraints +01650_drop_part_and_deduplication_zookeeper_long +01651_bugs_from_15889 +01655_plan_optimizations +01655_plan_optimizations_optimize_read_in_window_order +01656_test_query_log_factories_info +01674_filter_by_uint8 +01681_bloom_filter_nullable_column +01700_system_zookeeper_path_in +01705_normalize_create_alter_function_names +01710_aggregate_projections +01710_force_use_projection +01710_minmax_count_projection +01710_normal_projection_fix1 +01710_projection_additional_filters +01710_projection_aggregation_in_order +01710_projection_in_index +01710_projection_vertical_merges +01710_projection_with_joins +01710_projections +01710_projections_in_distributed_query +01710_projections_optimize_aggregation_in_order +01710_projections_partial_optimize_aggregation_in_order +01721_join_implicit_cast_long +01737_move_order_key_to_prewhere_select_final +01739_index_hint +01747_join_view_filter_dictionary +01748_partition_id_pruning +01753_system_zookeeper_query_param_path_long +01756_optimize_skip_unused_shards_rewrite_in +01757_optimize_skip_unused_shards_limit +01758_optimize_skip_unused_shards_once +01759_optimize_skip_unused_shards_zero_shards +01761_cast_to_enum_nullable +01786_explain_merge_tree +01848_partition_value_column +01889_key_condition_function_chains +01889_sqlite_read_write +01890_materialized_distributed_join +01901_in_literal_shard_prune +01925_join_materialized_columns +01925_test_group_by_const_consistency +01925_test_storage_merge_aliases +01930_optimize_skip_unused_shards_rewrite_in +01947_mv_subquery +01949_clickhouse_local_with_remote_localhost +01951_distributed_push_down_limit +01952_optimize_distributed_group_by_sharding_key +02000_join_on_const +02001_shard_num_shard_count +02024_join_on_or_long +02131_used_row_policies_in_query_log +02139_MV_with_scalar_subquery +02149_read_in_order_fixed_prefix +02174_cte_scalar_cache_mv +02221_system_zookeeper_unrestricted_like +02242_join_rocksdb +02267_join_dup_columns_issue36199 +02302_s3_file_pruning +02317_distinct_in_order_optimization_explain +02336_sort_optimization_with_fill +02341_global_join_cte +02343_aggregation_pipeline +02343_group_by_use_nulls_distributed +02345_implicit_transaction +02346_additional_filters +02346_additional_filters_distr +02346_additional_filters_index +02352_grouby_shadows_arg +02354_annoy +02366_union_decimal_conversion +02375_rocksdb_with_filters +02377_optimize_sorting_by_input_stream_properties_explain +02382_join_and_filtering_set +02402_merge_engine_with_view +02404_memory_bound_merging +02421_decimal_in_precision_issue_41125 +02426_orc_bug +02428_decimal_in_floating_point_literal +02428_parameterized_view +02458_use_structure_from_insertion_table +02479_mysql_connect_to_self +02479_race_condition_between_insert_and_droppin_mv +02481_aggregation_in_order_plan +02481_merge_array_join_sample_by +02483_check_virtuals_shile_using_structure_from_insertion_table +02493_inconsistent_hex_and_binary_number +02494_optimize_group_by_function_keys_and_alias_columns +02494_zero_copy_and_projection_and_mutation_work_together +02496_remove_redundant_sorting +02500_remove_redundant_distinct +02511_complex_literals_as_aggregate_function_parameters +02515_cleanup_async_insert_block_ids +02516_projections_and_context +02521_aggregation_by_partitions +02535_max_parallel_replicas_custom_key +02554_fix_grouping_sets_predicate_push_down +02575_merge_prewhere_different_default_kind \ No newline at end of file diff --git a/docker/test/stateless-analyzer/run.sh b/docker/test/stateless-analyzer/run.sh new file mode 100755 index 00000000000..d3bb78400e2 --- /dev/null +++ b/docker/test/stateless-analyzer/run.sh @@ -0,0 +1,230 @@ +#!/bin/bash + +# fail on errors, verbose and export all env variables +set -e -x -a + +# Choose random timezone for this test run. +TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" +echo "Choosen random timezone $TZ" +ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone + +dpkg -i package_folder/clickhouse-common-static_*.deb +dpkg -i package_folder/clickhouse-common-static-dbg_*.deb +dpkg -i package_folder/clickhouse-server_*.deb +dpkg -i package_folder/clickhouse-client_*.deb + +ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test + +# install test configs +/usr/share/clickhouse-test/config/install.sh + +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + echo "Azure is disabled" +else + azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & +fi + +./setup_minio.sh stateless +./setup_hdfs_minicluster.sh + +# For flaky check we also enable thread fuzzer +if [ "$NUM_TRIES" -gt "1" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 + + mkdir -p /var/run/clickhouse-server + # simpliest way to forward env variables to server + sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid +else + sudo clickhouse start +fi + +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + mkdir -p /var/run/clickhouse-server1 + sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 + sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ + --pid-file /var/run/clickhouse-server1/clickhouse-server.pid \ + -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --mysql_port 19004 --postgresql_port 19005 \ + --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \ + --prometheus.port 19988 \ + --macros.replica r2 # It doesn't work :( + + mkdir -p /var/run/clickhouse-server2 + sudo chown clickhouse:clickhouse /var/run/clickhouse-server2 + sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server2/config.xml --daemon \ + --pid-file /var/run/clickhouse-server2/clickhouse-server.pid \ + -- --path /var/lib/clickhouse2/ --logger.stderr /var/log/clickhouse-server/stderr2.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server2.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server2.err.log \ + --tcp_port 29000 --tcp_port_secure 29440 --http_port 28123 --https_port 28443 --interserver_http_port 29009 --tcp_with_proxy_port 29010 \ + --mysql_port 29004 --postgresql_port 29005 \ + --keeper_server.tcp_port 29181 --keeper_server.server_id 3 \ + --prometheus.port 29988 \ + --macros.shard s2 # It doesn't work :( + + MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) + MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) +fi + +sleep 5 + +function run_tests() +{ + set -x + # We can have several additional options so we pass them as array because it is more ideologically correct. + read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" + + HIGH_LEVEL_COVERAGE=YES + + # Use random order in flaky check + if [ "$NUM_TRIES" -gt "1" ]; then + ADDITIONAL_OPTIONS+=('--order=random') + HIGH_LEVEL_COVERAGE=NO + fi + + if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--s3-storage') + fi + + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--replicated-database') + ADDITIONAL_OPTIONS+=('--jobs') + ADDITIONAL_OPTIONS+=('2') + else + # Too many tests fail for DatabaseReplicated in parallel. All other + # configurations are OK. + ADDITIONAL_OPTIONS+=('--jobs') + ADDITIONAL_OPTIONS+=('8') + fi + + if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then + ADDITIONAL_OPTIONS+=('--run-by-hash-num') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") + ADDITIONAL_OPTIONS+=('--run-by-hash-total') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") + HIGH_LEVEL_COVERAGE=NO + fi + + if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') + fi + + if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then + ADDITIONAL_OPTIONS+=('--report-coverage') + fi + + ADDITIONAL_OPTIONS+=('--report-logs-stats') + + set +e + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ + --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a test_output/test_result.txt + set -e +} + +export -f run_tests + +if [ "$NUM_TRIES" -gt "1" ]; then + # We don't run tests with Ordinary database in PRs, only in master. + # So run new/changed tests with Ordinary at least once in flaky check. + timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: +fi + +timeout "$MAX_RUN_TIME" bash -c run_tests ||: + +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / + +/process_functional_tests_result.py --broken_tests=broken_tests.txt || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv + +clickhouse-client -q "system flush logs" ||: + +# Stop server so we can safely read data with clickhouse-local. +# Why do we read data with clickhouse-local? +# Because it's the simplest way to read it when server has crashed. +sudo clickhouse stop ||: +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: + sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: +fi + +rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: +rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: +zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & + +# Compress tables. +# +# NOTE: +# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data +# directly +# - even though ci auto-compress some files (but not *.tsv) it does this only +# for files >64MB, we want this files to be compressed explicitly +for table in query_log zookeeper_log trace_log transactions_info_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: + fi +done + +# Also export trace log in flamegraph-friendly format. +for trace_type in CPU Memory Real +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q " + select + arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, + count(*) AS samples + from system.trace_log + where trace_type = '$trace_type' + group by trace + order by samples desc + settings allow_introspection_functions = 1 + format TabSeparated" \ + | zstd --threads=0 > "/test_output/trace-log-$trace_type-flamegraph.tsv.zst" ||: +done + + +# Compressed (FIXME: remove once only github actions will be left) +rm /var/log/clickhouse-server/clickhouse-server.log +mv /var/log/clickhouse-server/stderr.log /test_output/ ||: +if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then + tar --zstd -chf /test_output/clickhouse_coverage.tar.zst /profraw ||: +fi + +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: + +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: + rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: + zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: + zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.zst ||: + # FIXME: remove once only github actions will be left + rm /var/log/clickhouse-server/clickhouse-server1.log + rm /var/log/clickhouse-server/clickhouse-server2.log + mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: + mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: + tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: + tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: +fi diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index da58db8e45d..84e1ad1dc2a 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -18,7 +18,7 @@ SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" -def process_test_log(log_path): +def process_test_log(log_path, broken_tests): total = 0 skipped = 0 unknown = 0 @@ -62,8 +62,12 @@ def process_test_log(log_path): failed += 1 test_results.append((test_name, "Timeout", test_time, [])) elif FAIL_SIGN in line: - failed += 1 - test_results.append((test_name, "FAIL", test_time, [])) + if test_name in broken_tests: + success += 1 + test_results.append((test_name, "OK", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) elif UNKNOWN_SIGN in line: unknown += 1 test_results.append((test_name, "FAIL", test_time, [])) @@ -71,8 +75,12 @@ def process_test_log(log_path): skipped += 1 test_results.append((test_name, "SKIPPED", test_time, [])) else: - success += int(OK_SIGN in line) - test_results.append((test_name, "OK", test_time, [])) + if OK_SIGN in line and test_name in broken_tests: + failed += 1 + test_results.append((test_name, "FAIL", test_time, ["Test is expected to fail!\n"])) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) test_end = False elif ( len(test_results) > 0 and test_results[-1][1] == "FAIL" and not test_end @@ -110,7 +118,7 @@ def process_test_log(log_path): ) -def process_result(result_path): +def process_result(result_path, broken_tests): test_results = [] state = "success" description = "" @@ -134,7 +142,7 @@ def process_result(result_path): success_finish, retries, test_results, - ) = process_test_log(result_path) + ) = process_test_log(result_path, broken_tests) is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flaky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) @@ -186,9 +194,15 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + parser.add_argument("--broken-tests", default="") args = parser.parse_args() - state, description, test_results = process_result(args.in_results_dir) + broken_tests = list() + if len(args.broken_tests) != 0: + with open(args.broken_tests) as f: + broken_tests = f.readlines() + + state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index f10cd634605..c4ce77c050e 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -64,6 +64,8 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): def get_image_name(check_name): + if "analyzer" in check_name.lower(): + return "clickhouse/stateless-analyzer" if "stateless" in check_name.lower(): return "clickhouse/stateless-test" if "stateful" in check_name.lower(): From fa7b72570b5caa3600d44a6dff4764fd2351fe88 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 12 Apr 2023 17:09:37 +0000 Subject: [PATCH 11/71] Automatic style fix --- docker/test/util/process_functional_tests_result.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 84e1ad1dc2a..365a1b22523 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -77,7 +77,14 @@ def process_test_log(log_path, broken_tests): else: if OK_SIGN in line and test_name in broken_tests: failed += 1 - test_results.append((test_name, "FAIL", test_time, ["Test is expected to fail!\n"])) + test_results.append( + ( + test_name, + "FAIL", + test_time, + ["Test is expected to fail!\n"], + ) + ) else: success += int(OK_SIGN in line) test_results.append((test_name, "OK", test_time, [])) From aa0f3bc994786aa0ce0735981f1e9cc0c21b0897 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 12 Apr 2023 20:28:17 -0400 Subject: [PATCH 12/71] Solve pr issues --- .../Access/InterpreterGrantQuery.cpp | 33 ++++++++++++------- src/Parsers/Access/ParserGrantQuery.cpp | 29 +++++++++++++--- .../integration/test_grant_and_revoke/test.py | 21 ++++++++---- 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index f527e2928f2..0bce0b2a6bd 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -346,11 +346,21 @@ namespace /// Grants current user's grants with grant options to specified user. void grantCurrentGrants( IAccessEntity & grantee, - const AccessRightsElements & elements_to_grant, - const AccessRightsElements & elements_to_revoke, - std::shared_ptr current_user_access) + const AccessRights & new_rights, + const AccessRightsElements & elements_to_revoke) + { + if (auto * user = typeid_cast(&grantee)) + grantCurrentGrantsTemplate(*user, new_rights, elements_to_revoke); + else if (auto * role = typeid_cast(&grantee)) + grantCurrentGrantsTemplate(*role, new_rights, elements_to_revoke); + } + + /// Calculates all available rights to grant with current user intersection. + void calculateCurrentGrantRightsWithIntersection( + AccessRights & rights, + std::shared_ptr current_user_access, + const AccessRightsElements & elements_to_grant) { - /// We need to collect all current user's grant and filter them by grant option. AccessRightsElements current_user_grantable_elements; auto available_grant_elements = current_user_access->getAccessRights()->getElements(); std::copy_if( @@ -368,13 +378,8 @@ namespace current_user_rights.grant(element); } - AccessRights new_rights(elements_to_grant); - new_rights.makeIntersection(current_user_rights); - - if (auto * user = typeid_cast(&grantee)) - grantCurrentGrantsTemplate(*user, new_rights, elements_to_revoke); - else if (auto * role = typeid_cast(&grantee)) - grantCurrentGrantsTemplate(*role, new_rights, elements_to_revoke); + rights.grant(elements_to_grant); + rights.makeIntersection(current_user_rights); } /// Updates grants of a specified user or role. @@ -445,12 +450,16 @@ BlockIO InterpreterGrantQuery::execute() if (need_check_grantees_are_allowed) current_user_access->checkGranteesAreAllowed(grantees); + AccessRights new_rights; + if (query.current_grants) + calculateCurrentGrantRightsWithIntersection(new_rights, current_user_access, elements_to_grant); + /// Update roles and users listed in `grantees`. auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto clone = entity->clone(); if (query.current_grants) - grantCurrentGrants(*clone, elements_to_grant, elements_to_revoke, current_user_access); + grantCurrentGrants(*clone, new_rights, elements_to_revoke); else updateGrantedAccessRightsAndRoles(*clone, elements_to_grant, elements_to_revoke, roles_to_grant, roles_to_revoke, query.admin_option); return clone; diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 928326e6cb0..2ff110f136a 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -295,12 +295,31 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) return false; - } - /// If no elements were specified it will grant all available for user grants. - /// Using `.size() == 0` because `.empty()` is overridden and returns true for NONE elements. - if (elements.size() == 0) // NOLINT - elements.emplace_back(AccessType::ALL); + /// If no elements were specified it will grant all available for user grants. + /// Using `.size() == 0` because `.empty()` is overridden and returns true for NONE elements. + /// Specifically, this should handle `GRANT CURRENT GRANTS() ...` + if (elements.size() == 0) // NOLINT + elements.emplace_back(AccessType::ALL); + } + else + { + AccessRightsElement default_element(AccessType::ALL); + + if (!ParserKeyword{"ON"}.ignore(pos, expected)) + return false; + + String database_name, table_name; + bool any_database = false, any_table = false; + if (!parseDatabaseAndTableNameOrAsterisks(pos, expected, database_name, any_database, table_name, any_table)) + return false; + + default_element.any_database = any_database; + default_element.database = database_name; + default_element.any_table = any_table; + default_element.table = table_name; + elements.push_back(std::move(default_element)); + } } else { diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index bc61a6a8cdb..ee5d4b5df93 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -600,7 +600,7 @@ def test_grant_current_grants(): ) instance.query("CREATE USER B") - instance.query("GRANT CURRENT GRANTS TO B", user="A") + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") assert instance.query("SHOW GRANTS FOR B") == TSV( ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO B"] ) @@ -638,7 +638,7 @@ def test_grant_current_grants_with_partial_revoke(): ) instance.query("CREATE USER B") - instance.query("GRANT CURRENT GRANTS TO B", user="A") + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") assert instance.query("SHOW GRANTS FOR B") == TSV( [ "GRANT SELECT ON *.* TO B", @@ -649,7 +649,7 @@ def test_grant_current_grants_with_partial_revoke(): instance.query("DROP USER IF EXISTS B") instance.query("CREATE USER B") - instance.query("GRANT CURRENT GRANTS TO B WITH GRANT OPTION", user="A") + instance.query("GRANT CURRENT GRANTS ON *.* TO B WITH GRANT OPTION", user="A") assert instance.query("SHOW GRANTS FOR B") == TSV( [ "GRANT SELECT ON *.* TO B WITH GRANT OPTION", @@ -661,7 +661,7 @@ def test_grant_current_grants_with_partial_revoke(): instance.query("DROP USER IF EXISTS C") instance.query("CREATE USER C") instance.query("GRANT SELECT ON test.* TO B") - instance.query("GRANT CURRENT GRANTS TO C", user="B") + instance.query("GRANT CURRENT GRANTS ON *.* TO C", user="B") assert instance.query("SHOW GRANTS FOR C") == TSV( [ "GRANT SELECT ON *.* TO C", @@ -669,6 +669,15 @@ def test_grant_current_grants_with_partial_revoke(): ] ) + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS ON test.* TO B WITH GRANT OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT, CREATE TABLE ON test.table TO B WITH GRANT OPTION", + ] + ) + def test_current_grants_override(): instance.query("CREATE USER A") @@ -687,7 +696,7 @@ def test_current_grants_override(): ["GRANT SELECT ON test.table TO B"] ) - instance.query("GRANT CURRENT GRANTS TO B", user="A") + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") assert instance.query("SHOW GRANTS FOR B") == TSV( [ "GRANT SELECT ON *.* TO B", @@ -703,7 +712,7 @@ def test_current_grants_override(): ["GRANT SELECT ON test.table TO B"] ) - instance.query("GRANT CURRENT GRANTS TO B WITH REPLACE OPTION", user="A") + instance.query("GRANT CURRENT GRANTS ON *.* TO B WITH REPLACE OPTION", user="A") assert instance.query("SHOW GRANTS FOR B") == TSV( [ "GRANT SELECT ON *.* TO B", From da9a539cf79edb3f8daf8521ea7cac14eb0271a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 12 Apr 2023 15:36:23 +0200 Subject: [PATCH 13/71] Reduce the usage of Arena.h --- .../AggregateFunctionDistinct.h | 1 + .../AggregateFunctionGroupUniqArray.h | 1 + .../AggregateFunctionHistogram.h | 2 +- .../AggregateFunctionIntervalLengthSum.h | 1 - .../AggregateFunctionKolmogorovSmirnovTest.h | 1 - .../AggregateFunctionMannWhitney.h | 1 - .../AggregateFunctionRankCorrelation.h | 2 -- .../AggregateFunctionRetention.h | 1 - .../AggregateFunctionTopK.h | 1 + .../AggregateFunctionWindowFunnel.h | 1 - src/Common/TLDListsHolder.h | 2 +- src/Dictionaries/DictionaryHelpers.h | 3 +- src/Dictionaries/IPAddressDictionary.h | 3 +- src/Dictionaries/RegExpTreeDictionary.h | 1 - src/IO/ReadHelpers.h | 17 ---------- src/IO/ReadHelpersArena.h | 31 +++++++++++++++++++ src/Interpreters/AggregationCommon.h | 3 +- src/Interpreters/Aggregator.h | 5 ++- .../Impl/ParallelFormattingOutputFormat.h | 1 - .../Transforms/TotalsHavingTransform.h | 4 --- 20 files changed, 46 insertions(+), 36 deletions(-) create mode 100644 src/IO/ReadHelpersArena.h diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index e09e0ef621d..4338dcff5c0 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index bc7ccb08267..4cd7a7932b0 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -4,6 +4,7 @@ #include #include +#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 3a98737f199..967bc9bb517 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -2,7 +2,6 @@ #include -#include #include #include @@ -29,6 +28,7 @@ namespace DB { struct Settings; +class Arena; namespace ErrorCodes { diff --git a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h index e31c62802f1..16e9388d4bb 100644 --- a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h +++ b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h @@ -6,7 +6,6 @@ #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h index 51e0950f782..737f37b1fba 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index 86075440169..ac6ce0d0ca9 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index 4f9ca55f9f5..4f7d04100cf 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -14,8 +14,6 @@ #include #include -#include - namespace DB { struct Settings; diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h index 7ecb9509dd5..63ff5921540 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.h +++ b/src/AggregateFunctions/AggregateFunctionRetention.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index f1e57608195..89985c0ea6b 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -2,6 +2,7 @@ #include #include +#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index c4a9fa1b936..e83c5277d26 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/src/Common/TLDListsHolder.h b/src/Common/TLDListsHolder.h index 5ea8c5afe9f..be399843c08 100644 --- a/src/Common/TLDListsHolder.h +++ b/src/Common/TLDListsHolder.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -11,6 +10,7 @@ namespace DB { +class Arena; enum TLDType { diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 4fc080f2960..1de7be0bf4f 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -29,6 +28,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +class Arena; + /** Simple helper for getting default. * Initialized with default value and default values column. * If default values column is not null default value is taken from column. diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 67827c6524e..40dc5dd6782 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -18,6 +17,8 @@ namespace DB { +class Arena; + class IPAddressDictionary final : public IDictionary { public: diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 17a0c6bbef3..4e8e20bba2d 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -10,7 +10,6 @@ #include #include -#include #include #include #include diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 9c0c9525773..f1f001dd2c5 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -142,22 +141,6 @@ inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t max_strin buf.readStrict(s.data(), size); } - -inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf) -{ - size_t size = 0; - readVarUInt(size, buf); - - if (unlikely(size > DEFAULT_MAX_STRING_SIZE)) - throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size."); - - char * data = arena.alloc(size); - buf.readStrict(data, size); - - return StringRef(data, size); -} - - template void readVectorBinary(std::vector & v, ReadBuffer & buf) { diff --git a/src/IO/ReadHelpersArena.h b/src/IO/ReadHelpersArena.h new file mode 100644 index 00000000000..3ff65817e33 --- /dev/null +++ b/src/IO/ReadHelpersArena.h @@ -0,0 +1,31 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + + +namespace ErrorCodes +{ + extern const int TOO_LARGE_STRING_SIZE; +} + +inline StringRef readStringBinaryInto(Arena & arena, ReadBuffer & buf) +{ + size_t size = 0; + readVarUInt(size, buf); + + if (unlikely(size > DEFAULT_MAX_STRING_SIZE)) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size."); + + char * data = arena.alloc(size); + buf.readStrict(data, size); + + return StringRef(data, size); +} + +} diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 32b01ee0416..2e6da40ff1f 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -25,6 +24,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +class Arena; + using Sizes = std::vector; /// When packing the values of nullable columns at a given row, we have to diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 5fb94c5f4e8..92d0c27cac4 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -7,7 +7,6 @@ #include -#include #include #include #include @@ -47,6 +46,10 @@ namespace ErrorCodes extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; } +class Arena; +using ArenaPtr = std::shared_ptr; +using Arenas = std::vector; + /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. * Data and pool ownership (states of aggregate functions) diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index 4e5aaab5dcb..fddcd059be5 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -2,7 +2,6 @@ #include -#include #include #include #include diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index f252d683b9a..350956c9c6b 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -2,14 +2,10 @@ #include #include -#include namespace DB { -class Arena; -using ArenaPtr = std::shared_ptr; - class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; From 2b70e08f23b72f75a763dcf11bde4ad9348faca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 12 Apr 2023 17:09:05 +0200 Subject: [PATCH 14/71] Don't count unreserved bytes in Arenas as read_bytes --- src/Columns/ColumnAggregateFunction.cpp | 9 ++-- src/Common/Arena.h | 17 +++++-- src/Common/ArenaWithFreeLists.h | 9 ++-- src/Common/examples/arena_with_free_lists.cpp | 6 +-- src/Coordination/SnapshotableHashTable.h | 5 +- src/Dictionaries/CacheDictionaryStorage.h | 2 +- src/Dictionaries/FlatDictionary.cpp | 2 +- src/Dictionaries/HashedArrayDictionary.cpp | 2 +- src/Dictionaries/HashedDictionary.cpp | 2 +- src/Dictionaries/IPAddressDictionary.cpp | 2 +- src/Dictionaries/RangeHashedDictionary.h | 2 +- src/Interpreters/HashJoin.cpp | 2 +- .../Algorithms/AggregatingSortedAlgorithm.cpp | 6 +-- .../Algorithms/SummingSortedAlgorithm.cpp | 6 +-- ...714_read_bytes_aggregateFunction.reference | 6 +++ .../02714_read_bytes_aggregateFunction.sql | 49 +++++++++++++++++++ 16 files changed, 93 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference create mode 100644 tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index fd46b38ada8..b52f078c315 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -385,8 +385,7 @@ void ColumnAggregateFunction::updateHashFast(SipHash & hash) const /// threads, so we can't know the size of these data. size_t ColumnAggregateFunction::byteSize() const { - return data.size() * sizeof(data[0]) - + (my_arena ? my_arena->size() : 0); + return data.size() * sizeof(data[0]) + (my_arena ? my_arena->reservedBytes() : 0); } size_t ColumnAggregateFunction::byteSizeAt(size_t) const @@ -395,11 +394,11 @@ size_t ColumnAggregateFunction::byteSizeAt(size_t) const return sizeof(data[0]) + func->sizeOfData(); } -/// Like in byteSize(), the size is underestimated. +/// Similar to byteSize() the size is underestimated. +/// In this case it's also overestimated at the same time as it counts all the bytes allocated by the arena, used or not size_t ColumnAggregateFunction::allocatedBytes() const { - return data.allocated_bytes() - + (my_arena ? my_arena->size() : 0); + return data.allocated_bytes() + (my_arena ? my_arena->allocatedBytes() : 0); } void ColumnAggregateFunction::protect() diff --git a/src/Common/Arena.h b/src/Common/Arena.h index 5772dff6bca..4b358795972 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -299,10 +299,21 @@ public: return res; } - /// Size of MemoryChunks in bytes. - size_t size() const + /// Size of all MemoryChunks in bytes. + size_t allocatedBytes() const { return size_in_bytes; } + + /// Total reserved space of all MemoryChunks in bytes. + size_t reservedBytes() const { - return size_in_bytes; + size_t used_bytes = 0; + auto * current = head; + while (current) + { + used_bytes += current->pos - current->begin; + current = current->prev; + } + + return used_bytes; } /// Bad method, don't use it -- the MemoryChunks are not your business, the entire diff --git a/src/Common/ArenaWithFreeLists.h b/src/Common/ArenaWithFreeLists.h index 53a59c98299..76760a20320 100644 --- a/src/Common/ArenaWithFreeLists.h +++ b/src/Common/ArenaWithFreeLists.h @@ -107,10 +107,7 @@ public: } /// Size of the allocated pool in bytes - size_t size() const - { - return pool.size(); - } + size_t allocatedBytes() const { return pool.allocatedBytes(); } }; class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists @@ -135,10 +132,10 @@ public: } /// Size of the allocated pool in bytes - size_t size() const + size_t allocatedBytes() const { std::lock_guard lock{mutex}; - return ArenaWithFreeLists::size(); + return ArenaWithFreeLists::allocatedBytes(); } private: mutable std::mutex mutex; diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index 4f209ccb5b2..3f1b3e88328 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -270,7 +270,7 @@ int main(int argc, char ** argv) watch.stop(); std::cerr - << "Insert info arena. Bytes: " << arena.size() + << "Insert info arena. Bytes: " << arena.allocatedBytes() << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.," << " " << sum_strings_size / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)" @@ -298,7 +298,7 @@ int main(int argc, char ** argv) watch.stop(); std::cerr - << "Randomly remove and insert elements. Bytes: " << arena.size() + << "Randomly remove and insert elements. Bytes: " << arena.allocatedBytes() << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.," << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)" @@ -331,7 +331,7 @@ int main(int argc, char ** argv) watch.stop(); std::cerr - << "Filling cache. Bytes: " << arena.size() + << "Filling cache. Bytes: " << arena.allocatedBytes() << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.," << " " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)" diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index cfa3098b4a1..7db546bd4c8 100644 --- a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -333,10 +333,7 @@ public: } } - uint64_t keyArenaSize() const - { - return arena.size(); - } + uint64_t keyArenaSize() const { return arena.allocatedBytes(); } iterator begin() { return list.begin(); } const_iterator begin() const { return list.cbegin(); } diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 5b52fbde00d..ba17cebebba 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -157,7 +157,7 @@ public: }); } - return arena.size() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes; + return arena.allocatedBytes() + sizeof(Cell) * configuration.max_size_in_cells + attributes_size_in_bytes; } private: diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index cc345b97abe..d3699a150c4 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -505,7 +505,7 @@ void FlatDictionary::calculateBytesAllocated() bytes_allocated += hierarchical_index_bytes_allocated; } - bytes_allocated += string_arena.size(); + bytes_allocated += string_arena.allocatedBytes(); } FlatDictionary::Attribute FlatDictionary::createAttribute(const DictionaryAttribute & dictionary_attribute) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 9e6ce0597cb..880f68cea95 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -797,7 +797,7 @@ void HashedArrayDictionary::calculateBytesAllocated() bytes_allocated += hierarchical_index_bytes_allocated; } - bytes_allocated += string_arena.size(); + bytes_allocated += string_arena.allocatedBytes(); } template diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 5cfac20e572..b27a0b5160e 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -1022,7 +1022,7 @@ void HashedDictionary::calculateBytesAlloc } for (const auto & arena : string_arenas) - bytes_allocated += arena->size(); + bytes_allocated += arena->allocatedBytes(); } template diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index ff1c784750b..6bb06de7506 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -541,7 +541,7 @@ template <> void IPAddressDictionary::addAttributeSize(const Attribute & attribute) { addAttributeSize(attribute); - bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); + bytes_allocated += sizeof(Arena) + attribute.string_arena->allocatedBytes(); } void IPAddressDictionary::calculateBytesAllocated() diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index d6bb510542e..600530a3e70 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -726,7 +726,7 @@ void RangeHashedDictionary::calculateBytesAllocated() if (update_field_loaded_block) bytes_allocated += update_field_loaded_block->allocatedBytes(); - bytes_allocated += string_arena.size(); + bytes_allocated += string_arena.allocatedBytes(); } template diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index fe0244ff314..68bde0d3edb 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -517,7 +517,7 @@ size_t HashJoin::getTotalByteCount() const res += data->blocks_allocated_size; res += data->blocks_nullmaps_allocated_size; - res += data->pool.size(); + res += data->pool.allocatedBytes(); if (data->type != Type::CROSS) { diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index ef103eb508c..74cccdb08dd 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -168,7 +168,7 @@ AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( if (def.allocates_memory_in_arena) { arena = std::make_unique(); - arena_size = arena->size(); + arena_size = arena->allocatedBytes(); } } @@ -194,10 +194,10 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::startGroup(const ColumnR /// To avoid this, reset arena if and only if: /// - arena is required (i.e. SimpleAggregateFunction(any, String) in PK), /// - arena was used in the previous groups. - if (def.allocates_memory_in_arena && arena->size() > arena_size) + if (def.allocates_memory_in_arena && arena->allocatedBytes() > arena_size) { arena = std::make_unique(); - arena_size = arena->size(); + arena_size = arena->allocatedBytes(); } is_group_started = true; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index d8e95e6b950..7f4dcfba6c2 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -505,7 +505,7 @@ SummingSortedAlgorithm::SummingMergedData::SummingMergedData( if (def.allocates_memory_in_arena) { arena = std::make_unique(); - arena_size = arena->size(); + arena_size = arena->allocatedBytes(); } } @@ -519,10 +519,10 @@ void SummingSortedAlgorithm::SummingMergedData::startGroup(ColumnRawPtrs & raw_c for (auto & desc : def.columns_to_aggregate) desc.createState(); - if (def.allocates_memory_in_arena && arena->size() > arena_size) + if (def.allocates_memory_in_arena && arena->allocatedBytes() > arena_size) { arena = std::make_unique(); - arena_size = arena->size(); + arena_size = arena->allocatedBytes(); } if (def.maps_to_sum.empty()) diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference new file mode 100644 index 00000000000..d315d85a11e --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference @@ -0,0 +1,6 @@ +UInt64 1 8 +UInt64 10 80 +UInt64 1000 8000 +AggregateFunction(argMax, String, DateTime) 1 80 +AggregateFunction(argMax, String, DateTime) 10 800 +AggregateFunction(argMax, String, DateTime) 1000 80000 diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql new file mode 100644 index 00000000000..f66857ddee6 --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql @@ -0,0 +1,49 @@ +CREATE TABLE test (id UInt64, `amax` AggregateFunction(argMax, String, DateTime)) +ENGINE=MergeTree() +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization=1 -- Sparse columns will take more bytes for a single row +AS + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(1) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(9) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test +SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) +FROM numbers(990) +GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'UInt64', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT sum(id) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; + +SELECT 'AggregateFunction(argMax, String, DateTime)', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT argMaxMerge(amax) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; From e239df4c25e89bc5391ea37dc9ec3b37b68807d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 13 Apr 2023 13:41:44 +0200 Subject: [PATCH 15/71] Keep count to avoid iterating over lists --- src/Common/Arena.h | 32 ++++++++----------- .../02714_read_bytes_aggregateFunction.sql | 10 ++++++ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/Common/Arena.h b/src/Common/Arena.h index 4b358795972..829d0687e0d 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -80,7 +80,8 @@ private: /// Last contiguous MemoryChunk of memory. MemoryChunk * head; - size_t size_in_bytes; + size_t allocated_bytes; + size_t reserved_bytes; size_t page_size; static size_t roundUpToPageSize(size_t s, size_t page_size) @@ -119,7 +120,7 @@ private: void NO_INLINE addMemoryChunk(size_t min_size) { head = new MemoryChunk(nextSize(min_size + pad_right), head); - size_in_bytes += head->size(); + allocated_bytes += head->size(); } friend class ArenaAllocator; @@ -127,9 +128,12 @@ private: public: explicit Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, size_t linear_growth_threshold_ = 128 * 1024 * 1024) - : growth_factor(growth_factor_), linear_growth_threshold(linear_growth_threshold_), - head(new MemoryChunk(initial_size_, nullptr)), size_in_bytes(head->size()), - page_size(static_cast(::getPageSize())) + : growth_factor(growth_factor_) + , linear_growth_threshold(linear_growth_threshold_) + , head(new MemoryChunk(initial_size_, nullptr)) + , allocated_bytes(head->size()) + , reserved_bytes(0) + , page_size(static_cast(::getPageSize())) { } @@ -141,6 +145,7 @@ public: /// Get piece of memory, without alignment. char * alloc(size_t size) { + reserved_bytes += size; if (unlikely(static_cast(size) > head->end - head->pos)) addMemoryChunk(size); @@ -153,6 +158,7 @@ public: /// Get piece of memory with alignment char * alignedAlloc(size_t size, size_t alignment) { + reserved_bytes += size; do { void * head_pos = head->pos; @@ -184,6 +190,7 @@ public: */ void * rollback(size_t size) { + reserved_bytes -= size; head->pos -= size; ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right); return head->pos; @@ -300,21 +307,10 @@ public: } /// Size of all MemoryChunks in bytes. - size_t allocatedBytes() const { return size_in_bytes; } + size_t allocatedBytes() const { return allocated_bytes; } /// Total reserved space of all MemoryChunks in bytes. - size_t reservedBytes() const - { - size_t used_bytes = 0; - auto * current = head; - while (current) - { - used_bytes += current->pos - current->begin; - current = current->prev; - } - - return used_bytes; - } + size_t reservedBytes() const { return reserved_bytes; } /// Bad method, don't use it -- the MemoryChunks are not your business, the entire /// purpose of the arena code is to manage them for you, so if you find diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql index f66857ddee6..26bc9ebe62b 100644 --- a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql @@ -38,6 +38,16 @@ WHERE type = 2 AND event_date >= yesterday() ORDER BY event_time_microseconds; +-- Size of ColumnAggregateFunction: Number of pointers * pointer size + arena size +-- 1 * 8 + AggregateFunction(argMax, String, DateTime) +-- +-- Size of AggregateFunction(argMax, String, DateTime): +-- SingleValueDataString() + SingleValueDataFixed(DateTime) +-- SingleValueDataString = 64B for small strings, 64B + string size + 1 for larger +-- SingleValueDataFixed(DateTime) = 1 + 4. With padding = 8 +-- SingleValueDataString Total: 72B +-- +-- ColumnAggregateFunction total: 8 + 72 = 80 SELECT 'AggregateFunction(argMax, String, DateTime)', read_rows, read_bytes From 3df33d74f520b5604a1f2c0bf03844ffd7682a4c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 13 Apr 2023 17:22:12 +0200 Subject: [PATCH 16/71] Get rid of another docker image --- docker/test/stateless-analyzer/Dockerfile | 7 - docker/test/stateless-analyzer/run.sh | 230 ------------------ docker/test/stateless/run.sh | 6 +- .../broken_tests.txt | 0 tests/ci/functional_test_check.py | 1 + 5 files changed, 6 insertions(+), 238 deletions(-) delete mode 100644 docker/test/stateless-analyzer/Dockerfile delete mode 100755 docker/test/stateless-analyzer/run.sh rename {docker/test/stateless-analyzer => tests}/broken_tests.txt (100%) diff --git a/docker/test/stateless-analyzer/Dockerfile b/docker/test/stateless-analyzer/Dockerfile deleted file mode 100644 index 9cbcc69c795..00000000000 --- a/docker/test/stateless-analyzer/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -ARG FROM_TAG=latest -FROM clickhouse/stateless-test:$FROM_TAG - -COPY run.sh / -COPY broken_tests.txt / - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless-analyzer/run.sh b/docker/test/stateless-analyzer/run.sh deleted file mode 100755 index d3bb78400e2..00000000000 --- a/docker/test/stateless-analyzer/run.sh +++ /dev/null @@ -1,230 +0,0 @@ -#!/bin/bash - -# fail on errors, verbose and export all env variables -set -e -x -a - -# Choose random timezone for this test run. -TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" -echo "Choosen random timezone $TZ" -ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone - -dpkg -i package_folder/clickhouse-common-static_*.deb -dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -dpkg -i package_folder/clickhouse-server_*.deb -dpkg -i package_folder/clickhouse-client_*.deb - -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test - -# install test configs -/usr/share/clickhouse-test/config/install.sh - -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - echo "Azure is disabled" -else - azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -fi - -./setup_minio.sh stateless -./setup_hdfs_minicluster.sh - -# For flaky check we also enable thread fuzzer -if [ "$NUM_TRIES" -gt "1" ]; then - export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 - export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 - export THREAD_FUZZER_SLEEP_TIME_US=100000 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 - - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 - export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 - export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 - - mkdir -p /var/run/clickhouse-server - # simpliest way to forward env variables to server - sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid -else - sudo clickhouse start -fi - -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - mkdir -p /var/run/clickhouse-server1 - sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 - sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ - --pid-file /var/run/clickhouse-server1/clickhouse-server.pid \ - -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \ - --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \ - --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ - --mysql_port 19004 --postgresql_port 19005 \ - --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \ - --prometheus.port 19988 \ - --macros.replica r2 # It doesn't work :( - - mkdir -p /var/run/clickhouse-server2 - sudo chown clickhouse:clickhouse /var/run/clickhouse-server2 - sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server2/config.xml --daemon \ - --pid-file /var/run/clickhouse-server2/clickhouse-server.pid \ - -- --path /var/lib/clickhouse2/ --logger.stderr /var/log/clickhouse-server/stderr2.log \ - --logger.log /var/log/clickhouse-server/clickhouse-server2.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server2.err.log \ - --tcp_port 29000 --tcp_port_secure 29440 --http_port 28123 --https_port 28443 --interserver_http_port 29009 --tcp_with_proxy_port 29010 \ - --mysql_port 29004 --postgresql_port 29005 \ - --keeper_server.tcp_port 29181 --keeper_server.server_id 3 \ - --prometheus.port 29988 \ - --macros.shard s2 # It doesn't work :( - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) -fi - -sleep 5 - -function run_tests() -{ - set -x - # We can have several additional options so we pass them as array because it is more ideologically correct. - read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" - - HIGH_LEVEL_COVERAGE=YES - - # Use random order in flaky check - if [ "$NUM_TRIES" -gt "1" ]; then - ADDITIONAL_OPTIONS+=('--order=random') - HIGH_LEVEL_COVERAGE=NO - fi - - if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then - ADDITIONAL_OPTIONS+=('--s3-storage') - fi - - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - ADDITIONAL_OPTIONS+=('--replicated-database') - ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('2') - else - # Too many tests fail for DatabaseReplicated in parallel. All other - # configurations are OK. - ADDITIONAL_OPTIONS+=('--jobs') - ADDITIONAL_OPTIONS+=('8') - fi - - if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then - ADDITIONAL_OPTIONS+=('--run-by-hash-num') - ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") - ADDITIONAL_OPTIONS+=('--run-by-hash-total') - ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") - HIGH_LEVEL_COVERAGE=NO - fi - - if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then - ADDITIONAL_OPTIONS+=('--db-engine=Ordinary') - fi - - if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then - ADDITIONAL_OPTIONS+=('--report-coverage') - fi - - ADDITIONAL_OPTIONS+=('--report-logs-stats') - - set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ - | ts '%Y-%m-%d %H:%M:%S' \ - | tee -a test_output/test_result.txt - set -e -} - -export -f run_tests - -if [ "$NUM_TRIES" -gt "1" ]; then - # We don't run tests with Ordinary database in PRs, only in master. - # So run new/changed tests with Ordinary at least once in flaky check. - timeout "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ - | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: -fi - -timeout "$MAX_RUN_TIME" bash -c run_tests ||: - -echo "Files in current directory" -ls -la ./ -echo "Files in root directory" -ls -la / - -/process_functional_tests_result.py --broken_tests=broken_tests.txt || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv - -clickhouse-client -q "system flush logs" ||: - -# Stop server so we can safely read data with clickhouse-local. -# Why do we read data with clickhouse-local? -# Because it's the simplest way to read it when server has crashed. -sudo clickhouse stop ||: -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: - sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: -fi - -rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: -rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: -zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & - -# Compress tables. -# -# NOTE: -# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data -# directly -# - even though ci auto-compress some files (but not *.tsv) it does this only -# for files >64MB, we want this files to be compressed explicitly -for table in query_log zookeeper_log trace_log transactions_info_log -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: - clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: - fi -done - -# Also export trace log in flamegraph-friendly format. -for trace_type in CPU Memory Real -do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q " - select - arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, - count(*) AS samples - from system.trace_log - where trace_type = '$trace_type' - group by trace - order by samples desc - settings allow_introspection_functions = 1 - format TabSeparated" \ - | zstd --threads=0 > "/test_output/trace-log-$trace_type-flamegraph.tsv.zst" ||: -done - - -# Compressed (FIXME: remove once only github actions will be left) -rm /var/log/clickhouse-server/clickhouse-server.log -mv /var/log/clickhouse-server/stderr.log /test_output/ ||: -if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then - tar --zstd -chf /test_output/clickhouse_coverage.tar.zst /profraw ||: -fi - -tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: - -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: - rg -Fa "" /var/log/clickhouse-server/clickhouse-server2.log ||: - zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: - zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.zst ||: - # FIXME: remove once only github actions will be left - rm /var/log/clickhouse-server/clickhouse-server1.log - rm /var/log/clickhouse-server/clickhouse-server2.log - mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: - mv /var/log/clickhouse-server/stderr2.log /test_output/ ||: - tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: - tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: -fi diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index e509809c028..6c8376b7676 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -156,7 +156,11 @@ ls -la ./ echo "Files in root directory" ls -la / -/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +if [[ -n "$USE_NEW_ANALYZER" ]] && [[ "$USE_NEW_ANALYZER" -eq 1 ]]; then + /process_functional_tests_result.py --broken-tests=/broken_tests.txt || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +else + /process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +fi clickhouse-client -q "system flush logs" ||: diff --git a/docker/test/stateless-analyzer/broken_tests.txt b/tests/broken_tests.txt similarity index 100% rename from docker/test/stateless-analyzer/broken_tests.txt rename to tests/broken_tests.txt diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index c4ce77c050e..30fd1babfca 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -111,6 +111,7 @@ def get_run_command( return ( f"docker run --volume={builds_path}:/package_folder " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " + f"--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt " f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) From a9fcff72f35922fe2dbaa394d36b37352b4a93a1 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 13 Apr 2023 12:57:21 -0400 Subject: [PATCH 17/71] Update docs --- docs/en/sql-reference/statements/grant.md | 2 +- docs/ru/sql-reference/statements/grant.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 9bfc659a179..e7b239e40a9 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -38,7 +38,7 @@ The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or ## Grant Current Grants Syntax ``` sql -GRANT CURRENT GRANTS[(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*})] TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] ``` - `privilege` — Type of privilege. diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 8c291b9f878..9b8fafabfcc 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -40,7 +40,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US ## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax} ```sql -GRANT CURRENT GRANTS[(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*})] TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] ``` - `privilege` — Тип привилегии From 2c20850cde6e5ce62f55da8845df560a4657d019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 14 Apr 2023 10:35:33 +0200 Subject: [PATCH 18/71] Style --- src/IO/ReadHelpersArena.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/ReadHelpersArena.h b/src/IO/ReadHelpersArena.h index 3ff65817e33..b88d5c037d4 100644 --- a/src/IO/ReadHelpersArena.h +++ b/src/IO/ReadHelpersArena.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include From d2351df69e02646a5e70e209b903c8d7fa93bad7 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Fri, 14 Apr 2023 15:13:10 +0200 Subject: [PATCH 19/71] Update tests/ci/functional_test_check.py --- tests/ci/functional_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 30fd1babfca..4d3e3604397 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -65,7 +65,7 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): def get_image_name(check_name): if "analyzer" in check_name.lower(): - return "clickhouse/stateless-analyzer" + return "clickhouse/stateless-test" if "stateless" in check_name.lower(): return "clickhouse/stateless-test" if "stateful" in check_name.lower(): From b04a4ec28769dd8991a17b1964bfe03025a17f6c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 14 Apr 2023 15:25:06 +0200 Subject: [PATCH 20/71] Fix used image --- tests/ci/functional_test_check.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 4d3e3604397..3a9951c1cdb 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -64,8 +64,6 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): def get_image_name(check_name): - if "analyzer" in check_name.lower(): - return "clickhouse/stateless-test" if "stateless" in check_name.lower(): return "clickhouse/stateless-test" if "stateful" in check_name.lower(): From db71bbb08da0832d1f08bbdad62c6379944c85ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 17 Apr 2023 10:59:40 +0200 Subject: [PATCH 21/71] Fix examples build --- src/Interpreters/examples/string_hash_map.cpp | 2 +- src/Interpreters/examples/string_hash_set.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/examples/string_hash_map.cpp b/src/Interpreters/examples/string_hash_map.cpp index 15df8c399de..f55ed983fbc 100644 --- a/src/Interpreters/examples/string_hash_map.cpp +++ b/src/Interpreters/examples/string_hash_map.cpp @@ -156,7 +156,7 @@ void NO_INLINE bench(const std::vector & data, DB::Arena &, const cha } watch.stop(); - std::cerr << "arena-memory " << pool.size() + map.getBufferSizeInBytes() << std::endl; + std::cerr << "arena-memory " << pool.allocatedBytes() + map.getBufferSizeInBytes() << std::endl; std::cerr << "single-run " << std::setprecision(3) << watch.elapsedSeconds() << std::endl; } diff --git a/src/Interpreters/examples/string_hash_set.cpp b/src/Interpreters/examples/string_hash_set.cpp index 355789e97ec..527ada1579d 100644 --- a/src/Interpreters/examples/string_hash_set.cpp +++ b/src/Interpreters/examples/string_hash_set.cpp @@ -34,7 +34,7 @@ void NO_INLINE bench(const std::vector & data, DB::Arena & pool, cons } watch.stop(); - std::cerr << "arena-memory " << pool.size() + set.getBufferSizeInBytes() << std::endl; + std::cerr << "arena-memory " << pool.allocatedBytes() + set.getBufferSizeInBytes() << std::endl; std::cerr << "single-run " << std::setprecision(3) << watch.elapsedSeconds() << std::endl; } From 7ed4c413e3706ad6fbff573c1021d61ed5175d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= Date: Fri, 7 Apr 2023 18:53:18 +0800 Subject: [PATCH 22/71] add settings allow_suspicious_indices --- src/Core/SettingsChangesHistory.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 42 +++++++++++++++++-- src/Storages/MergeTree/MergeTreeData.h | 2 + src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../02710_allow_suspicious_indices.reference | 0 .../02710_allow_suspicious_indices.sql | 20 +++++++++ 6 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02710_allow_suspicious_indices.reference create mode 100644 tests/queries/0_stateless/02710_allow_suspicious_indices.sql diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4f89397ed9d..b6366fcd96f 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}}, {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a49dbe21f1b..000a6242e57 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -43,8 +43,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -54,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +90,7 @@ #include #include +#include #include #include @@ -454,6 +459,7 @@ void MergeTreeData::checkProperties( "{} is greater than the sorting key length: {}", primary_key_size, sorting_key_size); NameSet primary_key_columns_set; + bool allow_suspicious_indices = getSettings()->allow_suspicious_indices; for (size_t i = 0; i < sorting_key_size; ++i) { @@ -467,7 +473,7 @@ void MergeTreeData::checkProperties( "Primary key must be a prefix of the sorting key, " "but the column in the position {} is {}", i, sorting_key_column +", not " + pk_column); - if (!primary_key_columns_set.emplace(pk_column).second) + if (!primary_key_columns_set.emplace(pk_column).second && !allow_suspicious_indices && !attach) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); } @@ -530,6 +536,12 @@ void MergeTreeData::checkProperties( for (const auto & index : new_metadata.secondary_indices) { + if (!allow_suspicious_indices && !attach) + { + const auto * index_ast = typeid_cast(index.definition_ast.get()); + checkSuspiciousIndices(index_ast->expr->as()); + } + MergeTreeIndexFactory::instance().validate(index, attach); if (indices_names.find(index.name) != indices_names.end()) @@ -989,6 +1001,20 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( return res; } +void MergeTreeData::checkSuspiciousIndices(const ASTFunction & index_function) const +{ + NameSet index_key_set; + for (const auto & child : index_function.arguments->children) + { + const auto & tree_hash = child->getTreeHash(); + const String key = toString(tree_hash.first); + + if (!index_key_set.emplace(key).second) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key or secondary indices contains duplicate expressios"); + } +} + String MergeTreeData::MergingParams::getModeName() const { @@ -2976,10 +3002,18 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) + if (command.type == AlterCommand::MODIFY_ORDER_BY) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); + bool allow_suspicious_indices = getSettings()->allow_suspicious_indices; + if (!allow_suspicious_indices) + { + const auto * alter_ast = command.ast->as(); + checkSuspiciousIndices(alter_ast->order_by->as()); + } + + if (!is_custom_partitioned) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_TTL && !is_custom_partitioned) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8312efa216d..b0fe631891f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -431,6 +431,8 @@ public: bool supportsLightweightDelete() const override; + void checkSuspiciousIndices(const ASTFunction & index_funtion) const; + NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 4aabcbbf865..fd44f7ec5f5 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -132,6 +132,7 @@ struct Settings; M(Bool, compatibility_allow_sampling_expression_not_in_primary_key, false, "Allow to create a table with sampling expression not in primary key. This is needed only to temporarily allow to run the server with wrong tables for backward compatibility.", 0) \ M(Bool, use_minimalistic_checksums_in_zookeeper, true, "Use small format (dozens bytes) for part checksums in ZooKeeper instead of ordinary ones (dozens KB). Before enabling check that all replicas support new format.", 0) \ M(Bool, use_minimalistic_part_header_in_zookeeper, true, "Store part header (checksums and columns) in a compact format and a single part znode instead of separate znodes (/columns and /checksums). This can dramatically reduce snapshot size in ZooKeeper. Before enabling check that all replicas support new format.", 0) \ + M(Bool, allow_suspicious_indices, false, "If true, index can be defined with identical expressions", 0) \ M(UInt64, finished_mutations_to_keep, 100, "How many records about mutations that are done to keep. If zero, then keep all of them.", 0) \ M(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ M(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.reference b/tests/queries/0_stateless/02710_allow_suspicious_indices.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql new file mode 100644 index 00000000000..b521a14a1f1 --- /dev/null +++ b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id, id) settings allow_suspicious_indices = 1; +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id, id) settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax GRANULARITY 1) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 1; +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 1; +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax GRANULARITY 1) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; +ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id + 1, id2 + 1, id2 + 1); -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; +ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id + 1, id2 + 1, id + 1); -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; + From d1a14900ea5654941f2df83bc0d1adc18a7542a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= Date: Mon, 17 Apr 2023 20:26:31 +0800 Subject: [PATCH 23/71] remove unused h --- src/Storages/MergeTree/MergeTreeData.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 000a6242e57..44fca62291e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -90,7 +90,6 @@ #include #include -#include #include #include From 61e552d660e8da2b3dcd5c55a7e1c4f4435a6c0a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 17 Apr 2023 15:23:02 +0000 Subject: [PATCH 24/71] Fix used build --- tests/ci/ci_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 2f35b337cb3..811ae16a495 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -259,6 +259,9 @@ CI_CONFIG = { "Stateless tests (release, wide parts enabled)": { "required_build": "package_release", }, + "Stateless tests (release, analyzer)": { + "required_build": "package_release", + }, "Stateless tests (release, DatabaseOrdinary)": { "required_build": "package_release", }, From 6197d3d55fcfcaf60c3f0be5361b76d2e461a1d7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 18 Apr 2023 00:12:30 +0000 Subject: [PATCH 25/71] Pass volume only for analyzer --- docker/test/stateless/run.sh | 6 +----- docker/test/util/process_functional_tests_result.py | 4 ++-- tests/ci/functional_test_check.py | 5 ++++- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 6c8376b7676..e509809c028 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -156,11 +156,7 @@ ls -la ./ echo "Files in root directory" ls -la / -if [[ -n "$USE_NEW_ANALYZER" ]] && [[ "$USE_NEW_ANALYZER" -eq 1 ]]; then - /process_functional_tests_result.py --broken-tests=/broken_tests.txt || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv -else - /process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv -fi +/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv clickhouse-client -q "system flush logs" ||: diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 365a1b22523..59bec585681 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -201,11 +201,11 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - parser.add_argument("--broken-tests", default="") + parser.add_argument("--broken-tests", default="/broken_tests.txt") args = parser.parse_args() broken_tests = list() - if len(args.broken_tests) != 0: + if os.path.exists(args.broken_tests): with open(args.broken_tests) as f: broken_tests = f.readlines() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 3a9951c1cdb..ce8deb206f7 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -73,6 +73,7 @@ def get_image_name(check_name): def get_run_command( + check_name, builds_path, repo_tests_path, result_path, @@ -105,11 +106,12 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) + volume_with_broken_test = "--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" if "analyzer" in check_name else "" return ( f"docker run --volume={builds_path}:/package_folder " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " - f"--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt " + f"{volume_with_broken_test} " f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" ) @@ -325,6 +327,7 @@ def main(): additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( + check_name, packages_path, repo_tests_path, result_path, From f6127a68acf4d613e5b8e7d4bcfd6170f78c20ac Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 18 Apr 2023 00:56:29 +0000 Subject: [PATCH 26/71] Automatic style fix --- tests/ci/functional_test_check.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ce8deb206f7..044eeb6fb2c 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -106,7 +106,11 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) - volume_with_broken_test = "--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" if "analyzer" in check_name else "" + volume_with_broken_test = ( + "--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" + if "analyzer" in check_name + else "" + ) return ( f"docker run --volume={builds_path}:/package_folder " From a975211185094978833e74930b0a0dab513ad148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= Date: Tue, 18 Apr 2023 10:26:35 +0800 Subject: [PATCH 27/71] add more tests --- src/Storages/MergeTree/MergeTreeData.cpp | 28 +++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 2 -- .../02710_allow_suspicious_indices.sql | 10 ++++--- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 44fca62291e..49ad9f5493f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -176,6 +176,19 @@ namespace ErrorCodes extern const int SOCKET_TIMEOUT; } +static void checkSuspiciousIndices(const ASTFunction & index_function) +{ + NameSet index_key_set; + for (const auto & child : index_function.arguments->children) + { + const auto & tree_hash = child->getTreeHash(); + const String key = toString(tree_hash.first); + + if (!index_key_set.emplace(key).second) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key or secondary indices contains duplicate expressios"); + } +} static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) { @@ -1000,21 +1013,6 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( return res; } -void MergeTreeData::checkSuspiciousIndices(const ASTFunction & index_function) const -{ - NameSet index_key_set; - for (const auto & child : index_function.arguments->children) - { - const auto & tree_hash = child->getTreeHash(); - const String key = toString(tree_hash.first); - - if (!index_key_set.emplace(key).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Primary key or secondary indices contains duplicate expressios"); - } -} - - String MergeTreeData::MergingParams::getModeName() const { switch (mode) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b0fe631891f..8312efa216d 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -431,8 +431,6 @@ public: bool supportsLightweightDelete() const override; - void checkSuspiciousIndices(const ASTFunction & index_funtion) const; - NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql index b521a14a1f1..57dd381202c 100644 --- a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql +++ b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql @@ -3,12 +3,8 @@ CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id, id) settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS allow_suspicious_indices; -CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax GRANULARITY 1) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 1; -DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 1; DROP TABLE IF EXISTS allow_suspicious_indices; -CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax GRANULARITY 1) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } -DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32, index idx (id, id) TYPE minmax) ENGINE = MergeTree() order by id settings allow_suspicious_indices = 0; -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; @@ -17,4 +13,10 @@ DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id + 1, id2 + 1, id + 1); -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; +ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; -- { serverError BAD_ARGUMENTS } +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 1; +ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; +DROP TABLE IF EXISTS allow_suspicious_indices; From 5557f4524cb971e09577d5b9756f01024c13d221 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 18 Apr 2023 08:44:46 +0000 Subject: [PATCH 28/71] Fix string formatting --- tests/ci/functional_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 044eeb6fb2c..813386bc0db 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -107,7 +107,7 @@ def get_run_command( env_str = " ".join(envs) volume_with_broken_test = ( - "--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" + f"--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" if "analyzer" in check_name else "" ) From 308970da83c33cf4107ded167dcb29aa4131b20a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 18 Apr 2023 13:53:13 +0000 Subject: [PATCH 29/71] Add logging --- docker/test/util/process_functional_tests_result.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 59bec585681..53913b2fbe7 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -206,8 +206,10 @@ if __name__ == "__main__": broken_tests = list() if os.path.exists(args.broken_tests): + logging.info(f"File {args.broken_tests} with broken tests found") with open(args.broken_tests) as f: - broken_tests = f.readlines() + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") From 544972bcda3089bf022ced6ff0c154e9b1888a4a Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 20 Apr 2023 18:31:35 -0400 Subject: [PATCH 30/71] Solve PR issues --- .../Access/InterpreterGrantQuery.cpp | 11 ++-- src/Parsers/Access/ParserGrantQuery.cpp | 65 ++++++++++--------- 2 files changed, 37 insertions(+), 39 deletions(-) diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index 0bce0b2a6bd..77474d68795 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -363,15 +363,12 @@ namespace { AccessRightsElements current_user_grantable_elements; auto available_grant_elements = current_user_access->getAccessRights()->getElements(); - std::copy_if( - available_grant_elements.begin(), - available_grant_elements.end(), - std::back_inserter(current_user_grantable_elements), - [](AccessRightsElement x) { return x.grant_option || x.is_partial_revoke; }); - AccessRights current_user_rights; - for (auto & element : current_user_grantable_elements) + for (auto & element : available_grant_elements) { + if (!element.grant_option && !element.is_partial_revoke) + continue; + if (element.is_partial_revoke) current_user_rights.revoke(element); else diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 2ff110f136a..6869d665385 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -183,6 +183,37 @@ namespace }); } + bool parseCurrentGrants(IParser::Pos & pos, Expected & expected, AccessRightsElements & elements) + { + if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) + { + if (!parseElementsWithoutOptions(pos, expected, elements)) + return false; + + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + } + else + { + AccessRightsElement default_element(AccessType::ALL); + + if (!ParserKeyword{"ON"}.ignore(pos, expected)) + return false; + + String database_name, table_name; + bool any_database = false, any_table = false; + if (!parseDatabaseAndTableNameOrAsterisks(pos, expected, database_name, any_database, table_name, any_table)) + return false; + + default_element.any_database = any_database; + default_element.database = database_name; + default_element.any_table = any_table; + default_element.table = table_name; + elements.push_back(std::move(default_element)); + } + + return true; + } void throwIfNotGrantable(AccessRightsElements & elements) { @@ -288,38 +319,8 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!is_revoke && ParserKeyword{"CURRENT GRANTS"}.ignore(pos, expected)) { current_grants = true; - if (ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) - { - if (!parseElementsWithoutOptions(pos, expected, elements)) - return false; - - if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) - return false; - - /// If no elements were specified it will grant all available for user grants. - /// Using `.size() == 0` because `.empty()` is overridden and returns true for NONE elements. - /// Specifically, this should handle `GRANT CURRENT GRANTS() ...` - if (elements.size() == 0) // NOLINT - elements.emplace_back(AccessType::ALL); - } - else - { - AccessRightsElement default_element(AccessType::ALL); - - if (!ParserKeyword{"ON"}.ignore(pos, expected)) - return false; - - String database_name, table_name; - bool any_database = false, any_table = false; - if (!parseDatabaseAndTableNameOrAsterisks(pos, expected, database_name, any_database, table_name, any_table)) - return false; - - default_element.any_database = any_database; - default_element.database = database_name; - default_element.any_table = any_table; - default_element.table = table_name; - elements.push_back(std::move(default_element)); - } + if (!parseCurrentGrants(pos, expected, elements)) + return false; } else { From de18bf72b4cd2b0fdc508b26e399e038a17ed703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= Date: Fri, 21 Apr 2023 17:27:26 +0800 Subject: [PATCH 31/71] modify code --- src/Core/Settings.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 50 +++++++++++-------- src/Storages/MergeTree/MergeTreeData.h | 4 +- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- .../02710_allow_suspicious_indices.sql | 12 +++-- tests/queries/0_stateless/lt.sql | 7 +++ 7 files changed, 50 insertions(+), 28 deletions(-) create mode 100644 tests/queries/0_stateless/lt.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 88724b118ee..84719e79c4e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -129,6 +129,7 @@ class IColumn; \ M(Bool, allow_suspicious_low_cardinality_types, false, "In CREATE TABLE statement allows specifying LowCardinality modifier for types of small fixed size (8 or less). Enabling this may increase merge times and memory consumption.", 0) \ M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \ + M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ M(Bool, compile_aggregate_expressions, false, "Compile aggregate functions to native code. This feature has a bug and should not be used.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 49ad9f5493f..47b0ea4701b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,3 +1,4 @@ +#include "Interpreters/Context_fwd.h" #include "Storages/MergeTree/MergeTreeDataPartBuilder.h" #include @@ -80,6 +81,7 @@ #include #include +#include #include #include #include @@ -176,10 +178,10 @@ namespace ErrorCodes extern const int SOCKET_TIMEOUT; } -static void checkSuspiciousIndices(const ASTFunction & index_function) +static void checkSuspiciousIndices(const ASTFunction * index_function) { NameSet index_key_set; - for (const auto & child : index_function.arguments->children) + for (const auto & child : index_function->arguments->children) { const auto & tree_hash = child->getTreeHash(); const String key = toString(tree_hash.first); @@ -456,7 +458,10 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam } void MergeTreeData::checkProperties( - const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) const + const StorageInMemoryMetadata & new_metadata, + const StorageInMemoryMetadata & old_metadata, + bool attach, + ContextPtr local_context) const { if (!new_metadata.sorting_key.definition_ast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); @@ -471,8 +476,18 @@ void MergeTreeData::checkProperties( "{} is greater than the sorting key length: {}", primary_key_size, sorting_key_size); NameSet primary_key_columns_set; + bool allow_suspicious_indices = getSettings()->allow_suspicious_indices; + if (local_context) + allow_suspicious_indices = local_context->getSettingsRef().allow_suspicious_indices ? true : allow_suspicious_indices; + + if (!allow_suspicious_indices && !attach) + { + if (const auto * index_function = typeid_cast(new_sorting_key.definition_ast.get())) + checkSuspiciousIndices(index_function); + } + for (size_t i = 0; i < sorting_key_size; ++i) { const String & sorting_key_column = new_sorting_key.column_names[i]; @@ -485,9 +500,6 @@ void MergeTreeData::checkProperties( "Primary key must be a prefix of the sorting key, " "but the column in the position {} is {}", i, sorting_key_column +", not " + pk_column); - if (!primary_key_columns_set.emplace(pk_column).second && !allow_suspicious_indices && !attach) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); - } } @@ -551,7 +563,8 @@ void MergeTreeData::checkProperties( if (!allow_suspicious_indices && !attach) { const auto * index_ast = typeid_cast(index.definition_ast.get()); - checkSuspiciousIndices(index_ast->expr->as()); + if (const auto * index_function = typeid_cast(index_ast->expr)) + checkSuspiciousIndices(index_function); } MergeTreeIndexFactory::instance().validate(index, attach); @@ -579,9 +592,12 @@ void MergeTreeData::checkProperties( checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key); } -void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) +void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, + const StorageInMemoryMetadata & old_metadata, + bool attach, + const ContextPtr local_context) { - checkProperties(new_metadata, old_metadata, attach); + checkProperties(new_metadata, old_metadata, attach, local_context); setInMemoryMetadata(new_metadata); } @@ -2999,18 +3015,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - if (command.type == AlterCommand::MODIFY_ORDER_BY) + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { - bool allow_suspicious_indices = getSettings()->allow_suspicious_indices; - if (!allow_suspicious_indices) - { - const auto * alter_ast = command.ast->as(); - checkSuspiciousIndices(alter_ast->order_by->as()); - } - - if (!is_custom_partitioned) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_TTL && !is_custom_partitioned) { @@ -3131,7 +3139,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - checkProperties(new_metadata, old_metadata); + checkProperties(new_metadata, old_metadata, false, local_context); checkTTLExpressions(new_metadata, old_metadata); if (!columns_to_check_conversion.empty()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8312efa216d..ceec8d25477 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1218,9 +1218,9 @@ protected: /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; - void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false) const; + void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr) const; - void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false); + void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr); void checkPartitionKeyAndInitMinMax(const KeyDescription & new_partition_key); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index fd44f7ec5f5..56d0d6431a4 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -129,10 +129,10 @@ struct Settings; M(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \ \ /** Compatibility settings */ \ + M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ M(Bool, compatibility_allow_sampling_expression_not_in_primary_key, false, "Allow to create a table with sampling expression not in primary key. This is needed only to temporarily allow to run the server with wrong tables for backward compatibility.", 0) \ M(Bool, use_minimalistic_checksums_in_zookeeper, true, "Use small format (dozens bytes) for part checksums in ZooKeeper instead of ordinary ones (dozens KB). Before enabling check that all replicas support new format.", 0) \ M(Bool, use_minimalistic_part_header_in_zookeeper, true, "Store part header (checksums and columns) in a compact format and a single part znode instead of separate znodes (/columns and /checksums). This can dramatically reduce snapshot size in ZooKeeper. Before enabling check that all replicas support new format.", 0) \ - M(Bool, allow_suspicious_indices, false, "If true, index can be defined with identical expressions", 0) \ M(UInt64, finished_mutations_to_keep, 100, "How many records about mutations that are done to keep. If zero, then keep all of them.", 0) \ M(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ M(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 71a826fbc22..e292f68e668 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -330,7 +330,7 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); checkTTLExpressions(new_metadata, old_metadata); /// Reinitialize primary key because primary key column types might have changed. - setProperties(new_metadata, old_metadata); + setProperties(new_metadata, old_metadata, false, local_context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql index 57dd381202c..38ec6ca5b4e 100644 --- a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql +++ b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql @@ -12,11 +12,17 @@ ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (i DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id + 1, id2 + 1, id + 1); -- { serverError BAD_ARGUMENTS } -DROP TABLE IF EXISTS allow_suspicious_indices; +DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 0; ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; -- { serverError BAD_ARGUMENTS } -DROP TABLE IF EXISTS allow_suspicious_indices; +DROP TABLE IF EXISTS allow_suspicious_indices; CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by id + 1 settings allow_suspicious_indices = 1; ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; -DROP TABLE IF EXISTS allow_suspicious_indices; +DROP TABLE IF EXISTS allow_suspicious_indices; +SET allow_suspicious_indices = 1; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id); +ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id, id2 + 1, id2 + 1); +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id); +ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; diff --git a/tests/queries/0_stateless/lt.sql b/tests/queries/0_stateless/lt.sql new file mode 100644 index 00000000000..2753c1a3691 --- /dev/null +++ b/tests/queries/0_stateless/lt.sql @@ -0,0 +1,7 @@ +SET allow_suspicious_indices = 1; +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id); +ALTER TABLE allow_suspicious_indices ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id, id2 + 1, id2 + 1); +DROP TABLE IF EXISTS allow_suspicious_indices; +CREATE TABLE allow_suspicious_indices (id UInt32) ENGINE = MergeTree() order by (id); +ALTER TABLE allow_suspicious_indices ADD INDEX idx (id+1, id+1) type minmax; From 797d5bbfe799f35608d6234311230b8f11c4319d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= Date: Fri, 21 Apr 2023 18:08:14 +0800 Subject: [PATCH 32/71] modify code --- src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 47b0ea4701b..97b58b71cbf 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,4 +1,3 @@ -#include "Interpreters/Context_fwd.h" #include "Storages/MergeTree/MergeTreeDataPartBuilder.h" #include @@ -43,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -459,8 +459,8 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, - const StorageInMemoryMetadata & old_metadata, - bool attach, + const StorageInMemoryMetadata & old_metadata, + bool attach, ContextPtr local_context) const { if (!new_metadata.sorting_key.definition_ast) @@ -592,10 +592,10 @@ void MergeTreeData::checkProperties( checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key); } -void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, - const StorageInMemoryMetadata & old_metadata, - bool attach, - const ContextPtr local_context) +void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, + const StorageInMemoryMetadata & old_metadata, + bool attach, + const ContextPtr local_context) { checkProperties(new_metadata, old_metadata, attach, local_context); setInMemoryMetadata(new_metadata); From 5fef967872c172751164951e07bb472084bbe22c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 21 Apr 2023 18:11:54 +0000 Subject: [PATCH 33/71] Make error message better --- docker/test/util/process_functional_tests_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index 53913b2fbe7..e0f2fbddd2a 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -82,7 +82,7 @@ def process_test_log(log_path, broken_tests): test_name, "FAIL", test_time, - ["Test is expected to fail!\n"], + ["Test is expected to fail! Please, update broken_tests.txt!\n"], ) ) else: From e7e080b4a2639d8d930bcae6b34e3dd6bd38471f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 21 Apr 2023 18:12:49 +0000 Subject: [PATCH 34/71] Update broken_tests.txt --- tests/broken_tests.txt | 48 ------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index cca1c556a98..1441650f5fd 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -8,14 +8,11 @@ 00725_memory_tracking 00754_distributed_optimize_skip_select_on_unused_shards 00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere -00814_replicated_minimalistic_part_header_zookeeper 00838_unique_index -00897_flatten 00927_asof_joins 00940_order_by_read_in_order_query_plan 00945_bloom_filter_index 00952_input_function -00953_zookeeper_suetin_deduplication_bug 00979_set_index_not 00981_in_subquery_with_tuple 01049_join_low_card_bug_long @@ -42,9 +39,6 @@ 01270_optimize_skip_unused_shards_low_cardinality 01319_optimize_skip_unused_shards_nesting 01353_low_cardinality_join_types -01357_version_collapsing_attach_detach_zookeeper -01396_inactive_replica_cleanup_nodes_zookeeper -01414_mutations_and_errors_zookeeper 01428_nullable_asof_join 01455_shard_leaf_max_rows_bytes_to_read 01476_right_full_join_switch @@ -52,70 +46,43 @@ 01487_distributed_in_not_default_db 01495_subqueries_in_with_statement 01504_rocksdb -01505_trivial_count_with_partition_predicate 01527_dist_sharding_key_dictGet_reload 01528_allow_nondeterministic_optimize_skip_unused_shards 01540_verbatim_partition_pruning 01560_merge_distributed_join 01563_distributed_query_finish 01576_alias_column_rewrite -01582_distinct_optimization 01583_const_column_in_set_index 01584_distributed_buffer_cannot_find_column 01585_use_index_for_global_in 01585_use_index_for_global_in_with_null 01586_columns_pruning -01586_replicated_mutations_empty_partition -01593_concurrent_alter_mutations_kill_many_replicas_long -01600_detach_permanently 01615_random_one_shard_insertion -01622_constraints_simple_optimization 01624_soft_constraints -01650_drop_part_and_deduplication_zookeeper_long 01651_bugs_from_15889 01655_plan_optimizations 01655_plan_optimizations_optimize_read_in_window_order 01656_test_query_log_factories_info -01674_filter_by_uint8 01681_bloom_filter_nullable_column 01700_system_zookeeper_path_in -01705_normalize_create_alter_function_names -01710_aggregate_projections -01710_force_use_projection -01710_minmax_count_projection -01710_normal_projection_fix1 01710_projection_additional_filters -01710_projection_aggregation_in_order -01710_projection_in_index -01710_projection_vertical_merges -01710_projection_with_joins -01710_projections -01710_projections_in_distributed_query -01710_projections_optimize_aggregation_in_order -01710_projections_partial_optimize_aggregation_in_order 01721_join_implicit_cast_long -01737_move_order_key_to_prewhere_select_final 01739_index_hint 01747_join_view_filter_dictionary 01748_partition_id_pruning -01753_system_zookeeper_query_param_path_long 01756_optimize_skip_unused_shards_rewrite_in 01757_optimize_skip_unused_shards_limit 01758_optimize_skip_unused_shards_once 01759_optimize_skip_unused_shards_zero_shards 01761_cast_to_enum_nullable 01786_explain_merge_tree -01848_partition_value_column 01889_key_condition_function_chains -01889_sqlite_read_write 01890_materialized_distributed_join 01901_in_literal_shard_prune 01925_join_materialized_columns -01925_test_group_by_const_consistency 01925_test_storage_merge_aliases 01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery -01949_clickhouse_local_with_remote_localhost 01951_distributed_push_down_limit 01952_optimize_distributed_group_by_sharding_key 02000_join_on_const @@ -123,21 +90,15 @@ 02024_join_on_or_long 02131_used_row_policies_in_query_log 02139_MV_with_scalar_subquery -02149_read_in_order_fixed_prefix 02174_cte_scalar_cache_mv -02221_system_zookeeper_unrestricted_like 02242_join_rocksdb 02267_join_dup_columns_issue36199 02302_s3_file_pruning 02317_distinct_in_order_optimization_explain -02336_sort_optimization_with_fill 02341_global_join_cte 02343_aggregation_pipeline -02343_group_by_use_nulls_distributed 02345_implicit_transaction -02346_additional_filters 02346_additional_filters_distr -02346_additional_filters_index 02352_grouby_shadows_arg 02354_annoy 02366_union_decimal_conversion @@ -151,20 +112,11 @@ 02428_decimal_in_floating_point_literal 02428_parameterized_view 02458_use_structure_from_insertion_table -02479_mysql_connect_to_self 02479_race_condition_between_insert_and_droppin_mv -02481_aggregation_in_order_plan 02481_merge_array_join_sample_by -02483_check_virtuals_shile_using_structure_from_insertion_table 02493_inconsistent_hex_and_binary_number 02494_optimize_group_by_function_keys_and_alias_columns -02494_zero_copy_and_projection_and_mutation_work_together -02496_remove_redundant_sorting -02500_remove_redundant_distinct 02511_complex_literals_as_aggregate_function_parameters -02515_cleanup_async_insert_block_ids -02516_projections_and_context 02521_aggregation_by_partitions -02535_max_parallel_replicas_custom_key 02554_fix_grouping_sets_predicate_push_down 02575_merge_prewhere_different_default_kind \ No newline at end of file From eda137918007dc893f140946db7cee203c2cad0d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 21 Apr 2023 19:04:36 +0000 Subject: [PATCH 35/71] Automatic style fix --- docker/test/util/process_functional_tests_result.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index e0f2fbddd2a..3c1c6e2a795 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -82,7 +82,9 @@ def process_test_log(log_path, broken_tests): test_name, "FAIL", test_time, - ["Test is expected to fail! Please, update broken_tests.txt!\n"], + [ + "Test is expected to fail! Please, update broken_tests.txt!\n" + ], ) ) else: From 18672c2d4bf464042d9481e697205ac0d4324c8e Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 22 Apr 2023 14:17:31 +0000 Subject: [PATCH 36/71] bitCount support FixedString data type --- src/Functions/FunctionUnaryArithmetic.h | 88 +++++++++++++++++-- src/Functions/bitAnd.cpp | 4 +- src/Functions/bitCount.cpp | 4 +- src/Functions/bitHammingDistance.cpp | 4 +- src/Functions/bitNot.cpp | 4 +- src/Functions/bitOr.cpp | 4 +- src/Functions/factorial.cpp | 1 - .../0_stateless/01066_bit_count.reference | 1 + tests/queries/0_stateless/01066_bit_count.sql | 2 + 9 files changed, 93 insertions(+), 19 deletions(-) diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 2e4f8abd323..78dd20da75b 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -130,6 +130,48 @@ struct FixedStringUnaryOperationImpl } }; +template +struct FixedStringUnaryOperationReduceImpl +{ + MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(static UInt64 NO_INLINE), + vectorImpl, + MULTITARGET_FUNCTION_BODY((const UInt8 * start, const UInt8 * end) { /// NOLINT + UInt64 res = 0; + while (start < end) + res += Op::apply(*start++); + return res; + })) + + static UInt64 NO_INLINE vector(const UInt8 * start, const UInt8 * end) + { +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX512BW)) + { + return vectorImplAVX512BW(start, end); + } + + if (isArchSupported(TargetArch::AVX512F)) + { + return vectorImplAVX512F(start, end); + } + + if (isArchSupported(TargetArch::AVX2)) + { + return vectorImplAVX2(start, end); + } + + if (isArchSupported(TargetArch::SSE42)) + { + return vectorImplSSE42(start, end); + } +#endif + + return vectorImpl(start, end); + } +}; + + template struct FunctionUnaryArithmeticMonotonicity; @@ -143,6 +185,7 @@ class FunctionUnaryArithmetic : public IFunction { static constexpr bool allow_decimal = IsUnaryOperation::negate || IsUnaryOperation::abs || IsUnaryOperation::sign; static constexpr bool allow_fixed_string = Op::allow_fixed_string; + static constexpr bool reduce_fixed_string_for_chars = allow_fixed_string && Op::reduce_fixed_string_for_chars; static constexpr bool is_sign_function = IsUnaryOperation::sign; ContextPtr context; @@ -232,9 +275,18 @@ public: using DataType = std::decay_t; if constexpr (std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!allow_fixed_string) return false; - result = std::make_shared(type.getN()); + /// For `bitCount`, when argument is FixedString, it's return type + /// should be integer instead of FixedString, the return value is + /// the sum of `bitCount` apply to each chars. + else + { + if constexpr (reduce_fixed_string_for_chars) + result = std::make_shared(); + else + result = std::make_shared(type.getN()); + } } else if constexpr (std::is_same_v) { @@ -282,12 +334,32 @@ public: { if (const auto * col = checkAndGetColumn(arguments[0].column.get())) { - auto col_res = ColumnFixedString::create(col->getN()); - auto & vec_res = col_res->getChars(); - vec_res.resize(col->size() * col->getN()); - FixedStringUnaryOperationImpl>::vector(col->getChars(), vec_res); - result_column = std::move(col_res); - return true; + if constexpr (reduce_fixed_string_for_chars) + { + auto size = col->size(); + + auto col_res = ColumnUInt64::create(size); + auto & vec_res = col_res->getData(); + + const auto & chars = col->getChars(); + auto n = col->getN(); + for (size_t i = 0; i < size; ++i) + { + vec_res[i] = FixedStringUnaryOperationReduceImpl>::vector( + chars.data() + n * i, chars.data() + n * (i + 1)); + } + result_column = std::move(col_res); + return true; + } + else + { + auto col_res = ColumnFixedString::create(col->getN()); + auto & vec_res = col_res->getChars(); + vec_res.resize(col->size() * col->getN()); + FixedStringUnaryOperationImpl>::vector(col->getChars(), vec_res); + result_column = std::move(col_res); + return true; + } } } } diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 28f61ec66e1..8efc5181919 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -16,8 +16,8 @@ template struct BitAndImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; - static const constexpr bool allow_string_integer = false; + static constexpr bool allow_fixed_string = true; + static constexpr bool allow_string_integer = false; template static inline Result apply(A a, B b) diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index d425dd1dca2..2891a05ea0a 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,8 +13,8 @@ template struct BitCountImpl { using ResultType = UInt8; - static constexpr bool allow_fixed_string = false; - static const constexpr bool allow_string_integer = false; + static constexpr bool allow_fixed_string = true; + static constexpr bool reduce_fixed_string_for_chars = true; static inline ResultType apply(A a) { diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 75928c2a8af..2eaa397dd04 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -8,8 +8,8 @@ template struct BitHammingDistanceImpl { using ResultType = UInt8; - static const constexpr bool allow_fixed_string = false; - static const constexpr bool allow_string_integer = false; + static constexpr bool allow_fixed_string = true; + static constexpr bool allow_string_integer = false; template static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index f8bfad64494..971303d7342 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -17,8 +17,8 @@ template struct BitNotImpl { using ResultType = typename NumberTraits::ResultOfBitNot::Type; - static const constexpr bool allow_fixed_string = true; - static const constexpr bool allow_string_integer = false; + static constexpr bool allow_fixed_string = true; + static constexpr bool reduce_fixed_string_for_chars = false; static inline ResultType apply(A a) { diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index acdad33f38c..9e19fc55219 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -15,8 +15,8 @@ template struct BitOrImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr const bool allow_fixed_string = true; - static const constexpr bool allow_string_integer = false; + static constexpr bool allow_fixed_string = true; + static constexpr bool allow_string_integer = false; template static inline Result apply(A a, B b) diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index 4e96391bccd..0634e5f1721 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -18,7 +18,6 @@ struct FactorialImpl using ResultType = UInt64; static const constexpr bool allow_decimal = false; static const constexpr bool allow_fixed_string = false; - static const constexpr bool allow_string_integer = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) { diff --git a/tests/queries/0_stateless/01066_bit_count.reference b/tests/queries/0_stateless/01066_bit_count.reference index 4a3b084b4a2..136387f6412 100644 --- a/tests/queries/0_stateless/01066_bit_count.reference +++ b/tests/queries/0_stateless/01066_bit_count.reference @@ -19,3 +19,4 @@ 1 10 000000000000F03F -1 11 000000000000F0BF inf 11 000000000000F07F +Hello, world!!!! 55 diff --git a/tests/queries/0_stateless/01066_bit_count.sql b/tests/queries/0_stateless/01066_bit_count.sql index d50b2657542..855885fb1e1 100644 --- a/tests/queries/0_stateless/01066_bit_count.sql +++ b/tests/queries/0_stateless/01066_bit_count.sql @@ -11,3 +11,5 @@ SELECT bitCount(toInt16(-1)); SELECT bitCount(toInt8(-1)); SELECT x, bitCount(x), hex(reinterpretAsString(x)) FROM VALUES ('x Float64', (1), (-1), (inf)); + +SELECT toFixedString('Hello, world!!!!', 16) AS x, bitCount(x); From 9842b1a977d8047c94b37a17ea244f57ef1cd9c3 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 22 Apr 2023 14:20:45 +0000 Subject: [PATCH 37/71] revert --- src/Functions/bitHammingDistance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 2eaa397dd04..040f45eaedd 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -8,7 +8,7 @@ template struct BitHammingDistanceImpl { using ResultType = UInt8; - static constexpr bool allow_fixed_string = true; + static constexpr bool allow_fixed_string = false; static constexpr bool allow_string_integer = false; template From a6269bff5784ef255baa596c9cb0f9d1d0d19066 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 22 Apr 2023 14:45:38 +0000 Subject: [PATCH 38/71] fix build --- src/Functions/FunctionUnaryArithmetic.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 78dd20da75b..43313ce1673 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -1,15 +1,16 @@ #pragma once -#include -#include -#include -#include -#include -#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include From f4fe64335d7015fbd43fa462cfd92c0e41bb4bde Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 22 Apr 2023 17:21:43 +0000 Subject: [PATCH 39/71] support string --- src/Functions/FunctionUnaryArithmetic.h | 81 +++++++++++++++++++++---- src/Functions/IsOperation.h | 2 + src/Functions/abs.cpp | 3 +- src/Functions/bitCount.cpp | 3 +- src/Functions/bitNot.cpp | 3 +- src/Functions/bitSwapLastTwo.cpp | 3 +- src/Functions/bitWrapperFunc.cpp | 3 +- src/Functions/factorial.cpp | 2 +- src/Functions/intExp10.cpp | 3 +- src/Functions/intExp2.cpp | 3 +- src/Functions/negate.cpp | 3 +- src/Functions/roundAge.cpp | 3 +- src/Functions/roundDuration.cpp | 3 +- src/Functions/roundToExp2.cpp | 3 +- src/Functions/sign.cpp | 3 +- 15 files changed, 84 insertions(+), 37 deletions(-) diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 43313ce1673..c104a4c52f3 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -2,10 +2,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -31,7 +33,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - template struct UnaryOperationImpl { @@ -172,8 +173,6 @@ struct FixedStringUnaryOperationReduceImpl } }; - - template struct FunctionUnaryArithmeticMonotonicity; @@ -185,8 +184,8 @@ template