diff --git a/contrib/rocksdb b/contrib/rocksdb index 07c77549a20..6ff0adefdc8 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 07c77549a20b63ff6981b400085eba36bb5c80c4 +Subproject commit 6ff0adefdc84dac44e78804f7ca4122fe992cf8d diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9d259456ea5..f0cdd75f90d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -76,7 +76,7 @@ For a description of parameters, see the [CREATE query description](../../../sql - `SAMPLE BY` — An expression for sampling. Optional. - If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. 
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f32b4b26c8b..015afd1cd24 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1130,17 +1130,18 @@ The table below shows supported data types and how they match ClickHouse [data t | `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql-reference/data-types/int-uint.md), [UInt64](../sql-reference/data-types/int-uint.md) | `long` | | `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql-reference/data-types/float.md) | `float` | | `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql-reference/data-types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` | +| `bytes`, `string`, `fixed`, `enum` | [String](../sql-reference/data-types/string.md) | `bytes` or `string` \* | | `bytes`, `string`, `fixed` | [FixedString(N)](../sql-reference/data-types/fixedstring.md) | `fixed(N)` | | `enum` | [Enum(8\|16)](../sql-reference/data-types/enum.md) | `enum` | | `array(T)` | [Array(T)](../sql-reference/data-types/array.md) | `array(T)` | | `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql-reference/data-types/date.md) | `union(null, T)` | | `null` | [Nullable(Nothing)](../sql-reference/data-types/special-data-types/nothing.md) | `null` | -| `int (date)` \* | [Date](../sql-reference/data-types/date.md) | `int (date)` \* | -| `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | +| `int (date)` \** | [Date](../sql-reference/data-types/date.md) | `int (date)` \** | +| `long (timestamp-millis)` \** | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \** | +| `long (timestamp-micros)` \** | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long 
(timestamp-micros)` \** | -\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](../operations/settings/settings.md#output_format_avro_string_column_pattern) +\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 791ac344bcf..9ace094a4d8 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -278,4 +278,16 @@ Possible values: Default value: `0`. +## check_sample_column_is_correct {#check_sample_column_is_correct} + +Enables checking at table creation that the column for sampling or the sampling expression is correct. + +Possible values: + +- true — Check that the column or sampling expression is correct at table creation. +- false — Do not check that the column or sampling expression is correct at table creation. + +Default value: `true`. + +By default, the ClickHouse server checks the column for sampling or the sampling expression at table creation. If you already have tables with an incorrect sampling expression, set the value to `false` so that the ClickHouse server does not raise an exception when it starts. [Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1441e90b33f..5042aeae162 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1738,7 +1738,7 @@ Default value: 0. ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} -Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. 
+Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. These functions can be transformed: @@ -1969,6 +1969,13 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB) Default value: 32768 (32 KiB) +## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} + +Regexp of column names of type String to output as Avro `string` (default is `bytes`). +RE2 syntax is supported. + +Type: string + ## format_avro_schema_registry_url {#format_avro_schema_registry_url} Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format. diff --git a/docs/zh/engines/database-engines/atomic.md b/docs/zh/engines/database-engines/atomic.md index f019b94a00b..73e044b5e98 100644 --- a/docs/zh/engines/database-engines/atomic.md +++ b/docs/zh/engines/database-engines/atomic.md @@ -6,12 +6,12 @@ toc_title: Atomic # Atomic {#atomic} -It is supports non-blocking `DROP` and `RENAME TABLE` queries and atomic `EXCHANGE TABLES t1 AND t2` queries. Atomic database engine is used by default. 
+它支持非阻塞 DROP 和 RENAME TABLE 查询以及原子 EXCHANGE TABLES t1 AND t2 查询。默认情况下使用Atomic数据库引擎。 -## Creating a Database {#creating-a-database} +## 创建数据库 {#creating-a-database} ```sql CREATE DATABASE test ENGINE = Atomic; ``` -[Original article](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) +[原文](https://clickhouse.tech/docs/en/engines/database_engines/atomic/) diff --git a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md index 6d1dfac7686..6fb57dc19d9 100644 --- a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -1,4 +1,4 @@ -# 折叠树 {#table_engine-collapsingmergetree} +# CollapsingMergeTree {#table_engine-collapsingmergetree} 该引擎继承于 [MergeTree](mergetree.md),并在数据块合并算法中添加了折叠行的逻辑。 @@ -203,4 +203,4 @@ SELECT * FROM UAct FINAL 这种查询数据的方法是非常低效的。不要在大表中使用它。 -[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) +[原文](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 3b89da9f595..dc9871c1a31 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 37 toc_title: "版本折叠MergeTree" --- -# 版本折叠MergeTree {#versionedcollapsingmergetree} +# VersionedCollapsingMergeTree {#versionedcollapsingmergetree} 这个引擎: diff --git a/docs/zh/interfaces/tcp.md b/docs/zh/interfaces/tcp.md index b779b9fea40..571fd22b758 100644 --- a/docs/zh/interfaces/tcp.md +++ b/docs/zh/interfaces/tcp.md @@ -5,6 +5,6 @@ toc_title: 原生接口(TCP) # 原生接口(TCP){#native-interface-tcp} 
-原生接口用于[命令行客户端](cli.md),用于分布式查询处理期间的服务器间通信,以及其他C++程序。可惜的是,原生的ClickHouse协议还没有正式的规范,但它可以从ClickHouse[源代码](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)通过拦截和分析TCP流量进行反向工程。 +原生接口协议用于[命令行客户端](cli.md),用于分布式查询处理期间的服务器间通信,以及其他C++ 程序。不幸的是,原生ClickHouse协议还没有正式的规范,但它可以从ClickHouse源代码[从这里开始](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)或通过拦截和分析TCP流量进行逆向工程。 -[来源文章](https://clickhouse.tech/docs/zh/interfaces/tcp/) +[原文](https://clickhouse.tech/docs/en/interfaces/tcp/) diff --git a/docs/zh/operations/index.md b/docs/zh/operations/index.md index f35858279f5..5139f083ceb 100644 --- a/docs/zh/operations/index.md +++ b/docs/zh/operations/index.md @@ -5,9 +5,21 @@ toc_title: "操作" # 操作 {#operations} -Clickhouse运维手册主要包含下面几部分: +ClickHouse操作手册由以下主要部分组成: -- 安装要求 +- [安装要求](../operations/requirements.md) +- [监控](../operations/monitoring.md) +- [故障排除](../operations/troubleshooting.md) +- [使用建议](../operations/tips.md) +- [更新程序](../operations/update.md) +- [访问权限](../operations/access-rights.md) +- [数据备份](../operations/backup.md) +- [配置文件](../operations/configuration-files.md) +- [配额](../operations/quotas.md) +- [系统表](../operations/system-tables/index.md) +- [服务器配置参数](../operations/server-configuration-parameters/index.md) +- [如何用ClickHouse测试你的硬件](../operations/performance-test.md) +- [设置](../operations/settings/index.md) +- [实用工具](../operations/utilities/index.md) - -[原始文章](https://clickhouse.tech/docs/en/operations/) +[原文](https://clickhouse.tech/docs/en/operations/) diff --git a/programs/server/play.html b/programs/server/play.html index 5e0377aa8f7..7b13807f2d9 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -613,10 +613,16 @@ } /// Huge JS libraries should be loaded only if needed. 
- function loadJS(src) { + function loadJS(src, integrity) { return new Promise((resolve, reject) => { const script = document.createElement('script'); script.src = src; + if (integrity) { + script.crossOrigin = 'anonymous'; + script.integrity = integrity; + } else { + console.warn('no integrity for', src) + } script.addEventListener('load', function() { resolve(true); }); document.head.appendChild(script); }); @@ -627,10 +633,14 @@ if (load_dagre_promise) { return load_dagre_promise; } load_dagre_promise = Promise.all([ - loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js'), - loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js'), - loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js'), - loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0'), + loadJS('https://dagrejs.github.io/project/dagre/v0.8.5/dagre.min.js', + 'sha384-2IH3T69EIKYC4c+RXZifZRvaH5SRUdacJW7j6HtE5rQbvLhKKdawxq6vpIzJ7j9M'), + loadJS('https://dagrejs.github.io/project/graphlib-dot/v0.6.4/graphlib-dot.min.js', + 'sha384-Q7oatU+b+y0oTkSoiRH9wTLH6sROySROCILZso/AbMMm9uKeq++r8ujD4l4f+CWj'), + loadJS('https://dagrejs.github.io/project/dagre-d3/v0.6.4/dagre-d3.min.js', + 'sha384-9N1ty7Yz7VKL3aJbOk+8ParYNW8G5W+MvxEfFL9G7CRYPmkHI9gJqyAfSI/8190W'), + loadJS('https://cdn.jsdelivr.net/npm/d3@7.0.0', + 'sha384-S+Kf0r6YzKIhKA8d1k2/xtYv+j0xYUU3E7+5YLrcPVab6hBh/r1J6cq90OXhw80u'), ]); return load_dagre_promise; diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 66023c1c0ea..7ae5eeb8288 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -64,7 +64,12 @@ public: std::lock_guard lock{mutex}; auto x = cache.get(params); if (x) - return *x; + { + if ((*x)->getUser()) + return *x; + /// No user, probably the user has been dropped while it was in the cache. 
+ cache.remove(params); + } auto res = std::shared_ptr(new ContextAccess(manager, params)); cache.add(params, res); return res; diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index f9c1d23350d..d4b2dc8a252 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -655,7 +655,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags |= rhs.flags & lhs_child.getAllGrantableFlags(); + lhs_child.addGrantsRec(rhs.flags); } } } @@ -673,7 +673,7 @@ private: for (auto & [lhs_childname, lhs_child] : *children) { if (!rhs.tryGetChild(lhs_childname)) - lhs_child.flags &= rhs.flags; + lhs_child.removeGrantsRec(~rhs.flags); } } } @@ -1041,17 +1041,15 @@ void AccessRights::makeIntersection(const AccessRights & other) auto helper = [](std::unique_ptr & root_node, const std::unique_ptr & other_root_node) { if (!root_node) + return; + if (!other_root_node) { - if (other_root_node) - root_node = std::make_unique(*other_root_node); + root_node = nullptr; return; } - if (other_root_node) - { - root_node->makeIntersection(*other_root_node); - if (!root_node->flags && !root_node->children) - root_node = nullptr; - } + root_node->makeIntersection(*other_root_node); + if (!root_node->flags && !root_node->children) + root_node = nullptr; }; helper(root, other.root); helper(root_with_grant_option, other.root_with_grant_option); diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 90495a83dfc..697e1ce39f5 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -163,11 +163,10 @@ void ContextAccess::setUser(const UserPtr & user_) const if (!user) { /// User has been dropped. 
- auto nothing_granted = std::make_shared(); - access = nothing_granted; - access_with_implicit = nothing_granted; subscription_for_user_change = {}; subscription_for_roles_changes = {}; + access = nullptr; + access_with_implicit = nullptr; enabled_roles = nullptr; roles_info = nullptr; enabled_row_policies = nullptr; @@ -252,32 +251,45 @@ String ContextAccess::getUserName() const std::shared_ptr ContextAccess::getRolesInfo() const { std::lock_guard lock{mutex}; - return roles_info; + if (roles_info) + return roles_info; + static const auto no_roles = std::make_shared(); + return no_roles; } std::shared_ptr ContextAccess::getEnabledRowPolicies() const { std::lock_guard lock{mutex}; - return enabled_row_policies; + if (enabled_row_policies) + return enabled_row_policies; + static const auto no_row_policies = std::make_shared(); + return no_row_policies; } ASTPtr ContextAccess::getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition) const { std::lock_guard lock{mutex}; - return enabled_row_policies ? enabled_row_policies->getCondition(database, table_name, index, extra_condition) : nullptr; + if (enabled_row_policies) + return enabled_row_policies->getCondition(database, table_name, index, extra_condition); + return nullptr; } std::shared_ptr ContextAccess::getQuota() const { std::lock_guard lock{mutex}; - return enabled_quota; + if (enabled_quota) + return enabled_quota; + static const auto unlimited_quota = EnabledQuota::getUnlimitedQuota(); + return unlimited_quota; } std::optional ContextAccess::getQuotaUsage() const { std::lock_guard lock{mutex}; - return enabled_quota ? 
enabled_quota->getUsage() : std::optional{}; + if (enabled_quota) + return enabled_quota->getUsage(); + return {}; } @@ -288,7 +300,7 @@ std::shared_ptr ContextAccess::getFullAccess() auto full_access = std::shared_ptr(new ContextAccess); full_access->is_full_access = true; full_access->access = std::make_shared(AccessRights::getFullAccess()); - full_access->enabled_quota = EnabledQuota::getUnlimitedQuota(); + full_access->access_with_implicit = std::make_shared(addImplicitAccessRights(*full_access->access)); return full_access; }(); return res; @@ -298,28 +310,40 @@ std::shared_ptr ContextAccess::getFullAccess() std::shared_ptr ContextAccess::getDefaultSettings() const { std::lock_guard lock{mutex}; - return enabled_settings ? enabled_settings->getSettings() : nullptr; + if (enabled_settings) + return enabled_settings->getSettings(); + static const auto everything_by_default = std::make_shared(); + return everything_by_default; } std::shared_ptr ContextAccess::getSettingsConstraints() const { std::lock_guard lock{mutex}; - return enabled_settings ? 
enabled_settings->getConstraints() : nullptr; + if (enabled_settings) + return enabled_settings->getConstraints(); + static const auto no_constraints = std::make_shared(); + return no_constraints; } std::shared_ptr ContextAccess::getAccessRights() const { std::lock_guard lock{mutex}; - return access; + if (access) + return access; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } std::shared_ptr ContextAccess::getAccessRightsWithImplicit() const { std::lock_guard lock{mutex}; - return access_with_implicit; + if (access_with_implicit) + return access_with_implicit; + static const auto nothing_granted = std::make_shared(); + return nothing_granted; } @@ -551,7 +575,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const for (auto it = std::begin(role_ids); it != std::end(role_ids); ++it, ++i) { const UUID & role_id = *it; - if (info && info->enabled_roles_with_admin_option.count(role_id)) + if (info->enabled_roles_with_admin_option.count(role_id)) continue; if (throw_if_denied) @@ -560,7 +584,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const if (!role_name) role_name = "ID {" + toString(role_id) + "}"; - if (info && info->enabled_roles.count(role_id)) + if (info->enabled_roles.count(role_id)) show_error("Not enough privileges. " "Role " + backQuote(*role_name) + " is granted, but without ADMIN option. " "To execute this query it's necessary to have the role " + backQuoteIfNeed(*role_name) + " granted with ADMIN option.", diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index a4373be4ff0..c7c4726c535 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -71,11 +71,9 @@ public: String getUserName() const; /// Returns information about current and enabled roles. - /// The function can return nullptr. std::shared_ptr getRolesInfo() const; /// Returns information about enabled row policies. - /// The function can return nullptr. 
std::shared_ptr getEnabledRowPolicies() const; /// Returns the row policy filter for a specified table. @@ -83,16 +81,13 @@ public: ASTPtr getRowPolicyCondition(const String & database, const String & table_name, RowPolicy::ConditionType index, const ASTPtr & extra_condition = nullptr) const; /// Returns the quota to track resource consumption. - /// The function returns nullptr if no tracking or limitation is needed. std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; /// Returns the default settings, i.e. the settings to apply on user's login. - /// The function returns nullptr if it's no need to apply settings. std::shared_ptr getDefaultSettings() const; /// Returns the settings' constraints. - /// The function returns nullptr if there are no constraints. std::shared_ptr getSettingsConstraints() const; /// Returns the current access rights. diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index efd5ed4ae10..674dab3e0f0 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -12,8 +12,11 @@ size_t EnabledRowPolicies::Hash::operator()(const MixedConditionKey & key) const } -EnabledRowPolicies::EnabledRowPolicies(const Params & params_) - : params(params_) +EnabledRowPolicies::EnabledRowPolicies() : params() +{ +} + +EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_) { } diff --git a/src/Access/EnabledRowPolicies.h b/src/Access/EnabledRowPolicies.h index 0ca4f16fcf1..5e819733963 100644 --- a/src/Access/EnabledRowPolicies.h +++ b/src/Access/EnabledRowPolicies.h @@ -32,6 +32,7 @@ public: friend bool operator >=(const Params & lhs, const Params & rhs) { return !(lhs < rhs); } }; + EnabledRowPolicies(); ~EnabledRowPolicies(); using ConditionType = RowPolicy::ConditionType; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 316f869fc79..988900e57d2 100644 --- a/src/Access/SettingsConstraints.cpp +++ 
b/src/Access/SettingsConstraints.cpp @@ -18,6 +18,8 @@ namespace ErrorCodes } +SettingsConstraints::SettingsConstraints() = default; + SettingsConstraints::SettingsConstraints(const AccessControlManager & manager_) : manager(&manager_) { } @@ -199,10 +201,13 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh } }; - if (reaction == THROW_ON_VIOLATION) - manager->checkSettingNameIsAllowed(setting_name); - else if (!manager->isSettingNameAllowed(setting_name)) - return false; + if (manager) + { + if (reaction == THROW_ON_VIOLATION) + manager->checkSettingNameIsAllowed(setting_name); + else if (!manager->isSettingNameAllowed(setting_name)) + return false; + } Field current_value, new_value; if (current_settings.tryGet(setting_name, current_value)) diff --git a/src/Access/SettingsConstraints.h b/src/Access/SettingsConstraints.h index 4259fe15e25..cdec2bb293c 100644 --- a/src/Access/SettingsConstraints.h +++ b/src/Access/SettingsConstraints.h @@ -51,6 +51,7 @@ class AccessControlManager; class SettingsConstraints { public: + SettingsConstraints(); SettingsConstraints(const AccessControlManager & manager_); SettingsConstraints(const SettingsConstraints & src); SettingsConstraints & operator =(const SettingsConstraints & src); diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp new file mode 100644 index 00000000000..3d7b396a6f2 --- /dev/null +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -0,0 +1,94 @@ +#include +#include + +using namespace DB; + + +TEST(AccessRights, Union) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs.clear(); + rhs.clear(); + rhs.grant(AccessType::SELECT, "db2"); + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT 
CREATE TABLE ON db1.tb1, GRANT SELECT ON db2.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1, col2, col3) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col1) ON db1.tb1, GRANT SELECT(col2, col3) ON db1.tb1 WITH GRANT OPTION"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeUnion(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); +} + + +TEST(AccessRights, Intersection) +{ + AccessRights lhs, rhs; + lhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + rhs.grant(AccessType::SELECT, "db2"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs.clear(); + rhs.clear(); + lhs.grant(AccessType::SELECT, "db2"); + rhs.grant(AccessType::CREATE_TABLE, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT USAGE ON *.*"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT); + rhs.grant(AccessType::SELECT, "db1", "tb1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT ON db1.tb1"); + + lhs = {}; + 
rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::SELECT, "db1", "tb1", Strings{"col1", "col2"}); + rhs.grantWithGrantOption(AccessType::SELECT, "db1", "tb1", Strings{"col2", "col3"}); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT SELECT(col2) ON db1.tb1"); + + lhs = {}; + rhs = {}; + lhs.grant(AccessType::INSERT); + rhs.grant(AccessType::ALL, "db1"); + lhs.makeIntersection(rhs); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON db1.*"); +} diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 8b006bc550d..4fe0f0bb8c8 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -109,11 +109,23 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) /// It should not affect client address checking, since client cannot connect from IPv6 address /// if server has no IPv6 addresses. 
flags |= Poco::Net::DNS::DNS_HINT_AI_ADDRCONFIG; + + DNSResolver::IPAddresses addresses; + + try + { #if defined(ARCADIA_BUILD) - auto addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses(); #else - auto addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); + addresses = Poco::Net::DNS::hostByName(host, flags).addresses(); #endif + } + catch (const Poco::Net::DNSException & e) + { + LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message()); + addresses.clear(); + } + if (addresses.empty()) throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 7a735569238..8074fabfa2d 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -375,9 +375,13 @@ void Block::setColumn(size_t position, ColumnWithTypeAndName && column) throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Block::setColumn(), max position {}", position, toString(data.size())); - data[position].name = std::move(column.name); - data[position].type = std::move(column.type); - data[position].column = std::move(column.column); + if (data[position].name != column.name) + { + index_by_name.erase(data[position].name); + index_by_name.emplace(column.name, position); + } + + data[position] = std::move(column); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 77e6d0c674a..5a6e65b996f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -526,6 +526,7 @@ class IColumn; M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent 
format: when field is not found in schema use default value instead of error", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ + M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ \ diff --git a/src/Databases/SQLite/SQLiteUtils.h b/src/Databases/SQLite/SQLiteUtils.h index d405e869b85..56ca1ca8cf3 100644 --- a/src/Databases/SQLite/SQLiteUtils.h +++ b/src/Databases/SQLite/SQLiteUtils.h @@ -7,7 +7,7 @@ #if USE_SQLITE #include #include -#include +#include // Y_IGNORE namespace DB diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a00839fc5f5..7cd034aab23 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -60,6 +60,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.avro.output_codec = settings.output_format_avro_codec; format_settings.avro.output_sync_interval = settings.output_format_avro_sync_interval; format_settings.avro.schema_registry_url = settings.format_avro_schema_registry_url.toString(); + format_settings.avro.string_column_pattern = settings.output_format_avro_string_column_pattern.toString(); format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 1773f2cc2c6..57dfb9023cc 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -61,6 +61,7 @@ struct FormatSettings String output_codec; UInt64 output_sync_interval = 16 * 1024; bool allow_missing_fields = false; + String string_column_pattern; } 
avro; struct CSV @@ -169,4 +170,3 @@ struct FormatSettings }; } - diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index da514759eb5..6b2940154f8 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -546,13 +546,16 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti Int64 peak = total_memory_tracker.getPeak(); Int64 new_amount = data.resident; - LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), - "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", - ReadableSize(amount), - ReadableSize(peak), - ReadableSize(new_amount), - ReadableSize(new_amount - amount) - ); + Int64 difference = new_amount - amount; + + /// Log only if difference is high. This is for convenience. The threshold is arbitrary. + if (difference >= 1048576 || difference <= -1048576) + LOG_TRACE(&Poco::Logger::get("AsynchronousMetrics"), + "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", + ReadableSize(amount), + ReadableSize(peak), + ReadableSize(new_amount), + ReadableSize(difference)); total_memory_tracker.set(new_amount); CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cbf2c0820f5..ccb356a4ca7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1198,26 +1198,22 @@ void Context::applySettingsChanges(const SettingsChanges & changes) void Context::checkSettingsConstraints(const SettingChange & change) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, change); + getSettingsConstraints()->check(settings, change); } void Context::checkSettingsConstraints(const SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + getSettingsConstraints()->check(settings, 
changes); } void Context::checkSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->check(settings, changes); + getSettingsConstraints()->check(settings, changes); } void Context::clampToSettingsConstraints(SettingsChanges & changes) const { - if (auto settings_constraints = getSettingsConstraints()) - settings_constraints->clamp(settings, changes); + getSettingsConstraints()->clamp(settings, changes); } std::shared_ptr Context::getSettingsConstraints() const diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index 7487ca79bde..d5fcb82d408 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -93,24 +93,28 @@ namespace const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { const auto & elements = query.access_rights_elements; + need_check_grantees_are_allowed = true; if (elements.empty()) + { + /// No access rights to grant or revoke. + need_check_grantees_are_allowed = false; return; + } - /// To execute the command GRANT the current user needs to have the access granted - /// with GRANT OPTION. if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the access granted with GRANT OPTION. access.checkGrantOption(elements); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return; } if (access.hasGrantOption(elements)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the grant option for all the access rights specified for REVOKE. 
return; } @@ -137,6 +141,7 @@ namespace all_granted_access.makeUnion(user->access); } } + need_check_grantees_are_allowed = false; /// already checked AccessRights required_access; if (elements[0].is_partial_revoke) @@ -158,21 +163,28 @@ namespace } } - std::vector getRoleIDsAndCheckAdminOption( const AccessControlManager & access_control, const ContextAccess & access, const ASTGrantQuery & query, const RolesOrUsersSet & roles_from_query, - const std::vector & grantees_from_query) + const std::vector & grantees_from_query, + bool & need_check_grantees_are_allowed) { - std::vector matching_ids; + need_check_grantees_are_allowed = true; + if (roles_from_query.empty()) + { + /// No roles to grant or revoke. + need_check_grantees_are_allowed = false; + return {}; + } + std::vector matching_ids; if (!query.is_revoke) { + /// To execute the command GRANT the current user needs to have the roles granted with ADMIN OPTION. matching_ids = roles_from_query.getMatchingIDs(access_control); access.checkAdminOption(matching_ids); - checkGranteesAreAllowed(access_control, access, grantees_from_query); return matching_ids; } @@ -181,7 +193,7 @@ namespace matching_ids = roles_from_query.getMatchingIDs(); if (access.hasAdminOption(matching_ids)) { - checkGranteesAreAllowed(access_control, access, grantees_from_query); + /// Simple case: the current user has the admin option for all the roles specified for REVOKE. return matching_ids; } } @@ -209,6 +221,7 @@ namespace all_granted_roles.makeUnion(user->granted_roles); } } + need_check_grantees_are_allowed = false; /// already checked const auto & all_granted_roles_set = query.admin_option ? 
all_granted_roles.getGrantedWithAdminOption() : all_granted_roles.getGranted(); if (roles_from_query.all) @@ -218,6 +231,33 @@ namespace access.checkAdminOption(matching_ids); return matching_ids; } + + void checkGrantOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + checkGrantOption(access_control, access, query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + } + + std::vector getRoleIDsAndCheckAdminOptionAndGrantees( + const AccessControlManager & access_control, + const ContextAccess & access, + const ASTGrantQuery & query, + const RolesOrUsersSet & roles_from_query, + const std::vector & grantees_from_query) + { + bool need_check_grantees_are_allowed = true; + auto role_ids = getRoleIDsAndCheckAdminOption( + access_control, access, query, roles_from_query, grantees_from_query, need_check_grantees_are_allowed); + if (need_check_grantees_are_allowed) + checkGranteesAreAllowed(access_control, access, grantees_from_query); + return role_ids; + } } @@ -243,7 +283,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding roles granted with admin option. std::vector roles; if (roles_set) - roles = getRoleIDsAndCheckAdminOption(access_control, *getContext()->getAccess(), query, *roles_set, grantees); + roles = getRoleIDsAndCheckAdminOptionAndGrantees(access_control, *getContext()->getAccess(), query, *roles_set, grantees); if (!query.cluster.empty()) { @@ -258,7 +298,7 @@ BlockIO InterpreterGrantQuery::execute() /// Check if the current user has corresponding access rights with grant option. 
if (!query.access_rights_elements.empty()) - checkGrantOption(access_control, *getContext()->getAccess(), query, grantees); + checkGrantOptionAndGrantees(access_control, *getContext()->getAccess(), query, grantees); /// Update roles and users listed in `grantees`. auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 46833f9769e..8f9d94b6079 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1048,7 +1048,10 @@ private: } if (rows_added >= max_block_size) + { + ++block_number; break; + } } return rows_added; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 81922bdde80..c4ec8736a81 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -41,6 +41,7 @@ #include #include +#include namespace DB { @@ -48,8 +49,34 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; + extern const int CANNOT_COMPILE_REGEXP; } +class AvroSerializerTraits +{ +public: + explicit AvroSerializerTraits(const FormatSettings & settings_) + : string_to_string_regexp(settings_.avro.string_column_pattern) + { + if (!string_to_string_regexp.ok()) + throw DB::Exception( + "Avro: cannot compile re2: " + settings_.avro.string_column_pattern + ", error: " + string_to_string_regexp.error() + + ". 
Look at https://github.com/google/re2/wiki/Syntax for reference.", + DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + } + + bool isStringAsString(const String & column_name) + { + return RE2::FullMatch(column_name, string_to_string_regexp); + } + + ~AvroSerializerTraits() = default; + +private: + const RE2 string_to_string_regexp; +}; + + class OutputStreamWriteBufferAdapter : public avro::OutputStream { public: @@ -75,7 +102,7 @@ private: }; -AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment) +AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name) { ++type_name_increment; @@ -161,11 +188,20 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF }}; } case TypeIndex::String: - return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) - { - const StringRef & s = assert_cast(column).getDataAt(row_num); - encoder.encodeBytes(reinterpret_cast(s.data), s.size); - }}; + if (traits->isStringAsString(column_name)) + return {avro::StringSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeString(s.toString()); + } + }; + else + return {avro::BytesSchema(), [](const IColumn & column, size_t row_num, avro::Encoder & encoder) + { + const StringRef & s = assert_cast(column).getDataAt(row_num); + encoder.encodeBytes(reinterpret_cast(s.data), s.size); + } + }; case TypeIndex::FixedString: { auto size = data_type->getSizeOfValueInMemory(); @@ -223,7 +259,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Array: { const auto & array_type = assert_cast(*data_type); - auto nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment); + auto 
nested_mapping = createSchemaWithSerializeFn(array_type.getNestedType(), type_name_increment, column_name); auto schema = avro::ArraySchema(nested_mapping.schema); return {schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { @@ -249,7 +285,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::Nullable: { auto nested_type = removeNullable(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); if (nested_type->getTypeId() == TypeIndex::Nothing) { return nested_mapping; @@ -278,7 +314,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::LowCardinality: { const auto & nested_type = removeLowCardinality(data_type); - auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment); + auto nested_mapping = createSchemaWithSerializeFn(nested_type, type_name_increment, column_name); return {nested_mapping.schema, [nested_mapping](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const auto & col = assert_cast(column); @@ -294,7 +330,8 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF } -AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) +AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr traits_) + : traits(std::move(traits_)) { avro::RecordSchema record_schema("row"); @@ -303,7 +340,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) { try { - auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment); + auto field_mapping = createSchemaWithSerializeFn(column.type, type_name_increment, column.name); serialize_fns.push_back(field_mapping.serialize); //TODO: verify name starts with A-Za-z_ record_schema.addField(column.name, 
field_mapping.schema); @@ -314,7 +351,7 @@ AvroSerializer::AvroSerializer(const ColumnsWithTypeAndName & columns) throw; } } - schema.setSchema(record_schema); + valid_schema.setSchema(record_schema); } void AvroSerializer::serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder) @@ -350,7 +387,7 @@ AvroRowOutputFormat::AvroRowOutputFormat( WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) : IRowOutputFormat(header_, out_, params_) , settings(settings_) - , serializer(header_.getColumnsWithTypeAndName()) + , serializer(header_.getColumnsWithTypeAndName(), std::make_unique(settings)) , file_writer( std::make_unique(out_), serializer.getSchema(), diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index 8d0581d3307..fa4cedf1cc2 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -18,11 +18,13 @@ namespace DB { class WriteBuffer; +class AvroSerializerTraits; + class AvroSerializer { public: - AvroSerializer(const ColumnsWithTypeAndName & columns); - const avro::ValidSchema & getSchema() const { return schema; } + AvroSerializer(const ColumnsWithTypeAndName & columns, std::unique_ptr); + const avro::ValidSchema & getSchema() const { return valid_schema; } void serializeRow(const Columns & columns, size_t row_num, avro::Encoder & encoder); private: @@ -34,10 +36,11 @@ private: }; /// Type names for different complex types (e.g. enums, fixed strings) must be unique. We use simple incremental number to give them different names. 
- static SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment); + /*static*/ SchemaWithSerializeFn createSchemaWithSerializeFn(DataTypePtr data_type, size_t & type_name_increment, const String & column_name); std::vector serialize_fns; - avro::ValidSchema schema; + avro::ValidSchema valid_schema; + std::unique_ptr traits; }; class AvroRowOutputFormat : public IRowOutputFormat diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 87f22319bba..79fd14be293 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -180,11 +180,9 @@ void JoiningTransform::transform(Chunk & chunk) Block JoiningTransform::readExecute(Chunk & chunk) { Block res; - // std::cerr << "=== Chunk rows " << chunk.getNumRows() << " cols " << chunk.getNumColumns() << std::endl; if (!not_processed) { - // std::cerr << "!not_processed " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -193,7 +191,6 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else if (not_processed->empty()) /// There's not processed data inside expression. 
{ - // std::cerr << "not_processed->empty() " << std::endl; if (chunk.hasColumns()) res = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -202,12 +199,10 @@ Block JoiningTransform::readExecute(Chunk & chunk) } else { - // std::cerr << "not not_processed->empty() " << std::endl; res = std::move(not_processed->block); join->joinBlock(res, not_processed); } - // std::cerr << "Res block rows " << res.rows() << " cols " << res.columns() << std::endl; return res; } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 611b03ebf72..d7211f9edd7 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -139,7 +139,9 @@ public: } const Columns & inputAt(const RowNumber & x) const - { return const_cast(this)->inputAt(x); } + { + return const_cast(this)->inputAt(x); + } auto & blockAt(const uint64_t block_number) { @@ -149,13 +151,19 @@ public: } const auto & blockAt(const uint64_t block_number) const - { return const_cast(this)->blockAt(block_number); } + { + return const_cast(this)->blockAt(block_number); + } auto & blockAt(const RowNumber & x) - { return blockAt(x.block); } + { + return blockAt(x.block); + } const auto & blockAt(const RowNumber & x) const - { return const_cast(this)->blockAt(x); } + { + return const_cast(this)->blockAt(x); + } size_t blockRowsNumber(const RowNumber & x) const { @@ -225,10 +233,14 @@ public: } RowNumber blocksEnd() const - { return RowNumber{first_block_number + blocks.size(), 0}; } + { + return RowNumber{first_block_number + blocks.size(), 0}; + } RowNumber blocksBegin() const - { return RowNumber{first_block_number, 0}; } + { + return RowNumber{first_block_number, 0}; + } public: /* diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index 1aca7e45190..d90ca27cbc0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ 
b/src/Storages/System/StorageSystemSettings.cpp @@ -40,8 +40,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co Field min, max; bool read_only = false; - if (settings_constraints) - settings_constraints->get(setting_name, min, max, read_only); + settings_constraints->get(setting_name, min, max, read_only); /// These two columns can accept strings only. if (!min.isNull()) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py new file mode 100644 index 00000000000..3aec527524b --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -0,0 +1,54 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="cte_distributed") +node1 = cluster.add_instance('node1', with_zookeeper=False) +node2 = cluster.add_instance('node2', + with_zookeeper=False, image='yandex/clickhouse-server', tag='21.7.3.14', stay_alive=True, + with_installed_binary=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + + +def test_cte_distributed(start_cluster): + node2.query(""" +WITH + quantile(0.05)(cnt) as p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") + + node1.query(""" +WITH + quantile(0.05)(cnt) as p05, + quantile(0.95)(cnt) as p95, + p95 - p05 as inter_percentile_range +SELECT + sum(cnt) as total_requests, + count() as data_points, + inter_percentile_range +FROM ( + SELECT + count() as cnt + FROM remote('node{1,2}', numbers(10)) + GROUP BY number +)""") diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py 
index 58a48bde95d..d0edde2233b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -1,5 +1,8 @@ import pytest +import time +import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance') @@ -38,3 +41,46 @@ def test_grant_create_user(): instance.query("GRANT CREATE USER ON *.* TO A") instance.query("CREATE USER B", user='A') assert instance.query("SELECT 1", user='B') == "1\n" + + +def test_login_as_dropped_user(): + for _ in range(0, 2): + instance.query("CREATE USER A") + assert instance.query("SELECT 1", user='A') == "1\n" + + instance.query("DROP USER A") + expected_error = "no user with such name" + assert expected_error in instance.query_and_get_error("SELECT 1", user='A') + + +def test_login_as_dropped_user_xml(): + for _ in range(0, 2): + instance.exec_in_container(["bash", "-c" , """ + cat > /etc/clickhouse-server/users.d/user_c.xml << EOF + + + + + + + + +EOF"""]) + + assert_eq_with_retry(instance, "SELECT name FROM system.users WHERE name='C'", "C") + + instance.exec_in_container(["bash", "-c" , "rm /etc/clickhouse-server/users.d/user_c.xml"]) + + expected_error = "no user with such name" + while True: + out, err = instance.query_and_get_answer_with_error("SELECT 1", user='C') + if expected_error in err: + logging.debug(f"Got error '{expected_error}' just as expected") + break + if out == "1\n": + logging.debug(f"Got output '1', retrying...") + time.sleep(0.5) + continue + raise Exception(f"Expected either output '1' or error '{expected_error}', got output={out} and error={err}") + + assert instance.query("SELECT name FROM system.users WHERE name='C'") == "" diff --git a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql index b6b981c7d00..6d57cd0447d 100644 --- 
a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql +++ b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql @@ -1,2 +1,2 @@ -SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 279 } +SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 198 } SELECT count() FROM remote('127.0.0.1|localhos', system.one); diff --git a/tests/queries/0_stateless/01060_avro.reference b/tests/queries/0_stateless/01060_avro.reference index 338ffe0cf96..7a5aa43a36a 100644 --- a/tests/queries/0_stateless/01060_avro.reference +++ b/tests/queries/0_stateless/01060_avro.reference @@ -58,3 +58,9 @@ not found 0 1000 147 += string column pattern +"русская строка" +Ok +1 0 +1 1 +1 1 diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 4779cd64953..8c37014a593 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -89,3 +89,22 @@ ${CLICKHOUSE_LOCAL} -q "select toInt64(number) as a from numbers(1000) format A # type supported via conversion ${CLICKHOUSE_LOCAL} -q "select toInt16(123) as a format Avro" | wc -c | tr -d ' ' + +echo '=' string column pattern +${CLICKHOUSE_LOCAL} -q "select 'русская строка' as a format Avro SETTINGS output_format_avro_string_column_pattern = 'a'" | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "a String" -q 'select * from table' + +# it is expected that invalid UTF-8 can be created +${CLICKHOUSE_LOCAL} -q "select '\x61\xF0\x80\x80\x80b' as a format Avro" > /dev/null && echo Ok + +A_NEEDLE="'\"name\":\"a\",\"type\":\"string\"'" +B_NEEDLE="'\"name\":\"b\",\"type\":\"string\"'" +PATTERNQUERY="select 'русская строка' as a, 'русская строка' as b format Avro SETTINGS output_format_avro_string_column_pattern =" + +PATTERNPATTERN="'a'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q 
"select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'a|b'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" + +PATTERNPATTERN="'.*'" +${CLICKHOUSE_LOCAL} -q "$PATTERNQUERY $PATTERNPATTERN" | tr -d '\n' | ${CLICKHOUSE_LOCAL} --structure "avro_raw String" --input-format LineAsString -q "select countSubstrings(avro_raw, $A_NEEDLE), countSubstrings(avro_raw, $B_NEEDLE) from table" diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference new file mode 100644 index 00000000000..58501cbd0fc --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.reference @@ -0,0 +1,16 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql new file mode 100644 index 00000000000..ad7331ee2db --- /dev/null +++ b/tests/queries/0_stateless/01943_pmj_non_joined_stuck.sql @@ -0,0 +1,19 @@ +SET max_block_size = 6, join_algorithm = 'partial_merge'; + +SELECT count() == 4 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 5 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 6 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 7 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 ALL 
RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 12 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 13 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 14 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 15 FROM (SELECT 1 AS s) AS js1 ALL RIGHT JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3]) AS s) AS js2 USING (s); + +SELECT count() == 8 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2]) AS s) AS js2 USING (s); +SELECT count() == 9 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3]) AS s) AS js2 USING (s); +SELECT count() == 10 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3]) AS s) AS js2 USING (s); +SELECT count() == 11 FROM (SELECT 1 AS s) AS js1 FULL JOIN (SELECT arrayJoin([2, 2, 2, 2, 2, 2, 2, 3, 3, 3]) AS s) AS js2 USING (s); diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh new file mode 100755 index 00000000000..288a3438dc9 --- 
/dev/null +++ b/tests/queries/0_stateless/01946_test_wrong_host_name_access.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +MYHOSTNAME=$(hostname -f) + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query " + DROP USER IF EXISTS dns_fail_1, dns_fail_2; + CREATE USER dns_fail_1 HOST NAME 'non.existing.host.name', '${MYHOSTNAME}'; + CREATE USER dns_fail_2 HOST NAME '${MYHOSTNAME}', 'non.existing.host.name';" + +${CLICKHOUSE_CLIENT} --query "SELECT 1" --user dns_fail_1 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "SELECT 2" --user dns_fail_2 --host ${MYHOSTNAME} + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS dns_fail_1, dns_fail_2" diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index fd800d3bc33..8bdb97cf350 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -514,6 +514,7 @@ "01915_create_or_replace_dictionary", "01925_test_storage_merge_aliases", "01933_client_replxx_convert_history", /// Uses non unique history file - "01902_table_function_merge_db_repr" + "01902_table_function_merge_db_repr", + "01946_test_wrong_host_name_access" ] } diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 28c2d7b1523..c8a8f3ac228 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,6 @@ v21.7.3.14-stable 2021-07-13 v21.7.2.7-stable 2021-07-09 +v21.6.8.62-stable 2021-07-13 v21.6.7.57-stable 2021-07-09 v21.6.6.51-stable 2021-07-02 v21.6.5.37-stable 2021-06-19