Merge branch 'master' into ch_fixedstring_len

This commit is contained in:
Duc Canh Le 2022-09-20 20:22:14 +08:00 committed by GitHub
commit c3970afc45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
241 changed files with 3717 additions and 1536 deletions

View File

@ -5,8 +5,9 @@
#include <type_traits>
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
/** Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
* In contrast to std::bit_cast can cast types of different width.
*/
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
@ -15,13 +16,3 @@ std::decay_t<To> bit_cast(const From & from)
memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
return res;
}
/** \brief Returns value `from` converted to type `To` while retaining bit representation.
* `To` and `From` must satisfy `CopyConstructible`.
* Unlike `bit_cast`, compilation fails (via `static_assert`) when `sizeof(To) != sizeof(From)`;
* the conversion itself is delegated to `bit_cast`.
*/
template <typename To, typename From>
std::decay_t<To> safe_bit_cast(const From & from)
{
static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
return bit_cast<To, From>(from);
}

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit d879821c7a4c70b0c3ad739d9951d1a2b1903df7
Subproject commit b89e20367b074bd02dd118a6534099b21e88b3c3

View File

@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG NFPM_VERSION=2.16.0
ARG NFPM_VERSION=2.18.1
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \

View File

@ -179,17 +179,17 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
# for files >64MB, we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log
do
clickhouse-local --path /var/lib/clickhouse/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse2/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz ||:
fi
done
# Also export trace log in flamegraph-friendly format.
for trace_type in CPU Memory Real
do
clickhouse-local --path /var/lib/clickhouse/ -q "
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "
select
arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack,
count(*) AS samples

View File

@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.8.35-stable (b91dc59a565) FIXME as compared to v22.6.7.7-stable (8eae2af3b9a)
#### New Feature
* Backported in [#40868](https://github.com/ClickHouse/ClickHouse/issues/40868): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41274](https://github.com/ClickHouse/ClickHouse/issues/41274): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41282](https://github.com/ClickHouse/ClickHouse/issues/41282): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812). [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40905](https://github.com/ClickHouse/ClickHouse/issues/40905): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40864](https://github.com/ClickHouse/ClickHouse/issues/40864): Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40803](https://github.com/ClickHouse/ClickHouse/issues/40803): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40891](https://github.com/ClickHouse/ClickHouse/issues/40891): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41133](https://github.com/ClickHouse/ClickHouse/issues/41133): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41360](https://github.com/ClickHouse/ClickHouse/issues/41360): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41357](https://github.com/ClickHouse/ClickHouse/issues/41357): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).

View File

@ -26,13 +26,33 @@ The constraints are defined as the following:
<setting_name_4>
<readonly/>
</setting_name_4>
<setting_name_5>
<min>lower_boundary</min>
<max>upper_boundary</max>
<changeable_in_readonly/>
</setting_name_5>
</constraints>
</user_name>
</profiles>
```
If the user tries to violate the constraints, an exception is thrown and the setting isn't changed.
There are supported three types of constraints: `min`, `max`, `readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` constraint specifies that the user cannot change the corresponding setting at all.
A few types of constraints are supported: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`. The `min` and `max` constraints specify lower and upper boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows the user to change the setting within the `min`/`max` range even if the `readonly` setting is set to 1; otherwise settings are not allowed to be changed in `readonly=1` mode. Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled:
``` xml
<access_control_improvements>
<settings_constraints_replace_previous>true</settings_constraints_replace_previous>
</access_control_improvements>
```
If there are multiple profiles active for a user, then constraints are merged. The merge process depends on `settings_constraints_replace_previous`:
- **true** (recommended): constraints for the same setting are replaced during the merge, such that the last constraint is used and all previous ones are ignored, including fields that are not set in the new constraint.
- **false** (default): constraints for the same setting are merged in such a way that every constraint type that is not set is taken from the previous profile and every constraint type that is set is replaced by the value from the new profile.
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type):
- `readonly=0`: No read-only restrictions.
- `readonly=1`: Only read queries are allowed and settings cannot be changed unless `changeable_in_readonly` is set.
- `readonly=2`: Only read queries are allowed, but settings can be changed, except for `readonly` setting itself.
**Example:** Let `users.xml` include the lines:

View File

@ -37,8 +37,7 @@ After setting `readonly = 1`, the user cant change `readonly` and `allow_ddl`
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user
from changing only specific settings, for details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Setting `readonly = 1` prohibits the user from changing all the settings. There is a way to prohibit the user from changing only specific settings. There is also a way to allow changing only specific settings under `readonly = 1` restrictions. For details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Default value: 0

View File

@ -3433,7 +3433,7 @@ Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
Default value: 1.
## input_format_with_names_use_header {#input_format_with_names_use_header}

View File

@ -155,7 +155,7 @@ Example of configuration for versions earlier than 22.8:
<endpoint>...</endpoint>
... s3 configuration ...
<data_cache_enabled>1</data_cache_enabled>
<data_cache_size>10000000</data_cache_size>
<data_cache_max_size>10000000</data_cache_max_size>
</s3>
</disks>
<policies>

View File

@ -7,13 +7,8 @@ title: "Geo Data Types"
ClickHouse supports data types for representing geographical objects — locations, lands, etc.
:::warning
Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`.
:::
**See Also**
- [Representing simple geographical features](https://en.wikipedia.org/wiki/GeoJSON).
- [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types) setting.
## Point
@ -24,7 +19,6 @@ Currently geo data types are an experimental feature. To work with them you must
Query:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_point (p Point) ENGINE = Memory();
INSERT INTO geo_point VALUES((10, 10));
SELECT p, toTypeName(p) FROM geo_point;
@ -46,7 +40,6 @@ Result:
Query:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_ring (r Ring) ENGINE = Memory();
INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
SELECT r, toTypeName(r) FROM geo_ring;
@ -68,7 +61,6 @@ Result:
This is a polygon with one hole:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory();
INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]);
SELECT pg, toTypeName(pg) FROM geo_polygon;
@ -91,7 +83,6 @@ Result:
This multipolygon consists of two separate polygons — the first one without holes, and the second with one hole:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory();
INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]);
SELECT mpg, toTypeName(mpg) FROM geo_multipolygon;

View File

@ -13,5 +13,5 @@ Syntax:
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,5 +13,5 @@ Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -11,7 +11,7 @@ Syntax:
``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## Managing Roles

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
`ON CLUSTER` clause allows creating settings profiles on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).

View File

@ -13,6 +13,6 @@ sidebar_label: ROLE
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,6 +13,6 @@ sidebar_label: SETTINGS PROFILE
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -12,7 +12,7 @@ sidebar_label: "Роль"
```sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## Управление ролями {#managing-roles}

View File

@ -13,7 +13,7 @@ sidebar_label: "Профиль настроек"
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
Секция `ON CLUSTER` позволяет создавать профили на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -500,7 +500,7 @@ ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name]
[IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}]
[[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
### 说明 {#alter-user-dscr}
@ -540,7 +540,7 @@ ALTER USER user DEFAULT ROLE ALL EXCEPT role1, role2
``` sql
ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## 修改row policy {#alter-row-policy-statement}
@ -584,7 +584,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
[Original article](https://clickhouse.com/docs/en/query_language/alter/) <!--hide-->

View File

@ -13,5 +13,5 @@ sidebar_label: 角色
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,5 +13,5 @@ sidebar_label: 配置文件设置
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -30,6 +30,10 @@ overrides:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-client/config.xml
dst: /etc/clickhouse-client/config.xml

View File

@ -20,6 +20,10 @@ description: |
debugging symbols for clickhouse-common-static
This package contains the debugging symbols for clickhouse-common.
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug

View File

@ -26,6 +26,10 @@ description: |
that allows generating analytical data reports in real time.
This package provides common files for both clickhouse server and client
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/bin/clickhouse
dst: /usr/bin/clickhouse

View File

@ -14,6 +14,10 @@ description: |
debugging symbols for clickhouse-keeper
This package contains the debugging symbols for clickhouse-keeper.
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug

View File

@ -22,6 +22,9 @@ description: |
Static clickhouse-keeper binary
A stand-alone clickhouse-keeper package
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-keeper

View File

@ -37,6 +37,10 @@ overrides:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-server
dst: /etc/clickhouse-server

View File

@ -622,9 +622,13 @@ void LocalServer::processConfig()
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
loadMetadata(global_context);
startupSystemTables();
DatabaseCatalog::instance().loadDatabases();
if (!config().has("only-system-tables"))
{
loadMetadata(global_context);
DatabaseCatalog::instance().loadDatabases();
}
LOG_DEBUG(log, "Loaded metadata.");
}
@ -715,6 +719,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("only-system-tables", "attach only system tables from specified path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
;
}
@ -743,6 +748,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("table-structure", options["structure"].as<std::string>());
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());

View File

@ -1381,7 +1381,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setConfigReloadCallback([&]()
{
main_config_reloader->reload();
access_control.reload();
access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
});
/// Limit on total number of concurrently executed queries.
@ -1474,6 +1474,23 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// Try to set up encryption. If there are errors in the config, they will be printed and the server won't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
if (dns_cache_updater)
dns_cache_updater->start();
SCOPE_EXIT({
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
@ -1627,20 +1644,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
#if defined(OS_LINUX)
auto tasks_stats_provider = TasksStatsCounters::findBestAvailableProvider();
if (tasks_stats_provider == TasksStatsCounters::MetricsProvider::None)
@ -1705,8 +1708,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
main_config_reloader->start();
access_control.startPeriodicReloading();
if (dns_cache_updater)
dns_cache_updater->start();
{
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",

View File

@ -65,7 +65,7 @@
For example, as below:
{"date_time":"1650918987.180175","thread_name":"#1","thread_id":"254545","level":"Trace","query_id":"","logger_name":"BaseDaemon","message":"Received signal 2","source_file":"../base/daemon/BaseDaemon.cpp; virtual void SignalListener::run()","source_line":"192"}
To enable JSON logging support, please uncomment the entire <formatting> tag below.
a) You can modify key names by changing values under tag values inside <names> tag.
For example, to change DATE_TIME to MY_DATE_TIME, you can do like:
<date_time>MY_DATE_TIME</date_time>
@ -661,6 +661,13 @@
executed by any user. You can change this behaviour by setting this to true.
If it's set to true then this query requires "GRANT SELECT ON information_schema.<table>" just like as for ordinary tables. -->
<select_from_information_schema_requires_grant>false</select_from_information_schema_requires_grant>
<!-- By default, for backward compatibility, a settings profile constraint for a specific setting inherits every field that is not set from
the previous profile. You can change this behaviour by setting this to true.
If it's set to true and a settings profile has a constraint for a specific setting, then this constraint completely cancels all
actions of previous constraints (defined in other profiles) for the same setting, including fields that are not set by the new constraint.
It also enables the 'changeable_in_readonly' constraint type. -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
</access_control_improvements>
<!-- Default profile of settings. -->

View File

@ -171,6 +171,7 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", false));
setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", false));
setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", false));
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
}
@ -390,9 +391,9 @@ void AccessControl::addStoragesFromMainConfig(
}
void AccessControl::reload()
void AccessControl::reload(ReloadMode reload_mode)
{
MultipleAccessStorage::reload();
MultipleAccessStorage::reload(reload_mode);
changes_notifier->sendNotifications();
}

View File

@ -99,8 +99,8 @@ public:
const String & config_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
/// Reloads and updates entities in this storage. This function is used to implement SYSTEM RELOAD CONFIG.
void reload() override;
/// Reloads and updates all access entities.
void reload(ReloadMode reload_mode) override;
using OnChangedHandler = std::function<void(const UUID & /* id */, const AccessEntityPtr & /* new or changed entity, null if removed */)>;
@ -158,6 +158,9 @@ public:
void setSelectFromInformationSchemaRequiresGrant(bool enable) { select_from_information_schema_requires_grant = enable; }
bool doesSelectFromInformationSchemaRequireGrant() const { return select_from_information_schema_requires_grant; }
void setSettingsConstraintsReplacePrevious(bool enable) { settings_constraints_replace_previous = enable; }
bool doesSettingsConstraintsReplacePrevious() const { return settings_constraints_replace_previous; }
std::shared_ptr<const ContextAccess> getContextAccess(
const UUID & user_id,
const std::vector<UUID> & current_roles,
@ -223,6 +226,7 @@ private:
std::atomic_bool on_cluster_queries_require_cluster_grant = false;
std::atomic_bool select_from_system_db_requires_grant = false;
std::atomic_bool select_from_information_schema_requires_grant = false;
std::atomic_bool settings_constraints_replace_previous = false;
};
}

View File

@ -143,6 +143,7 @@ enum class AccessType
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \

View File

@ -15,6 +15,7 @@
#include <Poco/JSON/Stringifier.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <base/range.h>
#include <filesystem>
#include <fstream>
@ -182,8 +183,8 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String
if (should_rebuild_lists)
{
rebuildLists();
writeLists();
LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
reloadAllAndRebuildLists();
}
}
@ -224,63 +225,57 @@ bool DiskAccessStorage::isPathEqual(const String & directory_path_) const
}
/// Empties `entries_by_id` and every per-type map in `entries_by_name_and_type`.
/// The by-name maps hold pointers to elements of `entries_by_id` (see readLists()),
/// so both indexes are cleared together.
void DiskAccessStorage::clear()
{
entries_by_id.clear();
/// One by-name map exists per access entity type; clear each of them.
for (auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
}
bool DiskAccessStorage::readLists()
{
clear();
std::vector<std::tuple<UUID, String, AccessEntityType>> ids_names_types;
bool ok = true;
for (auto type : collections::range(AccessEntityType::MAX))
{
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto file_path = getListFilePath(directory_path, type);
if (!std::filesystem::exists(file_path))
{
LOG_WARNING(getLogger(), "File {} doesn't exist", file_path);
ok = false;
break;
return false;
}
try
{
for (const auto & [id, name] : readListFile(file_path))
{
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = name;
entries_by_name[entry.name] = &entry;
}
ids_names_types.emplace_back(id, name, type);
}
catch (...)
{
tryLogCurrentException(getLogger(), "Could not read " + file_path);
ok = false;
break;
return false;
}
}
if (!ok)
clear();
return ok;
entries_by_id.clear();
for (auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
for (auto & [id, name, type] : ids_names_types)
{
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = std::move(name);
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
entries_by_name[entry.name] = &entry;
}
return true;
}
bool DiskAccessStorage::writeLists()
void DiskAccessStorage::writeLists()
{
if (failed_to_write_lists)
return false; /// We don't try to write list files after the first fail.
/// The next restart of the server will invoke rebuilding of the list files.
return; /// We don't try to write list files after the first fail.
/// The next restart of the server will invoke rebuilding of the list files.
if (types_of_lists_to_write.empty())
return true;
return;
for (const auto & type : types_of_lists_to_write)
{
@ -299,14 +294,13 @@ bool DiskAccessStorage::writeLists()
tryLogCurrentException(getLogger(), "Could not write " + file_path);
failed_to_write_lists = true;
types_of_lists_to_write.clear();
return false;
return;
}
}
/// The list files were successfully written, so the 'need_rebuild_lists.mark' file is no longer needed.
std::filesystem::remove(getNeedRebuildListsMarkFilePath(directory_path));
types_of_lists_to_write.clear();
return true;
}
@ -364,10 +358,9 @@ void DiskAccessStorage::stopListsWritingThread()
/// Reads and parses all the "<id>.sql" files from a specified directory
/// and then saves the files "users.list", "roles.list", etc. to the same directory.
bool DiskAccessStorage::rebuildLists()
void DiskAccessStorage::reloadAllAndRebuildLists()
{
LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
clear();
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & directory_entry : std::filesystem::directory_iterator(directory_path))
{
@ -386,21 +379,55 @@ bool DiskAccessStorage::rebuildLists()
if (!entity)
continue;
const String & name = entity->getName();
auto type = entity->getType();
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = name;
entry.entity = entity;
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
entries_by_name[entry.name] = &entry;
all_entities.emplace_back(id, entity);
}
setAllInMemory(all_entities);
for (auto type : collections::range(AccessEntityType::MAX))
types_of_lists_to_write.insert(type);
return true;
failed_to_write_lists = false; /// Try again writing lists.
writeLists();
}
void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities)
{
/// Remove conflicting entities from the specified list.
auto entities_without_conflicts = all_entities;
clearConflictsInEntitiesList(entities_without_conflicts, getLogger());
/// Remove entities which are not used anymore.
boost::container::flat_set<UUID> ids_to_keep;
ids_to_keep.reserve(entities_without_conflicts.size());
for (const auto & [id, _] : entities_without_conflicts)
ids_to_keep.insert(id);
removeAllExceptInMemory(ids_to_keep);
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false);
}
/// Removes from memory every entry whose ID is not listed in `ids_to_keep`.
/// The removal is done with `write_on_disk = false`.
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
{
    auto next = entries_by_id.begin();
    while (next != entries_by_id.end())
    {
        /// Step forward before a possible removal: removeNoLock() can invalidate the iterator
        /// pointing to the element it removes from `entries_by_id`.
        const auto current = next;
        ++next;

        if (ids_to_keep.contains(current->first))
            continue;

        removeNoLock(current->first, /* throw_if_not_exists */ true, /* write_on_disk= */ false);
    }
}
/// Reloads this storage and rebuilds its lists, but only when `reload_mode` is `ReloadMode::ALL`;
/// any other mode is a no-op for this storage.
void DiskAccessStorage::reload(ReloadMode reload_mode)
{
    if (reload_mode == ReloadMode::ALL)
    {
        /// reloadAllAndRebuildLists() must be called with `mutex` held.
        std::lock_guard lock{mutex};
        reloadAllAndRebuildLists();
    }
}
@ -471,21 +498,21 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
std::optional<UUID> DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true))
return id;
return std::nullopt;
}
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, write_on_disk);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -497,6 +524,9 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto it_by_name = entries_by_name.find(name);
bool name_collision = (it_by_name != entries_by_name.end());
UUID id_by_name;
if (name_collision)
id_by_name = it_by_name->second->id;
if (name_collision && !replace_if_exists)
{
@ -507,19 +537,57 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
}
auto it_by_id = entries_by_id.find(id);
if (it_by_id != entries_by_id.end())
bool id_collision = (it_by_id != entries_by_id.end());
if (id_collision && !replace_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
if (throw_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
}
else
return false;
}
scheduleWriteLists(type);
writeAccessEntityToDisk(id, *new_entity);
if (write_on_disk)
scheduleWriteLists(type);
if (name_collision && replace_if_exists)
removeNoLock(it_by_name->second->id, /* throw_if_not_exists = */ false);
/// Remove collisions if necessary.
if (name_collision && (id_by_name != id))
{
assert(replace_if_exists);
removeNoLock(id_by_name, /* throw_if_not_exists= */ false, write_on_disk);
}
if (id_collision)
{
assert(replace_if_exists);
auto & existing_entry = it_by_id->second;
if (existing_entry.type == new_entity->getType())
{
if (!existing_entry.entity || (*existing_entry.entity != *new_entity))
{
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
if (existing_entry.name != new_entity->getName())
{
entries_by_name.erase(existing_entry.name);
[[maybe_unused]] bool inserted = entries_by_name.emplace(new_entity->getName(), &existing_entry).second;
assert(inserted);
}
existing_entry.entity = new_entity;
changes_notifier.onEntityUpdated(id, new_entity);
}
return true;
}
removeNoLock(id, /* throw_if_not_exists= */ false, write_on_disk);
}
/// Do insertion.
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
@ -535,11 +603,11 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
bool DiskAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists)
{
std::lock_guard lock{mutex};
return removeNoLock(id, throw_if_not_exists);
return removeNoLock(id, throw_if_not_exists, /* write_on_disk= */ true);
}
bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk)
{
auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
@ -556,8 +624,11 @@ bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
if (readonly)
throwReadonlyCannotRemove(type, entry.name);
scheduleWriteLists(type);
deleteAccessEntityOnDisk(id);
if (write_on_disk)
{
scheduleWriteLists(type);
deleteAccessEntityOnDisk(id);
}
/// Do removing.
UUID removed_id = id;
@ -573,11 +644,11 @@ bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
bool DiskAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists)
{
std::lock_guard lock{mutex};
return updateNoLock(id, update_func, throw_if_not_exists);
return updateNoLock(id, update_func, throw_if_not_exists, /* write_on_disk= */ true);
}
bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists)
bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk)
{
auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
@ -613,10 +684,13 @@ bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_
{
if (entries_by_name.contains(new_name))
throwNameCollisionCannotRename(type, old_name, new_name);
scheduleWriteLists(type);
if (write_on_disk)
scheduleWriteLists(type);
}
writeAccessEntityToDisk(id, *new_entity);
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
entry.entity = new_entity;
if (name_changed)
@ -668,7 +742,7 @@ void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : entities)
insertWithID(id, entity, replace_if_exists, throw_if_exists);
insertWithID(id, entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true);
});
}

View File

@ -27,6 +27,8 @@ public:
void setReadOnly(bool readonly_) { readonly = readonly_; }
bool isReadOnly() const override { return readonly; }
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
@ -41,19 +43,20 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
void clear();
bool readLists();
bool writeLists();
void scheduleWriteLists(AccessEntityType type);
bool rebuildLists();
bool readLists() TSA_REQUIRES(mutex);
void writeLists() TSA_REQUIRES(mutex);
void scheduleWriteLists(AccessEntityType type) TSA_REQUIRES(mutex);
void reloadAllAndRebuildLists() TSA_REQUIRES(mutex);
void setAllInMemory(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities) TSA_REQUIRES(mutex);
void removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
void listsWritingThreadFunc();
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
void stopListsWritingThread();
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
AccessEntityPtr readAccessEntityFromDisk(const UUID & id) const;
void writeAccessEntityToDisk(const UUID & id, const IAccessEntity & entity) const;
@ -69,13 +72,22 @@ private:
};
String directory_path;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
boost::container::flat_set<AccessEntityType> types_of_lists_to_write;
bool failed_to_write_lists = false; /// Whether writing of the list files has been failed since the recent restart of the server.
ThreadFromGlobalPool lists_writing_thread; /// List files are written in a separate thread.
std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit.
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
boost::container::flat_set<AccessEntityType> types_of_lists_to_write TSA_GUARDED_BY(mutex);
/// Whether writing of the list files has failed since the most recent restart of the server.
bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false;
/// List files are written in a separate thread.
ThreadFromGlobalPool lists_writing_thread;
/// Signals `lists_writing_thread` to exit.
std::condition_variable lists_writing_thread_should_exit;
bool lists_writing_thread_is_waiting = false;
AccessChangesNotifier & changes_notifier;
std::atomic<bool> readonly;
std::atomic<bool> backup_allowed;

View File

@ -13,6 +13,7 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
@ -562,6 +563,62 @@ UUID IAccessStorage::generateRandomID()
}
/// Erases from `entities` every entity which conflicts with another one in the same list.
/// Two entities conflict if they share an ID, or if they share both a type and a name.
/// All sides of a conflict are erased, and a warning is written to `log_` for each erased entity.
/// The relative order of the remaining entities is preserved.
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const Poco::Logger * log_)
{
    /// Position at which each ID (and each name, per entity type) was seen for the first time.
    std::unordered_map<UUID, size_t> first_position_by_id;
    std::unordered_map<std::string_view, size_t> first_position_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];
    std::vector<size_t> conflicting_positions;

    for (size_t pos = 0; pos != entities.size(); ++pos)
    {
        const auto & [id, entity] = entities[pos];

        /// Conflict: same ID is used for multiple entities. We will ignore all of them.
        const auto [id_it, id_seen_first_time] = first_position_by_id.try_emplace(id, pos);
        if (!id_seen_first_time)
        {
            conflicting_positions.emplace_back(pos);
            conflicting_positions.emplace_back(id_it->second);
        }

        /// Conflict: same name and type are used for multiple entities. We will ignore all of them.
        std::string_view entity_name = entity->getName();
        auto & first_position_by_name = first_position_by_type_and_name[static_cast<size_t>(entity->getType())];
        const auto [name_it, name_seen_first_time] = first_position_by_name.try_emplace(entity_name, pos);
        if (!name_seen_first_time)
        {
            conflicting_positions.emplace_back(pos);
            conflicting_positions.emplace_back(name_it->second);
        }
    }

    if (conflicting_positions.empty())
        return;

    /// A position can be recorded more than once (e.g. it conflicts both by ID and by name).
    std::sort(conflicting_positions.begin(), conflicting_positions.end());
    conflicting_positions.erase(
        std::unique(conflicting_positions.begin(), conflicting_positions.end()), conflicting_positions.end());

    for (size_t pos : conflicting_positions)
    {
        LOG_WARNING(
            log_,
            "Skipping {} (id={}) due to conflicts with other access entities",
            entities[pos].second->formatTypeWithName(),
            toString(entities[pos].first));
    }

    /// Remove conflicting entities. Must remove in reverse order,
    /// otherwise erasing an element would shift the positions which are still to be erased.
    for (auto it = conflicting_positions.rbegin(); it != conflicting_positions.rend(); ++it)
        entities.erase(entities.begin() + *it);
}
Poco::Logger * IAccessStorage::getLogger() const
{
Poco::Logger * ptr = log.load();

View File

@ -42,15 +42,26 @@ public:
/// Returns true if this entity is readonly.
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
/// Reloads and updates entities in this storage. This function is used to implement SYSTEM RELOAD CONFIG.
virtual void reload() {}
/// Starts periodic reloading and update of entities in this storage.
/// Starts periodic reloading and updating of entities in this storage.
virtual void startPeriodicReloading() {}
/// Stops periodic reloading and update of entities in this storage.
/// Stops periodic reloading and updating of entities in this storage.
virtual void stopPeriodicReloading() {}
/// Selects which access storages are reloaded by `reload()`.
enum class ReloadMode
{
/// Try to reload all access storages (including users.xml, local (disk) access storage, replicated (in ZooKeeper) access storage).
/// This mode is invoked by the SYSTEM RELOAD USERS command.
ALL,
/// Only reloads users.xml.
/// This mode is invoked by the SYSTEM RELOAD CONFIG command.
USERS_CONFIG_ONLY,
};
/// Makes this storage reload and update access entities right now.
virtual void reload(ReloadMode /* reload_mode */) {}
/// Returns the identifiers of all the entities of a specified type contained in the storage.
std::vector<UUID> findAll(AccessEntityType type) const;
@ -177,6 +188,7 @@ protected:
static UUID generateRandomID();
Poco::Logger * getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const Poco::Logger * log_);
[[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -5,7 +5,6 @@
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
namespace DB
@ -90,6 +89,9 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto it_by_name = entries_by_name.find(name);
bool name_collision = (it_by_name != entries_by_name.end());
UUID id_by_name;
if (name_collision)
id_by_name = it_by_name->second->id;
if (name_collision && !replace_if_exists)
{
@ -100,16 +102,43 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
}
auto it_by_id = entries_by_id.find(id);
if (it_by_id != entries_by_id.end())
bool id_collision = (it_by_id != entries_by_id.end());
if (id_collision && !replace_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
if (throw_if_exists)
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
else
return false;
}
if (name_collision && replace_if_exists)
/// Remove collisions if necessary.
if (name_collision && (id_by_name != id))
{
const auto & existing_entry = *(it_by_name->second);
removeNoLock(existing_entry.id, /* throw_if_not_exists = */ false);
assert(replace_if_exists);
removeNoLock(id_by_name, /* throw_if_not_exists= */ true);
}
if (id_collision)
{
assert(replace_if_exists);
auto & existing_entry = it_by_id->second;
if (existing_entry.entity->getType() == new_entity->getType())
{
if (*existing_entry.entity != *new_entity)
{
if (existing_entry.entity->getName() != new_entity->getName())
{
entries_by_name.erase(existing_entry.entity->getName());
[[maybe_unused]] bool inserted = entries_by_name.emplace(new_entity->getName(), &existing_entry).second;
assert(inserted);
}
existing_entry.entity = new_entity;
changes_notifier.onEntityUpdated(id, new_entity);
}
return true;
}
removeNoLock(id, /* throw_if_not_exists= */ true);
}
/// Do insertion.
@ -201,6 +230,29 @@ bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat
}
void MemoryAccessStorage::removeAllExcept(const std::vector<UUID> & ids_to_keep)
{
std::lock_guard lock{mutex};
removeAllExceptNoLock(ids_to_keep);
}
void MemoryAccessStorage::removeAllExceptNoLock(const std::vector<UUID> & ids_to_keep)
{
removeAllExceptNoLock(boost::container::flat_set<UUID>{ids_to_keep.begin(), ids_to_keep.end()});
}
/// Removes every entry whose ID is not contained in `ids_to_keep`.
/// The caller must already hold `mutex`.
void MemoryAccessStorage::removeAllExceptNoLock(const boost::container::flat_set<UUID> & ids_to_keep)
{
    auto next = entries_by_id.begin();
    while (next != entries_by_id.end())
    {
        /// Step forward before a possible removal: removeNoLock() can invalidate the iterator
        /// pointing to the element it removes from `entries_by_id`.
        const auto current = next;
        ++next;

        if (ids_to_keep.contains(current->first))
            continue;

        removeNoLock(current->first, /* throw_if_not_exists */ true);
    }
}
void MemoryAccessStorage::setAll(const std::vector<AccessEntityPtr> & all_entities)
{
std::vector<std::pair<UUID, AccessEntityPtr>> entities_with_ids;
@ -215,61 +267,20 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
{
std::lock_guard lock{mutex};
boost::container::flat_set<UUID> not_used_ids;
std::vector<UUID> conflicting_ids;
/// Remove conflicting entities from the specified list.
auto entities_without_conflicts = all_entities;
clearConflictsInEntitiesList(entities_without_conflicts, getLogger());
/// Get the list of currently used IDs. Later we will remove those of them which are not used anymore.
for (const auto & id : entries_by_id | boost::adaptors::map_keys)
not_used_ids.emplace(id);
/// Get the list of conflicting IDs and update the list of currently used ones.
for (const auto & [id, entity] : all_entities)
{
auto it = entries_by_id.find(id);
if (it != entries_by_id.end())
{
not_used_ids.erase(id); /// ID is used.
Entry & entry = it->second;
if (entry.entity->getType() != entity->getType())
conflicting_ids.emplace_back(id); /// Conflict: same ID, different type.
}
const auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(entity->getType())];
auto it2 = entries_by_name.find(entity->getName());
if (it2 != entries_by_name.end())
{
Entry & entry = *(it2->second);
if (entry.id != id)
conflicting_ids.emplace_back(entry.id); /// Conflict: same name and type, different ID.
}
}
/// Remove entities which are not used anymore and which are in conflict with new entities.
boost::container::flat_set<UUID> ids_to_remove = std::move(not_used_ids);
boost::range::copy(conflicting_ids, std::inserter(ids_to_remove, ids_to_remove.end()));
for (const auto & id : ids_to_remove)
removeNoLock(id, /* throw_if_not_exists = */ false);
/// Remove entities which are not used anymore.
boost::container::flat_set<UUID> ids_to_keep;
ids_to_keep.reserve(entities_without_conflicts.size());
for (const auto & [id, _] : entities_without_conflicts)
ids_to_keep.insert(id);
removeAllExceptNoLock(ids_to_keep);
/// Insert or update entities.
for (const auto & [id, entity] : all_entities)
{
auto it = entries_by_id.find(id);
if (it != entries_by_id.end())
{
if (*(it->second.entity) != *entity)
{
const AccessEntityPtr & changed_entity = entity;
updateNoLock(id,
[&changed_entity](const AccessEntityPtr &) { return changed_entity; },
/* throw_if_not_exists = */ true);
}
}
else
{
insertNoLock(id, entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
}
}
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
}

View File

@ -22,6 +22,15 @@ public:
const char * getStorageType() const override { return STORAGE_TYPE; }
/// Inserts an entity with a specified ID.
/// If `replace_if_exists == true` it can replace an existing entry with such ID and also remove an existing entry
/// with such name & type.
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
/// Removes all entities except the specified list `ids_to_keep`.
/// The function skips IDs not contained in the storage.
void removeAllExcept(const std::vector<UUID> & ids_to_keep);
/// Sets all entities at once.
void setAll(const std::vector<AccessEntityPtr> & all_entities);
void setAll(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities);
@ -39,11 +48,13 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) TSA_REQUIRES(mutex);
void removeAllExceptNoLock(const std::vector<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
void removeAllExceptNoLock(const boost::container::flat_set<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
struct Entry
{
UUID id;
@ -54,6 +65,6 @@ private:
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex); /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
const bool backup_allowed = false;
};
}

View File

@ -223,13 +223,6 @@ bool MultipleAccessStorage::isReadOnly(const UUID & id) const
}
/// Asks every nested storage to reload its entities.
void MultipleAccessStorage::reload()
{
    /// The list of storages is obtained once and kept in a local variable for the duration of the loop.
    auto nested_storages = getStoragesInternal();
    for (const auto & nested_storage : *nested_storages)
        nested_storage->reload();
}
void MultipleAccessStorage::startPeriodicReloading()
{
auto storages = getStoragesInternal();
@ -244,6 +237,13 @@ void MultipleAccessStorage::stopPeriodicReloading()
storage->stopPeriodicReloading();
}
/// Forwards the reload request, with the same `reload_mode`, to every nested storage.
void MultipleAccessStorage::reload(ReloadMode reload_mode)
{
    /// The list of storages is obtained once and kept in a local variable for the duration of the loop.
    auto nested_storages = getStoragesInternal();
    for (const auto & nested_storage : *nested_storages)
        nested_storage->reload(reload_mode);
}
std::optional<UUID> MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
{

View File

@ -25,9 +25,9 @@ public:
bool isReadOnly() const override;
bool isReadOnly(const UUID & id) const override;
void reload() override;
void startPeriodicReloading() override;
void stopPeriodicReloading() override;
void reload(ReloadMode reload_mode) override;
void setStorages(const std::vector<StoragePtr> & storages);
void addStorage(const StoragePtr & new_storage);

View File

@ -301,4 +301,5 @@ std::vector<QuotaUsage> QuotaCache::getAllQuotasUsage() const
}
return all_usage;
}
}

View File

@ -24,8 +24,8 @@ namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NO_ZOOKEEPER;
extern const int BAD_ARGUMENTS;
extern const int NO_ZOOKEEPER;
}
static UUID parseUUID(const String & text)
@ -46,6 +46,7 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
, zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, watched_queue(std::make_shared<ConcurrentBoundedQueue<UUID>>(std::numeric_limits<size_t>::max()))
, memory_storage(storage_name_, changes_notifier_, false)
, changes_notifier(changes_notifier_)
, backup_allowed(allow_backup_)
{
@ -59,7 +60,7 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path;
initializeZookeeper();
initZooKeeperWithRetries(/* max_retries= */ 2);
}
ReplicatedAccessStorage::~ReplicatedAccessStorage()
@ -121,15 +122,14 @@ bool ReplicatedAccessStorage::insertWithID(const UUID & id, const AccessEntityPt
const String & name = new_entity->getName();
LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
if (!ok)
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
refreshEntity(zookeeper, id);
return true;
}
@ -163,51 +163,79 @@ bool ReplicatedAccessStorage::insertZooKeeper(
if (res == Coordination::Error::ZNODEEXISTS)
{
if (!throw_if_exists && !replace_if_exists)
return false; /// Couldn't insert a new entity.
if (throw_if_exists)
{
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
throwNameCollisionCannotInsert(type, name);
}
}
assert(replace_if_exists);
Coordination::Requests replace_ops;
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// The UUID already exists, simply fail.
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// The UUID is already associated with some existing entity, we will get rid of the conflicting entity first.
/// This itself could fail if the conflicting entity disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
Coordination::Stat stat;
String existing_entity_definition = zookeeper->get(entity_path, &stat);
auto existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
const String & existing_entity_name = existing_entity->getName();
const AccessEntityType existing_entity_type = existing_entity->getType();
const AccessEntityTypeInfo existing_entity_type_info = AccessEntityTypeInfo::get(existing_entity_type);
const String existing_name_path = zookeeper_path + "/" + existing_entity_type_info.unique_char + "/" + escapeForFileName(existing_entity_name);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else if (replace_if_exists)
{
/// The name already exists for this type.
/// If asked to, we need to replace the existing entity.
if (existing_name_path != name_path)
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_name_path, -1));
/// First get the uuid of the existing entity
/// This itself could fail if the conflicting name disappears in the meantime.
/// If that happens, then we'll just retry from the start.
Coordination::Stat name_stat;
String existing_entity_uuid = zookeeper->get(name_path, &name_stat);
const String existing_entity_path = zookeeper_path + "/uuid/" + existing_entity_uuid;
Coordination::Requests replace_ops;
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_entity_path, -1));
replace_ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
replace_ops.emplace_back(zkutil::makeSetRequest(name_path, entity_uuid, name_stat.version));
/// If this fails, then we'll just retry from the start.
zookeeper->multi(replace_ops);
/// Everything's fine, the new entity has been inserted instead of an existing entity.
return true;
replace_ops.emplace_back(zkutil::makeSetRequest(entity_path, new_entity_definition, stat.version));
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
if (throw_if_exists)
throwNameCollisionCannotInsert(type, name);
else
return false;
replace_ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
}
if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
{
/// The name is already associated with some existing entity, we will get rid of the conflicting entity first.
/// This itself could fail if the conflicting entity disappears in the meantime.
/// If that happens, then we'll just retry from the start.
Coordination::Stat stat;
String existing_entity_uuid = zookeeper->get(name_path, &stat);
const String existing_entity_path = zookeeper_path + "/uuid/" + existing_entity_uuid;
if (existing_entity_path != entity_path)
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_entity_path, -1));
replace_ops.emplace_back(zkutil::makeSetRequest(name_path, entity_uuid, stat.version));
}
else
{
replace_ops.emplace_back(zkutil::makeCreateRequest(name_path, entity_uuid, zkutil::CreateMode::Persistent));
}
/// If this fails, then we'll just retry from the start.
zookeeper->multi(replace_ops);
/// Everything's fine, the new entity has been inserted instead of an existing entity.
return true;
}
/// If this fails, then we'll just retry from the start.
@ -221,7 +249,7 @@ bool ReplicatedAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exis
{
LOG_DEBUG(getLogger(), "Removing entity {}", toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&] { ok = removeZooKeeper(zookeeper, id, throw_if_not_exists); });
@ -273,15 +301,14 @@ bool ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & upd
{
LOG_DEBUG(getLogger(), "Updating entity {}", toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&] { ok = updateZooKeeper(zookeeper, id, update_func, throw_if_not_exists); });
if (!ok)
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
refreshEntity(zookeeper, id);
return true;
}
@ -349,50 +376,110 @@ void ReplicatedAccessStorage::runWatchingThread()
{
LOG_DEBUG(getLogger(), "Started watching thread");
setThreadName("ReplACLWatch");
while (watching)
{
bool refreshed = false;
try
{
if (!initialized)
initializeZookeeper();
if (refresh())
changes_notifier.sendNotifications();
initZooKeeperIfNeeded();
refreshed = refresh();
}
catch (...)
{
tryLogCurrentException(getLogger(), "Unexpected error, will try to restart worker thread:");
tryLogCurrentException(getLogger(), "Will try to restart watching thread after error");
resetAfterError();
sleepForSeconds(5);
continue;
}
if (refreshed)
{
try
{
changes_notifier.sendNotifications();
}
catch (...)
{
tryLogCurrentException(getLogger(), "Error while sending notifications");
}
}
}
}
/// Drops state after a failure; the watching thread calls this from its catch handler
/// before sleeping and retrying.
/// NOTE(review): this span declares `lock` twice in one scope, so it appears to interleave
/// two revisions of the body (one clearing `initialized`, the queue and the entry maps,
/// the other only dropping `cached_zookeeper`). Confirm which lines are current.
void ReplicatedAccessStorage::resetAfterError()
{
initialized = false;
UUID id;
/// Pops until tryPop() reports failure, i.e. discards every queued id.
while (watched_queue->tryPop(id)) {}
std::lock_guard lock{mutex};
for (const auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
entries_by_id.clear();
/// Make watching thread reinitialize ZooKeeper and reread everything.
std::lock_guard lock{cached_zookeeper_mutex};
cached_zookeeper = nullptr;
}
void ReplicatedAccessStorage::initializeZookeeper()
/// Calls initZooKeeperIfNeeded() up to `max_retries` times.
/// An exception is rethrown unless it is a Coordination::Exception classified as a hardware
/// error by Coordination::isHardwareError() and at least one attempt remains.
/// NOTE(review): the lines using `initialized`, the local `zookeeper` and createRootNodes()
/// look like leftovers of a different revision (initializeZookeeper) interleaved into this
/// body. Confirm before relying on them.
void ReplicatedAccessStorage::initZooKeeperWithRetries(size_t max_retries)
{
assert(!initialized);
auto zookeeper = get_zookeeper();
for (size_t attempt = 0; attempt < max_retries; ++attempt)
{
try
{
initZooKeeperIfNeeded();
break; /// If we're here the initialization has been successful.
}
catch (const Exception & e)
{
bool need_another_attempt = false;
if (!zookeeper)
throw Exception("Can't have Replicated access without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
if (const auto * coordination_exception = dynamic_cast<const Coordination::Exception *>(&e);
coordination_exception && Coordination::isHardwareError(coordination_exception->code))
{
/// In case of a network error we'll try to initialize again.
LOG_ERROR(getLogger(), "Initialization failed. Error: {}", e.message());
/// Retry only while this was not the last allowed attempt.
need_another_attempt = (attempt + 1 < max_retries);
}
createRootNodes(zookeeper);
if (!need_another_attempt)
throw;
}
}
}
refreshEntities(zookeeper);
/// Triggers initialization by requesting the ZooKeeper client via getZooKeeper();
/// the client itself is not used here.
void ReplicatedAccessStorage::initZooKeeperIfNeeded()
{
    /// Only the call matters, so the returned pointer is discarded explicitly.
    static_cast<void>(getZooKeeper());
}
initialized = true;
/// Locking wrapper around getZooKeeperNoLock(): takes `cached_zookeeper_mutex`
/// for the duration of the call and returns whatever the no-lock variant returns.
zkutil::ZooKeeperPtr ReplicatedAccessStorage::getZooKeeper()
{
    std::lock_guard guard{cached_zookeeper_mutex};
    auto zookeeper = getZooKeeperNoLock();
    return zookeeper;
}
/// Returns the cached ZooKeeper client, (re)creating it when there is none or it has expired.
/// A newly obtained client is synced, the root nodes are created, and all entities are re-read
/// before the client is stored in `cached_zookeeper`.
/// Throws NO_ZOOKEEPER if `get_zookeeper` yields a null client.
zkutil::ZooKeeperPtr ReplicatedAccessStorage::getZooKeeperNoLock()
{
    /// Fast path: the cached client exists and is not expired.
    const bool cached_is_usable = cached_zookeeper && !cached_zookeeper->expired();
    if (cached_is_usable)
        return cached_zookeeper;

    auto fresh_zookeeper = get_zookeeper();
    if (!fresh_zookeeper)
        throw Exception("Can't have Replicated access without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);

    /// We may have connected to a different [Zoo]Keeper instance than before,
    /// so sync first to avoid reading a slightly stale state.
    fresh_zookeeper->sync(zookeeper_path);

    createRootNodes(fresh_zookeeper);
    refreshEntities(fresh_zookeeper, /* all= */ true);

    /// The client is cached only after the steps above completed without throwing.
    cached_zookeeper = fresh_zookeeper;
    return cached_zookeeper;
}
/// Full reload: drops the cached ZooKeeper client and immediately obtains a new one.
/// Any mode other than ReloadMode::ALL is a no-op.
void ReplicatedAccessStorage::reload(ReloadMode reload_mode)
{
    if (reload_mode == ReloadMode::ALL)
    {
        /// Forget the cached client and go through getZooKeeperNoLock() again.
        std::lock_guard guard{cached_zookeeper_mutex};
        cached_zookeeper = nullptr;
        getZooKeeperNoLock();
    }
}
void ReplicatedAccessStorage::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper)
@ -414,10 +501,10 @@ bool ReplicatedAccessStorage::refresh()
if (!watched_queue->tryPop(id, /* timeout_ms: */ 10000))
return false;
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
if (id == UUIDHelpers::Nil)
refreshEntities(zookeeper);
refreshEntities(zookeeper, /* all= */ false);
else
refreshEntity(zookeeper, id);
@ -425,10 +512,16 @@ bool ReplicatedAccessStorage::refresh()
}
void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper)
void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper, bool all)
{
LOG_DEBUG(getLogger(), "Refreshing entities list");
if (all)
{
/// It doesn't make sense to keep the queue because we will reread everything in this function.
watched_queue->clear();
}
const String zookeeper_uuids_path = zookeeper_path + "/uuid";
auto watch_entities_list = [watched_queue = watched_queue](const Coordination::WatchResponse &)
{
@ -437,185 +530,129 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke
Coordination::Stat stat;
const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list);
std::unordered_set<UUID> entity_uuids;
std::vector<UUID> entity_uuids;
entity_uuids.reserve(entity_uuid_strs.size());
for (const String & entity_uuid_str : entity_uuid_strs)
entity_uuids.insert(parseUUID(entity_uuid_str));
entity_uuids.emplace_back(parseUUID(entity_uuid_str));
std::lock_guard lock{mutex};
std::vector<UUID> entities_to_remove;
/// Locally remove entities that were removed from ZooKeeper
for (const auto & pair : entries_by_id)
if (all)
{
const UUID & entity_uuid = pair.first;
if (!entity_uuids.contains(entity_uuid))
entities_to_remove.push_back(entity_uuid);
/// all=true means we read & parse all access entities from ZooKeeper.
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
for (const auto & uuid : entity_uuids)
{
if (auto entity = tryReadEntityFromZooKeeper(zookeeper, uuid))
entities.emplace_back(uuid, entity);
}
memory_storage.setAll(entities);
}
for (const auto & entity_uuid : entities_to_remove)
removeEntityNoLock(entity_uuid);
/// Locally add entities that were added to ZooKeeper
for (const auto & entity_uuid : entity_uuids)
else
{
const auto it = entries_by_id.find(entity_uuid);
if (it == entries_by_id.end())
refreshEntityNoLock(zookeeper, entity_uuid);
/// all=false means we read & parse only new access entities from ZooKeeper.
memory_storage.removeAllExcept(entity_uuids);
for (const auto & uuid : entity_uuids)
{
if (!memory_storage.exists(uuid))
refreshEntityNoLock(zookeeper, uuid);
}
}
LOG_DEBUG(getLogger(), "Refreshing entities list finished");
}
/// Re-reads one entity from ZooKeeper and applies the result to the local state under `mutex`:
/// a successfully read entity is stored, a missing/unreadable one is removed.
/// NOTE(review): this body appears to interleave two revisions. It both calls
/// refreshEntityNoLock() and applies `entity` itself, which would refresh twice.
/// Confirm which lines are current.
void ReplicatedAccessStorage::refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id));
/// The ZooKeeper read is done before `mutex` is taken.
auto entity = tryReadEntityFromZooKeeper(zookeeper, id);
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
if (entity)
setEntityNoLock(id, entity);
else
removeEntityNoLock(id);
}
/// Reads the entity `id` from ZooKeeper and mirrors the outcome locally:
/// stores it when the read produced an entity, removes it otherwise.
void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
    LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id));

    auto fetched = tryReadEntityFromZooKeeper(zookeeper, id);
    if (!fetched)
    {
        /// Nothing could be read for this id, so drop the local copy.
        removeEntityNoLock(id);
        return;
    }

    setEntityNoLock(id, fetched);
}
/// Reads and deserializes the definition stored at `<zookeeper_path>/uuid/<id>`, installing a watch
/// that pushes `id` to `watched_queue` when the node changes.
/// NOTE(review): this span interleaves two revisions and is not well-formed as written
/// (`exists` is declared twice, and an `else` directly precedes `catch`). One revision
/// applies the result via setEntityNoLock()/removeEntityNoLock(); the other returns the
/// entity, or nullptr when the node is missing or deserialization throws. Confirm which is current.
AccessEntityPtr ReplicatedAccessStorage::tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const
{
/// The callback captures the queue and the id by value and reacts only to CHANGED events.
const auto watch_entity = [watched_queue = watched_queue, id](const Coordination::WatchResponse & response)
{
if (response.type == Coordination::Event::CHANGED)
[[maybe_unused]] bool push_result = watched_queue->push(id);
};
Coordination::Stat entity_stat;
const String entity_path = zookeeper_path + "/uuid/" + toString(id);
String entity_definition;
const bool exists = zookeeper->tryGetWatch(entity_path, entity_definition, &entity_stat, watch_entity);
if (exists)
bool exists = zookeeper->tryGetWatch(entity_path, entity_definition, &entity_stat, watch_entity);
if (!exists)
return nullptr;
try
{
const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, entity_path);
setEntityNoLock(id, entity);
return deserializeAccessEntity(entity_definition, entity_path);
}
else
catch (...)
{
removeEntityNoLock(id);
/// The failure is logged and nullptr is returned instead of propagating the exception.
tryLogCurrentException(getLogger(), "Error while reading the definition of " + toString(id));
return nullptr;
}
}
/// Stores `entity` under `id` in the local state.
/// NOTE(review): this span appears to contain two implementations back to back: one
/// maintaining `entries_by_id` / `entries_by_name_and_type` by hand and notifying
/// `changes_notifier`, followed by a single call delegating to `memory_storage`.
/// Confirm which one is current.
void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntityPtr & entity)
{
LOG_DEBUG(getLogger(), "Setting id {} to entity named {}", toString(id), entity->getName());
const AccessEntityType type = entity->getType();
const String & name = entity->getName();
/// If the type+name already exists and is a different entity, remove old entity
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
if (auto it = entries_by_name.find(name); it != entries_by_name.end() && it->second->id != id)
{
removeEntityNoLock(it->second->id);
}
/// If the entity already exists under a different type+name, remove old type+name
bool existed_before = false;
if (auto it = entries_by_id.find(id); it != entries_by_id.end())
{
existed_before = true;
const AccessEntityPtr & existing_entity = it->second.entity;
const AccessEntityType existing_type = existing_entity->getType();
const String & existing_name = existing_entity->getName();
if (existing_type != type || existing_name != name)
{
auto & existing_entries_by_name = entries_by_name_and_type[static_cast<size_t>(existing_type)];
existing_entries_by_name.erase(existing_name);
}
}
/// operator[] creates the entry if it is absent; the name index stores a pointer to it.
auto & entry = entries_by_id[id];
entry.id = id;
entry.entity = entity;
entries_by_name[name] = &entry;
/// Notifications are sent only once `initialized` is set.
if (initialized)
{
if (existed_before)
changes_notifier.onEntityUpdated(id, entity);
else
changes_notifier.onEntityAdded(id, entity);
}
memory_storage.insertWithID(id, entity, /* replace_if_exists= */ true, /* throw_if_exists= */ false);
}
/// Removes the entity `id` from the local state; an unknown id is logged and ignored.
/// NOTE(review): this span appears to contain two implementations back to back: manual
/// bookkeeping of `entries_by_id` / `entries_by_name_and_type` with a removal notification,
/// followed by a single call delegating to `memory_storage`. Confirm which one is current.
void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id)
{
LOG_DEBUG(getLogger(), "Removing entity with id {}", toString(id));
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
{
LOG_DEBUG(getLogger(), "Id {} not found, ignoring removal", toString(id));
return;
}
const Entry & entry = it->second;
const AccessEntityType type = entry.entity->getType();
const String & name = entry.entity->getName();
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto name_it = entries_by_name.find(name);
/// The name is erased only if it maps to this very entry; other cases are logged as warnings.
if (name_it == entries_by_name.end())
LOG_WARNING(getLogger(), "Entity {} not found in names, ignoring removal of name", toString(id));
else if (name_it->second != &(it->second))
LOG_WARNING(getLogger(), "Name {} not pointing to entity {}, ignoring removal of name", name, toString(id));
else
entries_by_name.erase(name);
/// A copy of the id is taken before the erase and used for the notification afterwards.
UUID removed_id = id;
entries_by_id.erase(id);
LOG_DEBUG(getLogger(), "Removed entity with id {}", toString(id));
changes_notifier.onEntityRemoved(removed_id, type);
memory_storage.remove(id, /* throw_if_not_exists= */ false);
}
/// Looks up the id of the entity with the given type and name; returns an empty optional if there is none.
/// NOTE(review): two implementations appear back to back (a lookup in `entries_by_name_and_type`,
/// then a delegation to `memory_storage`). As written the final `return` is unreachable.
/// Confirm which one is current.
std::optional<UUID> ReplicatedAccessStorage::findImpl(AccessEntityType type, const String & name) const
{
std::lock_guard lock{mutex};
const auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto it = entries_by_name.find(name);
if (it == entries_by_name.end())
return {};
const Entry * entry = it->second;
return entry->id;
return memory_storage.find(type, name);
}
/// Returns the ids of all locally known entities of the given type.
/// NOTE(review): two implementations appear back to back (a scan over `entries_by_id`,
/// then a delegation to `memory_storage`). As written the final `return` is unreachable.
/// Confirm which one is current.
std::vector<UUID> ReplicatedAccessStorage::findAllImpl(AccessEntityType type) const
{
std::lock_guard lock{mutex};
std::vector<UUID> result;
/// Reserves for the total entry count, an upper bound on the matching entries.
result.reserve(entries_by_id.size());
for (const auto & [id, entry] : entries_by_id)
if (entry.entity->isTypeOf(type))
result.emplace_back(id);
return result;
return memory_storage.findAll(type);
}
/// Tells whether an entity with this id is present in the local state.
/// NOTE(review): two implementations appear back to back (`entries_by_id` lookup, then
/// delegation to `memory_storage`). As written the final `return` is unreachable.
/// Confirm which one is current.
bool ReplicatedAccessStorage::exists(const UUID & id) const
{
std::lock_guard lock{mutex};
return entries_by_id.contains(id);
return memory_storage.exists(id);
}
/// Returns the locally stored entity for `id`.
/// When the id is unknown, either calls throwNotFound() (if `throw_if_not_exists`) or returns nullptr.
/// NOTE(review): two implementations appear back to back (`entries_by_id` lookup, then
/// delegation to `memory_storage`). As written the final `return` is unreachable.
/// Confirm which one is current.
AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const
{
std::lock_guard lock{mutex};
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
{
if (throw_if_not_exists)
throwNotFound(id);
else
return nullptr;
}
const Entry & entry = it->second;
return entry.entity;
return memory_storage.read(id, throw_if_not_exists);
}

View File

@ -1,20 +1,13 @@
#pragma once
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ThreadPool.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Access/IAccessStorage.h>
#include <Access/MemoryAccessStorage.h>
namespace DB
@ -34,6 +27,7 @@ public:
void startPeriodicReloading() override { startWatchingThread(); }
void stopPeriodicReloading() override { stopWatchingThread(); }
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;
@ -43,9 +37,10 @@ public:
private:
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
const zkutil::GetZooKeeper get_zookeeper;
std::atomic<bool> initialized = false;
zkutil::ZooKeeperPtr cached_zookeeper TSA_GUARDED_BY(cached_zookeeper_mutex);
std::mutex cached_zookeeper_mutex;
std::atomic<bool> watching = false;
ThreadFromGlobalPool watching_thread;
@ -60,7 +55,10 @@ private:
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
void initializeZookeeper();
void initZooKeeperWithRetries(size_t max_retries);
void initZooKeeperIfNeeded();
zkutil::ZooKeeperPtr getZooKeeper();
zkutil::ZooKeeperPtr getZooKeeperNoLock() TSA_REQUIRES(cached_zookeeper_mutex);
void createRootNodes(const zkutil::ZooKeeperPtr & zookeeper);
void startWatchingThread();
@ -70,27 +68,21 @@ private:
void resetAfterError();
bool refresh();
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper, bool all);
void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) TSA_REQUIRES(mutex);
AccessEntityPtr tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const;
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity) TSA_REQUIRES(mutex);
void removeEntityNoLock(const UUID & id) TSA_REQUIRES(mutex);
struct Entry
{
UUID id;
AccessEntityPtr entity;
};
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
const bool backup_allowed = false;
};
}

View File

@ -35,88 +35,43 @@ void SettingsConstraints::clear()
constraints.clear();
}
void SettingsConstraints::setMinValue(std::string_view setting_name, const Field & min_value)
/// Sets the constraint for `setting_name`: non-null bounds are cast with Settings::castValueUtil()
/// and stored, null bounds leave the stored value untouched, and `writability` is always overwritten.
/// NOTE(review): the first body line (getConstraintRef(...).min_value = ...) looks like a leftover
/// from a different revision (setMinValue) interleaved here. Confirm before relying on it.
void SettingsConstraints::set(const String & setting_name, const Field & min_value, const Field & max_value, SettingConstraintWritability writability)
{
getConstraintRef(setting_name).min_value = Settings::castValueUtil(setting_name, min_value);
/// operator[] creates a default constraint when the setting has none yet.
auto & constraint = constraints[setting_name];
if (!min_value.isNull())
constraint.min_value = Settings::castValueUtil(setting_name, min_value);
if (!max_value.isNull())
constraint.max_value = Settings::castValueUtil(setting_name, max_value);
constraint.writability = writability;
}
/// Returns the stored minimum for `setting_name`, or a default-constructed Field when the setting has no constraint.
/// NOTE(review): two signatures precede this body (getMinValue and a `get` overload), which is
/// not well-formed. The body matches getMinValue; the `get` signature looks interleaved from
/// another revision. Confirm.
Field SettingsConstraints::getMinValue(std::string_view setting_name) const
void SettingsConstraints::get(const Settings & current_settings, std::string_view setting_name, Field & min_value, Field & max_value, SettingConstraintWritability & writability) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
return ptr->min_value;
else
return {};
}
/// Stores the upper bound for `setting_name`, cast with Settings::castValueUtil().
void SettingsConstraints::setMaxValue(std::string_view setting_name, const Field & max_value)
{
    /// Cast first: with C++17 assignment sequencing the right-hand side was evaluated before
    /// getConstraintRef() in the one-line form, so a failing cast never creates an empty constraint.
    auto converted = Settings::castValueUtil(setting_name, max_value);
    getConstraintRef(setting_name).max_value = std::move(converted);
}
/// Returns the stored maximum for `setting_name`, or a default-constructed Field
/// when the setting has no constraint.
Field SettingsConstraints::getMaxValue(std::string_view setting_name) const
{
    if (const auto * constraint = tryGetConstraint(setting_name))
        return constraint->max_value;
    return {};
}
/// Sets the read-only flag of the constraint for `setting_name`
/// (the constraint is obtained via getConstraintRef()).
void SettingsConstraints::setReadOnly(std::string_view setting_name, bool read_only)
{
    auto & constraint = getConstraintRef(setting_name);
    constraint.read_only = read_only;
}
/// Tells whether `setting_name` has a constraint with the read-only flag set;
/// a setting without a constraint is reported as not read-only.
bool SettingsConstraints::isReadOnly(std::string_view setting_name) const
{
    const auto * constraint = tryGetConstraint(setting_name);
    return constraint != nullptr && constraint->read_only;
}
/// Overwrites all three parts of the constraint for `setting_name`:
/// both bounds (cast with Settings::castValueUtil()) and the read-only flag.
void SettingsConstraints::set(std::string_view setting_name, const Field & min_value, const Field & max_value, bool read_only)
{
    /// The constraint is obtained before any cast is attempted, as in the original order.
    auto & constraint = getConstraintRef(setting_name);

    constraint.min_value = Settings::castValueUtil(setting_name, min_value);
    constraint.max_value = Settings::castValueUtil(setting_name, max_value);
    constraint.read_only = read_only;
}
/// Writes the stored constraint of `setting_name` into the output parameters.
/// NOTE(review): this span interleaves two revisions. The first part fills `read_only` from
/// tryGetConstraint() (defaults when absent); the trailing four lines use getChecker() with
/// `current_settings` and `writability`, neither of which is declared by this signature.
/// Confirm which one is current.
void SettingsConstraints::get(std::string_view setting_name, Field & min_value, Field & max_value, bool & read_only) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
{
min_value = ptr->min_value;
max_value = ptr->max_value;
read_only = ptr->read_only;
}
else
{
/// No constraint: report null bounds and not read-only.
min_value = Field{};
max_value = Field{};
read_only = false;
}
auto checker = getChecker(current_settings, setting_name);
min_value = checker.constraint.min_value;
max_value = checker.constraint.max_value;
writability = checker.constraint.writability;
}
/// Merges the constraints of `other` into this object.
/// In the branch guarded by doesSettingsConstraintsReplacePrevious(), each constraint of `other`
/// replaces the local one entirely. In the other branch only non-null bounds are taken over and
/// writability is only ever tightened to CONST.
/// NOTE(review): the leading `for` and the block using getConstraintRef()/`read_only` look like
/// lines of a different revision interleaved into this body. Confirm which lines are current.
void SettingsConstraints::merge(const SettingsConstraints & other)
{
for (const auto & [other_name, other_constraint] : other.constraints)
if (access_control->doesSettingsConstraintsReplacePrevious())
{
auto & constraint = getConstraintRef(other_name);
if (!other_constraint.min_value.isNull())
constraint.min_value = other_constraint.min_value;
if (!other_constraint.max_value.isNull())
constraint.max_value = other_constraint.max_value;
if (other_constraint.read_only)
constraint.read_only = true;
for (const auto & [other_name, other_constraint] : other.constraints)
constraints[other_name] = other_constraint;
}
else
{
for (const auto & [other_name, other_constraint] : other.constraints)
{
auto & constraint = constraints[other_name];
if (!other_constraint.min_value.isNull())
constraint.min_value = other_constraint.min_value;
if (!other_constraint.max_value.isNull())
constraint.max_value = other_constraint.max_value;
if (other_constraint.writability == SettingConstraintWritability::CONST)
constraint.writability = SettingConstraintWritability::CONST; // NOTE: In this mode <readonly/> flag cannot be overridden to be false
}
}
}
@ -180,26 +135,6 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
}
};
bool cannot_compare = false;
auto less = [&](const Field & left, const Field & right)
{
cannot_compare = false;
if (reaction == THROW_ON_VIOLATION)
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
else
{
try
{
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
}
catch (...)
{
cannot_compare = true;
return false;
}
}
};
if (reaction == THROW_ON_VIOLATION)
{
try
@ -239,115 +174,119 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
return false;
}
if (!current_settings.allow_ddl && setting_name == "allow_ddl")
return getChecker(current_settings, setting_name).check(change, new_value, reaction);
}
bool SettingsConstraints::Checker::check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const
{
const String & setting_name = change.name;
auto less_or_cannot_compare = [=](const Field & left, const Field & right)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify 'allow_ddl' setting when DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
else
return false;
}
/** The `readonly` value is understood as follows:
* 0 - everything allowed.
* 1 - only read queries can be made; you can not change the settings.
* 2 - You can only do read queries and you can change the settings, except for the `readonly` setting.
*/
if (current_settings.readonly == 1)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify '" + setting_name + "' setting in readonly mode", ErrorCodes::READONLY);
else
return false;
}
if (current_settings.readonly > 1 && setting_name == "readonly")
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify 'readonly' setting in readonly mode", ErrorCodes::READONLY);
else
return false;
}
const Constraint * constraint = tryGetConstraint(setting_name);
if (constraint)
{
if (constraint->read_only)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
const Field & min_value = constraint->min_value;
const Field & max_value = constraint->max_value;
if (!min_value.isNull() && !max_value.isNull() && (less(max_value, min_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
if (!min_value.isNull() && (less(new_value, min_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
try
{
throw Exception(
"Setting " + setting_name + " shouldn't be less than " + applyVisitor(FieldVisitorToString(), constraint->min_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
}
else
change.value = min_value;
}
if (!max_value.isNull() && (less(max_value, new_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
catch (...)
{
throw Exception(
"Setting " + setting_name + " shouldn't be greater than " + applyVisitor(FieldVisitorToString(), constraint->max_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
return true;
}
else
change.value = max_value;
}
};
if (!explain.empty())
{
if (reaction == THROW_ON_VIOLATION)
throw Exception(explain, code);
else
return false;
}
if (constraint.writability == SettingConstraintWritability::CONST)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
const auto & min_value = constraint.min_value;
const auto & max_value = constraint.max_value;
if (!min_value.isNull() && !max_value.isNull() && less_or_cannot_compare(max_value, min_value))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
if (!min_value.isNull() && less_or_cannot_compare(new_value, min_value))
{
if (reaction == THROW_ON_VIOLATION)
{
throw Exception(
"Setting " + setting_name + " shouldn't be less than " + applyVisitor(FieldVisitorToString(), min_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
else
change.value = min_value;
}
if (!max_value.isNull() && less_or_cannot_compare(max_value, new_value))
{
if (reaction == THROW_ON_VIOLATION)
{
throw Exception(
"Setting " + setting_name + " shouldn't be greater than " + applyVisitor(FieldVisitorToString(), max_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
else
change.value = max_value;
}
return true;
}
/// NOTE(review): this span interleaves two definitions and is not well-formed as written.
///  - getConstraintRef(): finds the constraint for `setting_name`, creating an empty one if absent,
///    and returns a reference to it.
///  - getChecker(): picks a Checker for a change of `setting_name` under `current_settings`:
///    a forbidding Checker for 'allow_ddl' when DDL is prohibited and for 'readonly' when
///    readonly > 1; under readonly == 1 a forbidding Checker unless the stored constraint is
///    CHANGEABLE_IN_READONLY; an allow-all Checker when no constraint is stored; otherwise a
///    Checker wrapping the stored constraint.
/// Confirm which lines belong to which definition before editing.
SettingsConstraints::Constraint & SettingsConstraints::getConstraintRef(std::string_view setting_name)
SettingsConstraints::Checker SettingsConstraints::getChecker(const Settings & current_settings, std::string_view setting_name) const
{
if (!current_settings.allow_ddl && setting_name == "allow_ddl")
return Checker("Cannot modify 'allow_ddl' setting when DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
/** The `readonly` value is understood as follows:
* 0 - no read-only restrictions.
* 1 - only read requests, as well as changing settings with `changable_in_readonly` flag.
* 2 - only read requests, as well as changing settings, except for the `readonly` setting.
*/
if (current_settings.readonly > 1 && setting_name == "readonly")
return Checker("Cannot modify 'readonly' setting in readonly mode", ErrorCodes::READONLY);
auto it = constraints.find(setting_name);
if (it == constraints.end())
if (current_settings.readonly == 1)
{
auto setting_name_ptr = std::make_shared<const String>(setting_name);
Constraint new_constraint;
new_constraint.setting_name = setting_name_ptr;
it = constraints.emplace(*setting_name_ptr, std::move(new_constraint)).first;
if (it == constraints.end() || it->second.writability != SettingConstraintWritability::CHANGEABLE_IN_READONLY)
return Checker("Cannot modify '" + String(setting_name) + "' setting in readonly mode", ErrorCodes::READONLY);
}
return it->second;
else // For both readonly=0 and readonly=2
{
if (it == constraints.end())
return Checker(); // Allowed
}
return Checker(it->second);
}
/// Returns a pointer to the stored constraint for `setting_name`, or nullptr if there is none.
const SettingsConstraints::Constraint * SettingsConstraints::tryGetConstraint(std::string_view setting_name) const
{
    const auto found = constraints.find(setting_name);
    return found != constraints.end() ? &found->second : nullptr;
}
/// Member-wise equality of two constraints.
/// NOTE(review): two `return` statements appear back to back (one comparing `read_only` and
/// `*setting_name`, one comparing `writability`). As written the second is unreachable.
/// Confirm which one is current.
bool SettingsConstraints::Constraint::operator==(const Constraint & other) const
{
return (read_only == other.read_only) && (min_value == other.min_value) && (max_value == other.max_value)
&& (*setting_name == *other.setting_name);
return writability == other.writability && min_value == other.min_value && max_value == other.max_value;
}
/// Two SettingsConstraints are equal when their `constraints` containers compare equal.
/// NOTE(review): the two `return` lines are the same comparison with and without parentheses,
/// presumably two revisions of one line. As written the second is unreachable.
bool operator ==(const SettingsConstraints & left, const SettingsConstraints & right)
{
return (left.constraints == right.constraints);
return left.constraints == right.constraints;
}
}

View File

@ -1,9 +1,9 @@
#pragma once
#include <Access/SettingsProfileElement.h>
#include <Common/SettingsChanges.h>
#include <unordered_map>
namespace Poco::Util
{
class AbstractConfiguration;
@ -35,18 +35,22 @@ class AccessControl;
* <max>20000000000</max>
* </max_memory_usage>
* <force_index_by_date>
* <readonly/>
* <const/>
* </force_index_by_date>
* <max_threads>
* <changable_in_readonly/>
* </max_threads>
* </constraints>
* </user_profile>
* </profiles>
*
* This class also checks that we are not in the read-only mode.
* If a setting cannot be changed due to the read-only mode, this class throws an exception.
* The value of `readonly` value is understood as follows:
* 0 - everything allowed.
* 1 - only read queries can be made; you can not change the settings.
* 2 - you can only do read queries and you can change the settings, except for the `readonly` setting.
* The value of `readonly` is understood as follows:
* 0 - not read-only mode, no additional checks.
* 1 - only read queries, as well as changing settings with <changable_in_readonly/> flag.
* 2 - only read queries and you can change the settings, except for the `readonly` setting.
*
*/
class SettingsConstraints
{
@ -61,17 +65,8 @@ public:
void clear();
bool empty() const { return constraints.empty(); }
void setMinValue(std::string_view setting_name, const Field & min_value);
Field getMinValue(std::string_view setting_name) const;
void setMaxValue(std::string_view setting_name, const Field & max_value);
Field getMaxValue(std::string_view setting_name) const;
void setReadOnly(std::string_view setting_name, bool read_only);
bool isReadOnly(std::string_view setting_name) const;
void set(std::string_view setting_name, const Field & min_value, const Field & max_value, bool read_only);
void get(std::string_view setting_name, Field & min_value, Field & max_value, bool & read_only) const;
void set(const String & setting_name, const Field & min_value, const Field & max_value, SettingConstraintWritability writability);
void get(const Settings & current_settings, std::string_view setting_name, Field & min_value, Field & max_value, SettingConstraintWritability & writability) const;
void merge(const SettingsConstraints & other);
@ -87,10 +82,15 @@ public:
friend bool operator !=(const SettingsConstraints & left, const SettingsConstraints & right) { return !(left == right); }
private:
/// How a check reacts when a setting change violates a constraint.
enum ReactionOnViolation
{
/// The check throws an Exception describing the violation.
THROW_ON_VIOLATION,
/// Presumably the non-throwing mode: the visible check code returns false or replaces the
/// value with the min/max bound whenever the reaction is not THROW_ON_VIOLATION. Confirm.
CLAMP_ON_VIOLATION,
};
struct Constraint
{
std::shared_ptr<const String> setting_name;
bool read_only = false;
SettingConstraintWritability writability = SettingConstraintWritability::WRITABLE;
Field min_value;
Field max_value;
@ -98,18 +98,53 @@ private:
bool operator !=(const Constraint & other) const { return !(*this == other); }
};
/// Checker bundles a constraint with an optional refusal (`explain` text and error `code`).
/// NOTE(review): this span interleaves an `enum ReactionOnViolation` header and its two
/// enumerators with the `struct Checker` definition, so it is not well-formed as written.
/// The enum lines look like leftovers of a different revision. Confirm.
enum ReactionOnViolation
struct Checker
{
THROW_ON_VIOLATION,
CLAMP_ON_VIOLATION,
Constraint constraint;
String explain;
int code = 0;
// Allows everything
Checker() = default;
// Forbidden with explanation
Checker(const String & explain_, int code_)
: constraint{.writability = SettingConstraintWritability::CONST}
, explain(explain_)
, code(code_)
{}
// Allow or forbid depending on range defined by constraint, also used to return stored constraint
explicit Checker(const Constraint & constraint_)
: constraint(constraint_)
{}
// Perform checking
bool check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const;
};
/// Hash functor accepting both `std::string_view` and `String`.
/// The `is_transparent` marker opts the container into heterogeneous lookup,
/// so a lookup by `std::string_view` does not need a temporary `String`.
struct StringHash
{
    using is_transparent = void;

    /// Hashes a non-owning view.
    size_t operator()(std::string_view view) const
    {
        return std::hash<std::string_view>()(view);
    }

    /// Hashes an owning string.
    size_t operator()(const String & str) const
    {
        return std::hash<String>()(str);
    }
};
bool checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
Constraint & getConstraintRef(std::string_view setting_name);
const Constraint * tryGetConstraint(std::string_view setting_name) const;
Checker getChecker(const Settings & current_settings, std::string_view setting_name) const;
std::unordered_map<std::string_view, Constraint> constraints;
const AccessControl * access_control = nullptr;
// Special container for heterogeneous lookups: to avoid `String` construction during `find(std::string_view)`
using Constraints = std::unordered_map<String, Constraint, StringHash, std::equal_to<>>;
Constraints constraints;
const AccessControl * access_control;
};
}

View File

@ -3,6 +3,7 @@
#include <Access/AccessControl.h>
#include <Access/SettingsProfile.h>
#include <Core/Settings.h>
#include <Common/SettingConstraintWritability.h>
#include <Common/SettingsChanges.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -18,6 +19,10 @@ namespace
constexpr const char ALLOW_BACKUP_SETTING_NAME[] = "allow_backup";
}
/// Error codes referenced in this file; they are only declared here (`extern`).
namespace ErrorCodes
{
/// Thrown below when CHANGEABLE_IN_READONLY is requested without settings_constraints_replace_previous.
extern const int NOT_IMPLEMENTED;
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
@ -46,17 +51,20 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
{
setting_name = ast.setting_name;
/// Optionally check if a setting with that name is allowed.
if (access_control)
{
/// Check if a setting with that name is allowed.
if (setting_name != ALLOW_BACKUP_SETTING_NAME)
access_control->checkSettingNameIsAllowed(setting_name);
/// Check if a CHANGEABLE_IN_READONLY is allowed.
if (ast.writability == SettingConstraintWritability::CHANGEABLE_IN_READONLY && !access_control->doesSettingsConstraintsReplacePrevious())
throw Exception("CHANGEABLE_IN_READONLY for " + setting_name + " is not allowed unless settings_constraints_replace_previous is enabled", ErrorCodes::NOT_IMPLEMENTED);
}
value = ast.value;
min_value = ast.min_value;
max_value = ast.max_value;
readonly = ast.readonly;
writability = ast.writability;
if (!value.isNull())
value = Settings::castValueUtil(setting_name, value);
@ -80,7 +88,7 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toAST() const
ast->value = value;
ast->min_value = min_value;
ast->max_value = max_value;
ast->readonly = readonly;
ast->writability = writability;
return ast;
}
@ -101,7 +109,7 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toASTWithName
ast->value = value;
ast->min_value = min_value;
ast->max_value = max_value;
ast->readonly = readonly;
ast->writability = writability;
return ast;
}
@ -205,17 +213,12 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
{
SettingsConstraints res{access_control};
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.min_value.isNull())
res.setMinValue(elem.setting_name, elem.min_value);
if (!elem.max_value.isNull())
res.setMaxValue(elem.setting_name, elem.max_value);
if (elem.readonly)
res.setReadOnly(elem.setting_name, *elem.readonly);
}
}
if (!elem.setting_name.empty() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
res.set(
elem.setting_name,
elem.min_value,
elem.max_value,
elem.writability ? *elem.writability : SettingConstraintWritability::WRITABLE);
return res;
}

View File

@ -2,6 +2,7 @@
#include <Core/Field.h>
#include <Core/UUID.h>
#include <Common/SettingConstraintWritability.h>
#include <optional>
#include <unordered_map>
#include <vector>
@ -25,9 +26,9 @@ struct SettingsProfileElement
Field value;
Field min_value;
Field max_value;
std::optional<bool> readonly;
std::optional<SettingConstraintWritability> writability;
auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, readonly); }
auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, writability); }
friend bool operator==(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return lhs.toTuple() == rhs.toTuple(); }
friend bool operator!=(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return !(lhs == rhs); }
friend bool operator <(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return lhs.toTuple() < rhs.toTuple(); }

View File

@ -441,17 +441,32 @@ namespace
String path_to_name = path_to_constraints + "." + setting_name;
config.keys(path_to_name, constraint_types);
size_t writability_count = 0;
for (const String & constraint_type : constraint_types)
{
if (constraint_type == "min")
profile_element.min_value = Settings::stringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type));
else if (constraint_type == "max")
profile_element.max_value = Settings::stringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type));
else if (constraint_type == "readonly")
profile_element.readonly = true;
else if (constraint_type == "readonly" || constraint_type == "const")
{
writability_count++;
profile_element.writability = SettingConstraintWritability::CONST;
}
else if (constraint_type == "changeable_in_readonly")
{
writability_count++;
if (access_control.doesSettingsConstraintsReplacePrevious())
profile_element.writability = SettingConstraintWritability::CHANGEABLE_IN_READONLY;
else
throw Exception("Setting changeable_in_readonly for " + setting_name + " is not allowed unless settings_constraints_replace_previous is enabled", ErrorCodes::NOT_IMPLEMENTED);
}
else
throw Exception("Setting " + constraint_type + " value for " + setting_name + " isn't supported", ErrorCodes::NOT_IMPLEMENTED);
}
if (writability_count > 1)
throw Exception("Not more than one constraint writability specifier (const/readonly/changeable_in_readonly) is allowed for " + setting_name, ErrorCodes::NOT_IMPLEMENTED);
profile_elements.push_back(std::move(profile_element));
}
@ -635,13 +650,6 @@ void UsersConfigAccessStorage::load(
/* already_loaded = */ false);
}
/// Asks the config reloader (when one is set) to reload; runs under load_mutex.
void UsersConfigAccessStorage::reload()
{
    std::lock_guard lock{load_mutex};

    if (!config_reloader)
        return;

    config_reloader->reload();
}
void UsersConfigAccessStorage::startPeriodicReloading()
{
std::lock_guard lock{load_mutex};
@ -656,6 +664,13 @@ void UsersConfigAccessStorage::stopPeriodicReloading()
config_reloader->stop();
}
/// Asks the config reloader (when one is set) to reload; runs under load_mutex.
/// The reload mode argument is not used by this storage.
void UsersConfigAccessStorage::reload(ReloadMode /* reload_mode */)
{
    std::lock_guard lock{load_mutex};

    if (!config_reloader)
        return;

    config_reloader->reload();
}
std::optional<UUID> UsersConfigAccessStorage::findImpl(AccessEntityType type, const String & name) const
{
return memory_storage.find(type, name);

View File

@ -38,9 +38,9 @@ public:
const String & preprocessed_dir = {},
const zkutil::GetZooKeeper & get_zookeeper_function = {});
void reload() override;
void startPeriodicReloading() override;
void stopPeriodicReloading() override;
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;

View File

@ -64,6 +64,11 @@ public:
return nested_func->isVersioned();
}
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
@ -79,6 +84,11 @@ public:
nested_func->destroy(place);
}
/// This combinator keeps no state of its own here: the call is forwarded to the nested function unchanged.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroyUpToState(place); }
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -35,12 +35,12 @@ private:
using Counter = UInt64;
size_t category_count;
/// Returns a mutable reference to one counter inside the state at `place`.
/// The state is addressed as a flat array of Counter with two slots per category:
/// index `i * 2` when `what` is false, `i * 2 + 1` when `what` is true.
/// Declared static because only the passed-in state is touched; no member of the function object is read.
static Counter & counter(AggregateDataPtr __restrict place, size_t i, bool what)
{
    auto * counters = reinterpret_cast<Counter *>(place);
    return counters[i * 2 + (what ? 1 : 0)];
}
/// Read-only counterpart: returns a const reference to one counter inside the state at `place`.
/// The state is addressed as a flat array of Counter with two slots per category:
/// index `i * 2` when `what` is false, `i * 2 + 1` when `what` is true.
/// Declared static because only the passed-in state is read; no member of the function object is used.
static const Counter & counter(ConstAggregateDataPtr __restrict place, size_t i, bool what)
{
    const auto * counters = reinterpret_cast<const Counter *>(place);
    return counters[i * 2 + (what ? 1 : 0)];
}

View File

@ -225,6 +225,12 @@ public:
nested_func->destroy(getNestedPlace(place));
}
/// Destroys this combinator's own data first, then lets the nested function
/// destroy its part (up to -State) at the nested place.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
    auto & own_data = this->data(place);
    own_data.~Data();

    nested_func->destroyUpToState(getNestedPlace(place));
}
String getName() const override
{
return nested_func->getName() + "Distinct";
@ -245,6 +251,21 @@ public:
return nested_func->isState();
}
/// Versioning is a property of the nested function; report whatever it reports.
bool isVersioned() const override { return nested_func->isVersioned(); }
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
/// The default version is the one of the nested function.
size_t getDefaultVersion() const override { return nested_func->getDefaultVersion(); }
/// Gives access to the function wrapped by this combinator.
AggregateFunctionPtr getNestedFunction() const override
{
    return nested_func;
}
};

View File

@ -66,6 +66,7 @@ private:
if (old_size < new_size)
{
char * old_state = state.array_of_aggregate_datas;
char * new_state = arena.alignedAlloc(
new_size * nested_size_of_data,
nested_func->alignOfData());
@ -134,23 +135,43 @@ public:
return nested_func->isVersioned();
}
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
/// The default version is the one of the nested function.
size_t getDefaultVersion() const override { return nested_func->getDefaultVersion(); }
void destroy(AggregateDataPtr __restrict place) const noexcept override
template <bool up_to_state>
void destroyImpl(AggregateDataPtr __restrict place) const noexcept
{
AggregateFunctionForEachData & state = data(place);
char * nested_state = state.array_of_aggregate_datas;
for (size_t i = 0; i < state.dynamic_array_size; ++i)
{
nested_func->destroy(nested_state);
if constexpr (up_to_state)
nested_func->destroyUpToState(nested_state);
else
nested_func->destroy(nested_state);
nested_state += nested_size_of_data;
}
}
/// Full destruction: every nested state is released with destroy().
void destroy(AggregateDataPtr __restrict place) const noexcept override { destroyImpl<false>(place); }
/// Partial destruction: every nested state is released with destroyUpToState().
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { destroyImpl<true>(place); }
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -71,6 +71,11 @@ public:
return nested_func->isVersioned();
}
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
@ -86,6 +91,11 @@ public:
nested_func->destroy(place);
}
/// The call is forwarded to the nested function with the same place.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroyUpToState(place); }
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -84,6 +84,26 @@ private:
using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionMap<KeyType>>;
public:
/// Reports whether the nested function is a state function.
bool isState() const override { return nested_func->isState(); }
/// Versioning is a property of the nested function; report whatever it reports.
bool isVersioned() const override { return nested_func->isVersioned(); }
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
/// The default version is the one of the nested function.
size_t getDefaultVersion() const override { return nested_func->getDefaultVersion(); }
AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) : Base(types, nested->getParameters()), nested_func(nested)
{
if (types.empty())
@ -187,6 +207,32 @@ public:
}
}
/// Releases the nested state of every entry in `merged_maps`, then destroys the map data itself.
/// `up_to_state` selects destroyUpToState() instead of destroy() for the nested states.
template <bool up_to_state>
void destroyImpl(AggregateDataPtr __restrict place) const noexcept
{
    AggregateFunctionMapCombinatorData<KeyType> & state = Base::data(place);

    for (const auto & entry : state.merged_maps)
    {
        const auto & nested_place = entry.second;
        if constexpr (up_to_state)
            nested_func->destroyUpToState(nested_place);
        else
            nested_func->destroy(nested_place);
    }

    /// The container is destroyed only after all nested states have been released.
    state.~Data();
}
/// Full destruction: nested states are released with destroy().
void destroy(AggregateDataPtr __restrict place) const noexcept override { destroyImpl<false>(place); }
/// Partial destruction: nested states are released with destroyUpToState().
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { destroyImpl<true>(place); }
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & merged_maps = this->data(place).merged_maps;

View File

@ -80,6 +80,11 @@ public:
nested_func->destroy(place);
}
/// The call is forwarded to the nested function with the same place.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroyUpToState(place); }
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();
@ -126,6 +131,11 @@ public:
}
/// Gives access to the function wrapped by this combinator.
AggregateFunctionPtr getNestedFunction() const override
{
    return nested_func;
}
/// Reports whether the nested function is a state function.
bool isState() const override { return nested_func->isState(); }
};
}

View File

@ -114,6 +114,11 @@ public:
nested_function->destroy(nestedPlace(place));
}
/// Forwards to the nested function, addressing its state through nestedPlace().
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
    const auto nested = nestedPlace(place);
    nested_function->destroyUpToState(nested);
}
bool hasTrivialDestructor() const override
{
return nested_function->hasTrivialDestructor();
@ -189,6 +194,21 @@ public:
return nested_function->isState();
}
/// Versioning is a property of the nested function; report whatever it reports.
bool isVersioned() const override { return nested_function->isVersioned(); }
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_function->getVersionFromRevision(revision); }
/// The default version is the one of the nested function.
size_t getDefaultVersion() const override { return nested_function->getDefaultVersion(); }
/// Gives access to the function wrapped by this combinator.
AggregateFunctionPtr getNestedFunction() const override
{
    return nested_function;
}
#if USE_EMBEDDED_COMPILER
@ -468,9 +488,15 @@ public:
}
}
bool found_one = false;
/// We can have 0 nullable filters if we don't skip nulls
if (nullable_filters.size() == 0)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlace(row_begin, row_end, this->nestedPlace(place), nested_columns, arena, -1);
return;
}
chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
bool found_one = false;
if (nullable_filters.size() == 1)
{
/// We can avoid making copies of the only filter but we still need to check that there is data to be added

View File

@ -98,6 +98,11 @@ public:
nested_function->destroy(place);
}
/// The call is forwarded to the nested function with the same place.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { nested_function->destroyUpToState(place); }
void add(
AggregateDataPtr __restrict place,
const IColumn ** columns,

View File

@ -91,6 +91,21 @@ public:
return nested_function->isState();
}
/// Versioning is a property of the nested function; report whatever it reports.
bool isVersioned() const override { return nested_function->isVersioned(); }
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_function->getVersionFromRevision(revision); }
/// The default version is the one of the nested function.
size_t getDefaultVersion() const override { return nested_function->getDefaultVersion(); }
bool allocatesMemoryInArena() const override
{
return nested_function->allocatesMemoryInArena();
@ -134,6 +149,12 @@ public:
nested_function->destroy(place + i * size_of_data);
}
/// Releases (up to -State) each of the `total` nested states,
/// which are stored consecutively, `size_of_data` bytes apart, starting at `place`.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
    auto nested_place = place;
    for (size_t remaining = total; remaining != 0; --remaining)
    {
        nested_function->destroyUpToState(nested_place);
        nested_place += size_of_data;
    }
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Key key;

View File

@ -56,10 +56,22 @@ public:
return nested_func->getDefaultVersion();
}
/// Reports whether the nested function is a state function.
bool isState() const override { return nested_func->isState(); }
/// The version for a revision is decided by the nested function.
size_t getVersionFromRevision(size_t revision) const override { return nested_func->getVersionFromRevision(revision); }
/// State creation is delegated to the nested function.
void create(AggregateDataPtr __restrict place) const override
{
    nested_func->create(place);
}
/// State destruction is delegated to the nested function.
void destroy(AggregateDataPtr __restrict place) const noexcept override
{
    nested_func->destroy(place);
}
/// Destruction up to -State is delegated to the nested function.
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
    nested_func->destroyUpToState(place);
}
/// The state is that of the nested function, so its answer is returned.
bool hasTrivialDestructor() const override
{
    return nested_func->hasTrivialDestructor();
}
/// The state size is that of the nested function.
size_t sizeOfData() const override
{
    return nested_func->sizeOfData();
}

View File

@ -69,6 +69,8 @@ public:
nested_func->destroy(place);
}
/// Deliberately a no-op: nothing is destroyed here and the place is left untouched.
/// NOTE(review): presumably this is the -State combinator itself, whose state is handed over to the result column - confirm.
void destroyUpToState(AggregateDataPtr __restrict) const noexcept override
{
}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -113,6 +113,17 @@ public:
/// Delete data for aggregation.
virtual void destroy(AggregateDataPtr __restrict place) const noexcept = 0;
/// Destroy only the combinator states that were applied after the -State combinator.
/// For example, for uniqArrayStateForEachMap(...) this destroys the states
/// created by the combinators Map and ForEach.
/// This is needed because the ColumnAggregateFunction in the result is responsible
/// only for destroying the states created by the aggregate function itself and by
/// all combinators that precede -State.
/// By default the whole state is destroyed, i.e. this behaves like destroy().
virtual void destroyUpToState(AggregateDataPtr __restrict place) const noexcept
{
    this->destroy(place);
}
/// It is not necessary to delete data.
virtual bool hasTrivialDestructor() const = 0;
@ -277,8 +288,7 @@ public:
Arena * arena) const = 0;
/** Insert result of aggregate function into result column with batch size.
* If destroy_place_after_insert is true. Then implementation of this method
* must destroy aggregate place if insert state into result column was successful.
* The implementation of this method will destroy aggregate place up to -State if insert state into result column was successful.
* All places that were not inserted must be destroyed if there was exception during insert into result column.
*/
virtual void insertResultIntoBatch(
@ -287,8 +297,7 @@ public:
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const = 0;
Arena * arena) const = 0;
/** Destroy batch of aggregate places.
*/
@ -612,8 +621,7 @@ public:
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const override
Arena * arena) const override
{
size_t batch_index = row_begin;
@ -622,9 +630,9 @@ public:
for (; batch_index < row_end; ++batch_index)
{
static_cast<const Derived *>(this)->insertResultInto(places[batch_index] + place_offset, to, arena);
if (destroy_place_after_insert)
static_cast<const Derived *>(this)->destroy(places[batch_index] + place_offset);
/// For State AggregateFunction ownership of aggregate place is passed to result column after insert,
/// so we need to destroy all states up to state of -State combinator.
static_cast<const Derived *>(this)->destroyUpToState(places[batch_index] + place_offset);
}
}
catch (...)

View File

@ -137,9 +137,41 @@ Field QueryFuzzer::fuzzField(Field field)
break;
}
}
else if (type == Field::Types::Array || type == Field::Types::Tuple)
else if (type == Field::Types::Array)
{
auto & arr = field.reinterpret<FieldVector>();
auto & arr = field.get<Array>();
if (fuzz_rand() % 5 == 0 && !arr.empty())
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
std::cerr << "erased\n";
}
if (fuzz_rand() % 5 == 0)
{
if (!arr.empty())
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
std::cerr << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
std::cerr << "inserted (0)\n";
}
}
for (auto & element : arr)
{
element = fuzzField(element);
}
}
else if (type == Field::Types::Tuple)
{
auto & arr = field.get<Tuple>();
if (fuzz_rand() % 5 == 0 && !arr.empty())
{

View File

@ -162,7 +162,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
};
callback(res);
res->forEachSubcolumn(callback);
res->forEachSubcolumnRecursively(callback);
for (auto * val : data)
func->insertResultInto(val, *res, &column_aggregate_func.createOrGetArena());

View File

@ -157,6 +157,14 @@ public:
callback(data);
}
/// Applies `callback` to the offsets and data subcolumns (in that order),
/// descending into each one right after it has been visited.
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
    auto visit = [&callback](auto & subcolumn)
    {
        callback(subcolumn);
        subcolumn->forEachSubcolumnRecursively(callback);
    };

    visit(offsets);
    visit(data);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_concrete = typeid_cast<const ColumnArray *>(&rhs))

View File

@ -235,6 +235,12 @@ public:
callback(data);
}
/// Applies `callback` to the wrapped data column, then descends into it.
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
    auto & subcolumn = data;
    callback(subcolumn);
    subcolumn->forEachSubcolumnRecursively(callback);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_concrete = typeid_cast<const ColumnConst *>(&rhs))

View File

@ -173,6 +173,19 @@ public:
callback(dictionary.getColumnUniquePtr());
}
/// Applies `callback` to the positions column and descends into it.
/// The dictionary is visited the same way, but only when it is not shared.
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
    callback(idx.getPositionsPtr());
    idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);

    /// Column doesn't own dictionary if it's shared.
    if (dictionary.isShared())
        return;

    callback(dictionary.getColumnUniquePtr());
    dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs))

View File

@ -278,6 +278,12 @@ void ColumnMap::forEachSubcolumn(ColumnCallback callback)
callback(nested);
}
/// Applies `callback` to the nested column, then descends into it.
void ColumnMap::forEachSubcolumnRecursively(ColumnCallback callback)
{
    auto & subcolumn = nested;
    callback(subcolumn);
    subcolumn->forEachSubcolumnRecursively(callback);
}
bool ColumnMap::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_map = typeid_cast<const ColumnMap *>(&rhs))

View File

@ -89,6 +89,7 @@ public:
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

View File

@ -136,6 +136,14 @@ public:
callback(null_map);
}
/// Applies `callback` to the nested column and the null map (in that order),
/// descending into each one right after it has been visited.
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
    auto visit = [&callback](auto & subcolumn)
    {
        callback(subcolumn);
        subcolumn->forEachSubcolumnRecursively(callback);
    };

    visit(nested_column);
    visit(null_map);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_nullable = typeid_cast<const ColumnNullable *>(&rhs))

View File

@ -671,6 +671,18 @@ void ColumnObject::forEachSubcolumn(ColumnCallback callback)
callback(part);
}
/// Applies `callback` to every part of every subcolumn,
/// descending into each part right after it has been visited.
void ColumnObject::forEachSubcolumnRecursively(ColumnCallback callback)
{
    auto visit = [&callback](auto & part)
    {
        callback(part);
        part->forEachSubcolumnRecursively(callback);
    };

    for (auto & entry : subcolumns)
        for (auto & part : entry->data.data)
            visit(part);
}
void ColumnObject::insert(const Field & field)
{
const auto & object = field.get<const Object &>();

View File

@ -211,6 +211,7 @@ public:
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;

View File

@ -750,6 +750,14 @@ void ColumnSparse::forEachSubcolumn(ColumnCallback callback)
callback(offsets);
}
/// Applies `callback` to the values and offsets subcolumns (in that order),
/// descending into each one right after it has been visited.
void ColumnSparse::forEachSubcolumnRecursively(ColumnCallback callback)
{
    auto visit = [&callback](auto & subcolumn)
    {
        callback(subcolumn);
        subcolumn->forEachSubcolumnRecursively(callback);
    };

    visit(values);
    visit(offsets);
}
const IColumn::Offsets & ColumnSparse::getOffsetsData() const
{
return assert_cast<const ColumnUInt64 &>(*offsets).getData();

View File

@ -140,6 +140,7 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;

View File

@ -501,6 +501,15 @@ void ColumnTuple::forEachSubcolumn(ColumnCallback callback)
callback(column);
}
/// Applies `callback` to every element column of the tuple,
/// descending into each element right after it has been visited.
void ColumnTuple::forEachSubcolumnRecursively(ColumnCallback callback)
{
    auto visit = [&callback](auto & element)
    {
        callback(element);
        element->forEachSubcolumnRecursively(callback);
    };

    for (auto & column : columns)
        visit(column);
}
bool ColumnTuple::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_tuple = typeid_cast<const ColumnTuple *>(&rhs))

View File

@ -97,6 +97,7 @@ public:
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;

View File

@ -1,4 +1,5 @@
#pragma once
#include <Columns/IColumnUnique.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ReverseIndex.h>
@ -7,16 +8,17 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/NumberTraits.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <base/range.h>
#include <Common/FieldVisitors.h>
#include <base/range.h>
#include <base/unaligned.h>
#include "Columns/ColumnConst.h"
namespace DB
@ -111,6 +113,15 @@ public:
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
/// Applies `callback` to the holder column and then descends into it.
/// The statement order matters: the members derived from the holder are refreshed
/// only after the callback and the recursive traversal have run.
void forEachSubcolumnRecursively(IColumn::ColumnCallback callback) override
{
callback(column_holder);
column_holder->forEachSubcolumnRecursively(callback);
/// Point the reverse index at the (possibly replaced) holder column.
reverse_index.setColumn(getRawColumnPtr());
/// The nullable wrapper refers to the holder, so it is rebuilt as well.
if (is_nullable)
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnUnique *>(&rhs))
@ -305,17 +316,52 @@ size_t ColumnUnique<ColumnType>::getNullValueIndex() const
return 0;
}
namespace
{
/// Visitor that exposes the value held by a Field as a raw byte range.
/// After a visit, `res` refers to the visited value (nothing is copied):
/// the characters of a String, or the object bytes of a numeric / decimal / UUID / bool value.
/// Null, Array, Tuple, Map, Object and aggregate function states are rejected with LOGICAL_ERROR.
class FieldVisitorGetData : public StaticVisitor<>
{
public:
    StringRef res;

    [[noreturn]] static void throwUnsupported()
    {
        throw Exception("Unsupported field type", ErrorCodes::LOGICAL_ERROR);
    }

    /// Types without a flat byte representation.
    [[noreturn]] void operator() (const Null &) { throwUnsupported(); }
    [[noreturn]] void operator() (const Array &) { throwUnsupported(); }
    [[noreturn]] void operator() (const Tuple &) { throwUnsupported(); }
    [[noreturn]] void operator() (const Map &) { throwUnsupported(); }
    [[noreturn]] void operator() (const Object &) { throwUnsupported(); }
    [[noreturn]] void operator() (const AggregateFunctionStateData &) { throwUnsupported(); }

    /// Strings are exposed through their character buffer.
    void operator() (const String & x) { res = {x.data(), x.size()}; }

    /// Everything else is exposed as the bytes of the object itself.
    void operator() (const UInt64 & x) { setObjectBytes(x); }
    void operator() (const UInt128 & x) { setObjectBytes(x); }
    void operator() (const UInt256 & x) { setObjectBytes(x); }
    void operator() (const Int64 & x) { setObjectBytes(x); }
    void operator() (const Int128 & x) { setObjectBytes(x); }
    void operator() (const Int256 & x) { setObjectBytes(x); }
    void operator() (const UUID & x) { setObjectBytes(x); }
    void operator() (const Float64 & x) { setObjectBytes(x); }
    void operator() (const DecimalField<Decimal32> & x) { setObjectBytes(x); }
    void operator() (const DecimalField<Decimal64> & x) { setObjectBytes(x); }
    void operator() (const DecimalField<Decimal128> & x) { setObjectBytes(x); }
    void operator() (const DecimalField<Decimal256> & x) { setObjectBytes(x); }
    void operator() (const bool & x) { setObjectBytes(x); }

private:
    /// Makes `res` cover exactly the sizeof(T) bytes of `value`.
    template <typename T>
    void setObjectBytes(const T & value)
    {
        res = {reinterpret_cast<const char *>(&value), sizeof(value)};
    }
};
}
/// Inserts the value of `x` into the unique column and returns its index.
/// A Null field maps to the index of the null value.
/// For any other field the bytes are obtained through FieldVisitorGetData, which dispatches
/// on the type actually stored in the Field (and throws for unsupported types) instead of
/// reinterpreting the Field storage or assuming that a variable-size value is a String.
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
{
    if (x.isNull())
        return getNullValueIndex();

    FieldVisitorGetData visitor;
    applyVisitor(visitor, x);
    return uniqueInsertData(visitor.res.data, visitor.res.size);
}
template <typename ColumnType>

View File

@ -414,6 +414,9 @@ public:
using ColumnCallback = std::function<void(WrappedPtr&)>;
virtual void forEachSubcolumn(ColumnCallback) {}
/// Similar to forEachSubcolumn, but it also makes recursive calls into the subcolumns.
/// The default implementation does nothing.
virtual void forEachSubcolumnRecursively(ColumnCallback) {}
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
[[nodiscard]] virtual bool structureEquals(const IColumn &) const

View File

@ -94,21 +94,7 @@ public:
T operator() (const DecimalField<U> & x) const
{
if constexpr (std::is_floating_point_v<T>)
return x.getValue(). template convertTo<T>() / x.getScaleMultiplier(). template convertTo<T>();
else if constexpr (std::is_same_v<T, UInt128>)
{
if constexpr (sizeof(U) < 16)
{
return UInt128(0, (x.getValue() / x.getScaleMultiplier()).value);
}
else if constexpr (sizeof(U) == 16)
{
auto tmp = (x.getValue() / x.getScaleMultiplier()).value;
return UInt128(tmp >> 64, UInt64(tmp));
}
else
throw Exception("No conversion to old UInt128 from " + demangle(typeid(U).name()), ErrorCodes::NOT_IMPLEMENTED);
}
return x.getValue().template convertTo<T>() / x.getScaleMultiplier().template convertTo<T>();
else
return (x.getValue() / x.getScaleMultiplier()). template convertTo<T>();
}
@ -134,4 +120,3 @@ public:
};
}

View File

@ -15,7 +15,7 @@ FieldVisitorSum::FieldVisitorSum(const Field & rhs_) : rhs(rhs_) {}
/// Signed 64-bit values are summed through the unsigned overload, viewing `x` as UInt64.
bool FieldVisitorSum::operator() (Int64 & x) const
{
    auto & as_unsigned = reinterpret_cast<UInt64 &>(x);
    return this->operator()(as_unsigned);
}
/// Adds `rhs`, converted to UInt64, to `x` in place and returns whether the result is non-zero.
/// `rhs` is added exactly once, converted through FieldVisitorConvertToNumber according to
/// the type it actually stores rather than by reinterpreting its storage.
bool FieldVisitorSum::operator() (UInt64 & x) const
{
    x += applyVisitor(FieldVisitorConvertToNumber<UInt64>(), rhs);
    return x != 0;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/FieldVisitors.h>
#include <Common/FieldVisitorConvertToNumber.h>
namespace DB
@ -41,7 +42,7 @@ public:
requires is_big_int_v<T>
bool operator() (T & x) const
{
x += rhs.reinterpret<T>();
x += applyVisitor(FieldVisitorConvertToNumber<T>(), rhs);
return x != T(0);
}
};

View File

@ -115,7 +115,13 @@ protected:
}
/// Minimum amount of memory to allocate for num_elements, including padding.
/// Throws CANNOT_ALLOCATE_MEMORY if adding the padding to the byte size overflows size_t,
/// so a wrapped-around (too small) amount is never returned.
static size_t minimum_memory_for_elements(size_t num_elements) /// NOLINT
{
    size_t amount = 0;
    if (__builtin_add_overflow(byte_size(num_elements), pad_left + pad_right, &amount))
        throw Exception("Amount of memory requested to allocate is more than allowed", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
    return amount;
}
void alloc_for_num_elements(size_t num_elements) /// NOLINT
{

View File

@ -301,6 +301,30 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
\
M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.") \
M(S3CopyObject, "Number of S3 API CopyObject calls.") \
M(S3ListObjects, "Number of S3 API ListObjects calls.") \
M(S3HeadObject, "Number of S3 API HeadObject calls.") \
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.") \
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.") \
M(S3UploadPart, "Number of S3 API UploadPart calls.") \
M(S3AbortMultipartUpload, "Number of S3 API AbortMultipartUpload calls.") \
M(S3CompleteMultipartUpload, "Number of S3 API CompleteMultipartUpload calls.") \
M(S3PutObject, "Number of S3 API PutObject calls.") \
M(S3GetObject, "Number of S3 API GetObject calls.") \
\
M(DiskS3DeleteObjects, "Number of DiskS3 API DeleteObject(s) calls.") \
M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \
M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \
M(DiskS3HeadObject, "Number of DiskS3 API HeadObject calls.") \
M(DiskS3CreateMultipartUpload, "Number of DiskS3 API CreateMultipartUpload calls.") \
M(DiskS3UploadPartCopy, "Number of DiskS3 API UploadPartCopy calls.") \
M(DiskS3UploadPart, "Number of DiskS3 API UploadPart calls.") \
M(DiskS3AbortMultipartUpload, "Number of DiskS3 API AbortMultipartUpload calls.") \
M(DiskS3CompleteMultipartUpload, "Number of DiskS3 API CompleteMultipartUpload calls.") \
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
@ -411,6 +435,7 @@ The server successfully detected this situation and will download merged part fr
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
namespace ProfileEvents
{

View File

@ -0,0 +1,23 @@
#pragma once
namespace DB
{
/// Describes whether, and under which `readonly` mode, a constrained setting may be changed.
enum class SettingConstraintWritability
{
// Default. Setting can be changed within specified range only in `readonly=0` or `readonly=2` mode.
WRITABLE,
// Setting cannot be changed at all.
// Either READONLY or CONST keyword in SQL syntax can be used (<readonly/> or <const/> in config.xml) to enable this.
// NOTE: name `CONST` is chosen to avoid confusion with `readonly` setting.
CONST,
// Setting can be changed within specified range, regardless of `readonly` setting value.
CHANGEABLE_IN_READONLY,
// NOTE(review): presumably a sentinel marking the number of real values rather than a writability mode - confirm against users.
MAX
};
}

View File

@ -370,7 +370,7 @@ void KeeperDispatcher::shutdown()
/// Clear all registered sessions
std::lock_guard lock(session_to_response_callback_mutex);
if (hasLeader())
if (server && hasLeader())
{
close_requests.reserve(session_to_response_callback.size());
// send to leader CLOSE requests for active sessions
@ -394,7 +394,7 @@ void KeeperDispatcher::shutdown()
}
// if there is no leader, there is no reason to do CLOSE because it's a write request
if (hasLeader() && !close_requests.empty())
if (server && hasLeader() && !close_requests.empty())
{
LOG_INFO(log, "Trying to close {} session(s)", close_requests.size());
const auto raft_result = server->putRequestBatch(close_requests);

View File

@ -520,7 +520,7 @@ bool KeeperServer::isFollower() const
bool KeeperServer::isLeaderAlive() const
{
return raft_instance->is_leader_alive();
return raft_instance && raft_instance->is_leader_alive();
}
/// TODO test whether taking failed peer in count

View File

@ -425,16 +425,6 @@ public:
bool isNegativeInfinity() const { return which == Types::Null && get<Null>().isNegativeInfinity(); }
bool isPositiveInfinity() const { return which == Types::Null && get<Null>().isPositiveInfinity(); }
template <typename T>
T & reinterpret();
template <typename T>
const T & reinterpret() const
{
auto * mutable_this = const_cast<std::decay_t<decltype(*this)> *>(this);
return mutable_this->reinterpret<T>();
}
template <typename T> bool tryGet(T & result)
{
const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
@ -552,7 +542,7 @@ public:
case Types::Float64:
{
// Compare as UInt64 so that NaNs compare as equal.
return reinterpret<UInt64>() == rhs.reinterpret<UInt64>();
return std::bit_cast<UInt64>(get<Float64>()) == std::bit_cast<UInt64>(rhs.get<Float64>());
}
case Types::UUID: return get<UUID>() == rhs.get<UUID>();
case Types::String: return get<String>() == rhs.get<String>();
@ -843,30 +833,6 @@ auto & Field::safeGet()
}
template <typename T>
T & Field::reinterpret()
{
assert(which != Types::String); // See specialization for char
using ValueType = std::decay_t<T>;
ValueType * MAY_ALIAS ptr = reinterpret_cast<ValueType *>(&storage);
return *ptr;
}
// Specialize reinterpreting to char (used in ColumnUnique) to make sure Strings are reinterpreted correctly
// inline to avoid multiple definitions
template <>
inline char & Field::reinterpret<char>()
{
if (which == Types::String)
{
// For String we want to return a pointer to the data, not the start of the class
// as the layout of std::string depends on the STD version and options
char * ptr = reinterpret_cast<String *>(&storage)->data();
return *ptr;
}
return *reinterpret_cast<char *>(&storage);
}
template <typename T>
Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T>) //-V730
{

View File

@ -86,6 +86,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
@ -353,7 +354,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, max_ast_elements, 50000, "Maximum size of query syntax tree in number of nodes. Checked after parsing.", 0) \
M(UInt64, max_expanded_ast_elements, 500000, "Maximum size of query syntax tree in number of nodes after expansion of aliases and the asterisk.", 0) \
\
M(UInt64, readonly, 0, "0 - everything is allowed. 1 - only read requests. 2 - only read requests, as well as changing settings, except for the 'readonly' setting.", 0) \
M(UInt64, readonly, 0, "0 - no read-only restrictions. 1 - only read requests, as well as changing explicitly allowed settings. 2 - only read requests, as well as changing settings, except for the 'readonly' setting.", 0) \
\
M(UInt64, max_rows_in_set, 0, "Maximum size of the set (in number of elements) resulting from the execution of the IN section.", 0) \
M(UInt64, max_bytes_in_set, 0, "Maximum size of the set (in bytes in memory) resulting from the execution of the IN section.", 0) \

View File

@ -20,8 +20,13 @@ namespace ErrorCodes
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int LOGICAL_ERROR;
extern const int TOO_LARGE_ARRAY_SIZE;
}
static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40;
void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const Array & a = field.get<const Array &>();
@ -125,7 +130,12 @@ namespace
{
ColumnArray::Offset current_size = 0;
readIntBinary(current_size, istr);
current_offset += current_size;
if (unlikely(current_size > MAX_ARRAY_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size is too large: {}", current_size);
if (unlikely(__builtin_add_overflow(current_offset, current_size, &current_offset)))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Deserialization of array offsets will lead to overflow");
offset_values[i] = current_offset;
++i;
}
@ -348,6 +358,9 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
size_t nested_limit = last_offset - nested_column->size();
if (unlikely(nested_limit > MAX_ARRAYS_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array sizes are too large: {}", nested_limit);
/// Adjust value size hint. Divide it to the average array size.
settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;

View File

@ -24,6 +24,8 @@ namespace ErrorCodes
extern const int TOO_LARGE_STRING_SIZE;
}
static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30;
void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = field.get<const String &>();
@ -85,8 +87,17 @@ void SerializationFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffe
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t initial_size = data.size();
size_t max_bytes = limit * n;
data.resize(initial_size + max_bytes);
size_t max_bytes;
size_t new_data_size;
if (unlikely(__builtin_mul_overflow(limit, n, &max_bytes)))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Deserializing FixedString will lead to overflow");
if (unlikely(max_bytes > MAX_STRINGS_SIZE))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large sizes of FixedString to deserialize: {}", max_bytes);
if (unlikely(__builtin_add_overflow(initial_size, max_bytes, &new_data_size)))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Deserializing FixedString will lead to overflow");
data.resize(new_data_size);
size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes);
if (read_bytes % n != 0)

View File

@ -17,6 +17,7 @@
#include <emmintrin.h>
#endif
namespace DB
{

View File

@ -185,6 +185,7 @@ void DatabaseOnDisk::createTable(
if (create.attach_short_syntax)
{
/// Metadata already exists, table was detached
assert(fs::exists(getObjectMetadataPath(table_name)));
removeDetachedPermanentlyFlag(local_context, table_name, table_metadata_path, true);
attachTable(local_context, table_name, table, getTableDataPath(create));
return;

View File

@ -1259,4 +1259,24 @@ void DatabaseReplicated::createTableRestoredFromBackup(
}
}
/// Decides whether `query_ptr` has to be replicated on database level.
/// Returns false for queries whose client info is flagged as internal to the replicated database,
/// for ATTACH / FETCH / DROP PARTITION alters and for DROP queries without a table; true otherwise.
bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const
{
    const bool is_internal_query = query_context->getClientInfo().is_replicated_database_internal;
    if (is_internal_query)
        return false;

    /// Some ALTERs are not replicated on database level
    if (const auto * alter_query = query_ptr->as<const ASTAlterQuery>())
    {
        const bool is_not_replicated_alter
            = alter_query->isAttachAlter() || alter_query->isFetchAlter() || alter_query->isDropPartitionAlter();
        return !is_not_replicated_alter;
    }

    /// DROP DATABASE is not replicated: only a DROP that names a table is.
    if (const auto * drop_query = query_ptr->as<const ASTDropQuery>())
        return drop_query->table.get() != nullptr;

    return true;
}
}

View File

@ -46,7 +46,7 @@ public:
/// Try to execute DDL query on current host as initial query. If the query succeeds,
/// then it will be executed on all replicas.
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal = false);
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override;
bool hasReplicationThread() const override { return true; }
@ -75,6 +75,8 @@ public:
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;
bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override;
friend struct DatabaseReplicatedTask;
friend class DatabaseReplicatedDDLWorker;
private:

View File

@ -8,6 +8,7 @@
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <QueryPipeline/BlockIO.h>
#include <ctime>
#include <functional>
@ -338,6 +339,13 @@ public:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName());
}
/// Tells whether the given query has to be replicated by this database engine.
/// The base implementation ignores both arguments and always returns false;
/// engines that replicate DDL are expected to override it.
virtual bool shouldReplicateQuery(const ContextPtr & /*query_context*/, const ASTPtr & /*query_ptr*/) const { return false; }
/// Enqueues a DDL query into the replicated DDL queue of the database engine.
/// The base implementation never enqueues anything: it throws LOGICAL_ERROR,
/// reporting that this engine has no replicated DDL queue.
virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] bool internal = false) /// NOLINT
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not have replicated DDL queue", getEngineName());
}
/// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup.
virtual std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const;

View File

@ -71,6 +71,7 @@ public:
virtual const String & getName() const = 0;
/// Reserve the specified number of bytes.
/// Returns valid reservation or nullptr when failure.
virtual ReservationPtr reserve(UInt64 bytes) = 0;
virtual ~Space() = default;

View File

@ -11,12 +11,16 @@ namespace ProfileEvents
{
extern const Event CachedWriteBufferCacheWriteBytes;
extern const Event CachedWriteBufferCacheWriteMicroseconds;
extern const Event FileSegmentWriteMicroseconds;
}
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
class SwapHelper
@ -31,6 +35,178 @@ namespace
};
}
/// Creates a writer that stores data for `key_` in `cache_`.
/// `cache_log_` may be null: appendFilesystemCacheLog() only writes log entries when it is set.
/// `query_id_` and `source_path_` are copied and later put into the filesystem cache log entries.
/// No file segment is allocated here; the first one is allocated lazily by write().
FileSegmentRangeWriter::FileSegmentRangeWriter(
FileCache * cache_,
const FileSegment::Key & key_,
std::shared_ptr<FilesystemCacheLog> cache_log_,
const String & query_id_,
const String & source_path_)
: cache(cache_)
, key(key_)
, cache_log(cache_log_)
, query_id(query_id_)
, source_path(source_path_)
/// Starts at end() of the holder's list, which write() and finalize() treat as "no current file segment".
/// Relies on `file_segments_holder` being declared (and therefore constructed) before this iterator.
, current_file_segment_it(file_segments_holder.file_segments.end())
{
}
/// Writes `size` bytes of `data` to the cache at position `offset`.
///
/// File segments are allocated lazily: the first call allocates a segment at the current write offset,
/// and a new segment is allocated once the current one is completely downloaded.
///
/// Returns false (nothing is written) if the writer is already finalized or if space for `size` bytes
/// could not be reserved in the current file segment; returns true after the data has been written.
/// Throws LOGICAL_ERROR if `offset` differs from the current write offset (checked only when a current
/// file segment already exists) or if the caller could not become the downloader of the file segment.
bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, bool is_persistent)
{
    if (finalized)
        return false;

    auto & file_segments = file_segments_holder.file_segments;

    if (current_file_segment_it == file_segments.end())
    {
        /// Nothing allocated yet.
        current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
    }
    else
    {
        auto file_segment = *current_file_segment_it;
        assert(file_segment->getCurrentWriteOffset() == current_file_segment_write_offset);

        /// Only sequential writes are supported.
        if (current_file_segment_write_offset != offset)
        {
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Cannot write file segment at offset {}, because current write offset is: {}",
                offset, current_file_segment_write_offset);
        }

        /// The current file segment is full: complete it and continue in a fresh one.
        if (file_segment->range().size() == file_segment->getDownloadedSize())
        {
            completeFileSegment(*file_segment);
            current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
        }
    }

    auto & file_segment = *current_file_segment_it;

    auto downloader = file_segment->getOrSetDownloader();
    if (downloader != FileSegment::getCallerId())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. ({})", file_segment->getInfoForLog());

    /// Whatever path leaves this function, do not keep the downloader role.
    SCOPE_EXIT({
        if (file_segment->isDownloader())
            file_segment->completePartAndResetDownloader();
    });

    bool reserved = file_segment->reserve(size);
    if (!reserved)
    {
        file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
        appendFilesystemCacheLog(*file_segment);

        /// The closing parenthesis was missing from this message.
        LOG_DEBUG(
            &Poco::Logger::get("FileSegmentRangeWriter"),
            "Unsuccessful space reservation attempt (size: {}, file segment info: {})",
            size, file_segment->getInfoForLog());

        return false;
    }

    try
    {
        file_segment->write(data, size, offset);
    }
    catch (...)
    {
        file_segment->completePartAndResetDownloader();
        throw;
    }

    file_segment->completePartAndResetDownloader();
    current_file_segment_write_offset += size;

    return true;
}
void FileSegmentRangeWriter::finalize()
{
if (finalized)
return;
auto & file_segments = file_segments_holder.file_segments;
if (file_segments.empty() || current_file_segment_it == file_segments.end())
return;
completeFileSegment(**current_file_segment_it);
finalized = true;
}
/// Finalizes the writer if that has not been done explicitly.
/// Any exception raised while finalizing is logged instead of being propagated out of the destructor.
FileSegmentRangeWriter::~FileSegmentRangeWriter()
{
    try
    {
        if (finalized)
            return;

        finalize();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
/// Allocates a new file segment starting at `offset` and adds it to the file segments holder.
/// The capacity of the new file segment equals `max_file_segment_size`, but its actual size is 0.
/// Returns the iterator produced by the holder for the added file segment.
FileSegments::iterator FileSegmentRangeWriter::allocateFileSegment(size_t offset, bool is_persistent)
{
    /// The cache mutex is held while the file segment is created and added to the holder.
    std::lock_guard cache_lock(cache->mutex);

    CreateFileSegmentSettings segment_settings{.is_persistent = is_persistent};

    /// We request max_file_segment_size to be downloaded;
    /// if there is less to write, the file segment will be resized in complete() method.
    auto new_segment = cache->createFileSegmentForDownload(
        key, offset, cache->max_file_segment_size, segment_settings, cache_lock);

    return file_segments_holder.add(std::move(new_segment));
}
void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_segment)
{
if (cache_log)
{
auto file_segment_range = file_segment.range();
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;
FilesystemCacheLogElement elem
{
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
.query_id = query_id,
.source_file_path = source_path,
.file_segment_range = { file_segment_range.left, file_segment_right_bound },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},
.profile_counters = nullptr,
};
cache_log->add(elem);
}
}
/// Completes `file_segment` and records it in the filesystem cache log.
/// Detached file segments are skipped entirely.
void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
{
    /// File segment can be detached if space reservation failed.
    if (!file_segment.isDetached())
    {
        file_segment.completeWithoutState();
        appendFilesystemCacheLog(file_segment);
    }
}
CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
std::unique_ptr<WriteBuffer> impl_,
FileCachePtr cache_,
@ -47,7 +223,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
, is_persistent_cache_file(is_persistent_cache_file_)
, query_id(query_id_)
, enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log)
, cache_log(Context::getGlobalContextInstance()->getFilesystemCacheLog())
{
}
@ -82,8 +257,11 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size)
if (!cache_writer)
{
cache_writer = std::make_unique<FileSegmentRangeWriter>(
cache.get(), key, [this](const FileSegment & file_segment) { appendFilesystemCacheLog(file_segment); });
std::shared_ptr<FilesystemCacheLog> cache_log;
if (enable_cache_log)
cache_log = Context::getGlobalContextInstance()->getFilesystemCacheLog();
cache_writer = std::make_unique<FileSegmentRangeWriter>(cache.get(), key, cache_log, query_id, source_path);
}
Stopwatch watch(CLOCK_MONOTONIC);
@ -119,37 +297,9 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size)
ProfileEvents::increment(ProfileEvents::CachedWriteBufferCacheWriteBytes, size);
ProfileEvents::increment(ProfileEvents::CachedWriteBufferCacheWriteMicroseconds, watch.elapsedMicroseconds());
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWriteMicroseconds, watch.elapsedMicroseconds());
cache_in_error_state_or_disabled = false;
}
void CachedOnDiskWriteBufferFromFile::appendFilesystemCacheLog(const FileSegment & file_segment)
{
if (cache_log)
{
auto file_segment_range = file_segment.range();
FilesystemCacheLogElement elem
{
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
.query_id = query_id,
.source_file_path = source_path,
.file_segment_range = { file_segment_range.left, file_segment_range.right },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},
.profile_counters = std::make_shared<ProfileEvents::Counters::Snapshot>(current_file_segment_counters.getPartiallyAtomicSnapshot()),
};
current_file_segment_counters.reset();
cache_log->add(elem);
}
}
void CachedOnDiskWriteBufferFromFile::finalizeImpl()
{
try

View File

@ -13,11 +13,57 @@ class Logger;
namespace DB
{
/**
* We want to write eventually some size, which is not known until the very end.
* Therefore we allocate file segments lazily. Each file segment is assigned capacity
* of max_file_segment_size, but reserved_size remains 0, until space is reserved for it in write().
* Once current file segment is full (reached max_file_segment_size), we allocate a
* new file segment. All allocated file segments reside in file segments holder.
* If at the end of all writes, the last file segment is not full, then it is resized.
*/
class FileSegmentRangeWriter
{
public:
/// `cache_log_` may be null, in which case no filesystem cache log entries are written.
FileSegmentRangeWriter(
FileCache * cache_, const FileSegment::Key & key_,
std::shared_ptr<FilesystemCacheLog> cache_log_, const String & query_id_, const String & source_path_);
/**
* Write a range of file segments. Allocate file segment of `max_file_segment_size` and write to
* it until it is full and then allocate next file segment.
* Returns false if nothing was written (writer already finalized or space reservation failed).
*/
bool write(const char * data, size_t size, size_t offset, bool is_persistent);
/// Completes the current file segment, if any. Also called from the destructor when not done explicitly.
void finalize();
~FileSegmentRangeWriter();
private:
/// Creates a new file segment starting at `offset` and adds it to `file_segments_holder`.
FileSegments::iterator allocateFileSegment(size_t offset, bool is_persistent);
/// Adds an entry for `file_segment` to `cache_log`, if a cache log is set.
void appendFilesystemCacheLog(const FileSegment & file_segment);
/// Completes a non-detached file segment and logs it.
void completeFileSegment(FileSegment & file_segment);
FileCache * cache;
FileSegment::Key key;
/// May be null.
std::shared_ptr<FilesystemCacheLog> cache_log;
String query_id;
String source_path;
/// Must stay declared before `current_file_segment_it`: the constructor initializes the iterator from this holder.
FileSegmentsHolder file_segments_holder{};
/// Points to the file segment currently being written; end() of the holder's list means "none".
FileSegments::iterator current_file_segment_it;
/// Advanced by the number of bytes of each successful write().
size_t current_file_segment_write_offset = 0;
bool finalized = false;
};
/**
* Write buffer for filesystem caching on write operations.
*/
class FileSegmentRangeWriter;
class CachedOnDiskWriteBufferFromFile final : public WriteBufferFromFileDecorator
{
public:
@ -36,7 +82,6 @@ public:
private:
void cacheData(char * data, size_t size);
void appendFilesystemCacheLog(const FileSegment & file_segment);
Poco::Logger * log;
@ -49,11 +94,9 @@ private:
const String query_id;
bool enable_cache_log;
std::shared_ptr<FilesystemCacheLog> cache_log;
bool cache_in_error_state_or_disabled = false;
ProfileEvents::Counters current_file_segment_counters;
std::unique_ptr<FileSegmentRangeWriter> cache_writer;
};

View File

@ -108,8 +108,19 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
if (has_pread_nowait_support.load(std::memory_order_relaxed))
{
/// It reports real time spent including the time spent while thread was preempted doing nothing.
/// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
/// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
/// (TaskStatsInfoGetter has about 500K RPS).
Stopwatch watch(CLOCK_MONOTONIC);
SCOPE_EXIT({
watch.stop();
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
});
std::promise<Result> promise;
std::future<Result> future = promise.get_future();
@ -135,11 +146,6 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
{
/// The file has ended.
promise.set_value({0, 0});
watch.stop();
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
return future;
}
@ -179,18 +185,10 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
if (bytes_read)
{
/// It reports real time spent including the time spent while thread was preempted doing nothing.
/// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
/// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it
/// (TaskStatsInfoGetter has about 500K RPS).
watch.stop();
/// Read successfully from page cache.
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHit);
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitBytes, bytes_read);
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
promise.set_value({bytes_read, request.ignore});
return future;
@ -226,6 +224,12 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
setThreadName("ThreadPoolRead");
Stopwatch watch(CLOCK_MONOTONIC);
SCOPE_EXIT({
watch.stop();
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
});
size_t bytes_read = 0;
while (!bytes_read)
@ -254,8 +258,6 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissBytes, bytes_read);
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds());
return Result{ .size = bytes_read, .offset = request.ignore };
});

Some files were not shown because too many files have changed in this diff Show More