Merge remote-tracking branch 'upstream/master' into rename-some-cache-commands

This commit is contained in:
kssenii 2022-09-20 13:02:18 +02:00
commit b8079e4577
175 changed files with 2720 additions and 1202 deletions

View File

@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG NFPM_VERSION=2.16.0
ARG NFPM_VERSION=2.18.1
RUN arch=${TARGETARCH:-amd64} \
&& curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \

View File

@ -179,17 +179,17 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous
# for files >64MB, we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log
do
clickhouse-local --path /var/lib/clickhouse/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-local --path /var/lib/clickhouse1/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse2/ -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz ||:
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz ||:
fi
done
# Also export trace log in flamegraph-friendly format.
for trace_type in CPU Memory Real
do
clickhouse-local --path /var/lib/clickhouse/ -q "
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "
select
arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack,
count(*) AS samples

View File

@ -0,0 +1,34 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.8.35-stable (b91dc59a565) FIXME as compared to v22.6.7.7-stable (8eae2af3b9a)
#### New Feature
* Backported in [#40868](https://github.com/ClickHouse/ClickHouse/issues/40868): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41274](https://github.com/ClickHouse/ClickHouse/issues/41274): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41282](https://github.com/ClickHouse/ClickHouse/issues/41282): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40905](https://github.com/ClickHouse/ClickHouse/issues/40905): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40864](https://github.com/ClickHouse/ClickHouse/issues/40864): - Fix crash while parsing values of type `Object` that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40803](https://github.com/ClickHouse/ClickHouse/issues/40803): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40891](https://github.com/ClickHouse/ClickHouse/issues/40891): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41133](https://github.com/ClickHouse/ClickHouse/issues/41133): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41360](https://github.com/ClickHouse/ClickHouse/issues/41360): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41357](https://github.com/ClickHouse/ClickHouse/issues/41357): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).

View File

@ -26,13 +26,33 @@ The constraints are defined as the following:
<setting_name_4>
<readonly/>
</setting_name_4>
<setting_name_5>
<min>lower_boundary</min>
<max>upper_boundary</max>
<changeable_in_readonly/>
</setting_name_5>
</constraints>
</user_name>
</profiles>
```
If the user tries to violate the constraints, an exception is thrown and the setting isn't changed.
There are supported three types of constraints: `min`, `max`, `readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` constraint specifies that the user cannot change the corresponding setting at all.
There are several supported types of constraints: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows the user to change the setting within the `min`/`max` range even if `readonly` is set to 1; otherwise settings cannot be changed in `readonly=1` mode. Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled:
``` xml
<access_control_improvements>
<settings_constraints_replace_previous>true</settings_constraints_replace_previous>
</access_control_improvements>
```
If there are multiple profiles active for a user, then constraints are merged. The merge process depends on `settings_constraints_replace_previous` (a short SQL sketch follows the list):
- **true** (recommended): constraints for the same setting are replaced during the merge, so that the last constraint is used and all previous ones are ignored, including fields that are not set in the new constraint.
- **false** (default): constraints for the same setting are merged in such a way that every unset constraint type is taken from the previous profile and every set constraint type is replaced by the value from the new profile.
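For example, here is a minimal SQL sketch of the two merge modes (the profile names, the setting, and the bounds are illustrative):

``` sql
-- Two profiles constrain the same setting in different ways.
CREATE SETTINGS PROFILE p1 SETTINGS max_memory_usage MIN 1000000 MAX 5000000000;
CREATE SETTINGS PROFILE p2 SETTINGS max_memory_usage MAX 2000000000;

-- A user that gets both profiles, p1 first and p2 last.
CREATE USER alice SETTINGS PROFILE 'p1', PROFILE 'p2';

-- With settings_constraints_replace_previous = true, only the constraint from p2 remains,
-- so alice has no MIN bound on max_memory_usage.
-- With the default false, the MIN from p1 is kept and the MAX comes from p2.
```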
Read-only mode is enabled by the `readonly` setting (not to be confused with the `readonly` constraint type); a short illustration follows the list:
- `readonly=0`: No read-only restrictions.
- `readonly=1`: Only read queries are allowed, and settings cannot be changed unless `changeable_in_readonly` is set.
- `readonly=2`: Only read queries are allowed, and settings can be changed, except for the `readonly` setting itself.
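A short hypothetical session illustrating these modes, assuming a profile that bounds `max_memory_usage` with `min`/`max` and marks it `changeable_in_readonly` (and that `settings_constraints_replace_previous` is enabled):

``` sql
SET readonly = 1;
SET max_memory_usage = 2000000000; -- allowed: within the min/max bounds and marked changeable_in_readonly
SET max_threads = 4;               -- rejected: other settings cannot be changed while readonly = 1
SET readonly = 0;                  -- rejected: readonly itself cannot be changed in this mode
```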
**Example:** Suppose `users.xml` includes the lines:

View File

@ -37,8 +37,7 @@ After setting `readonly = 1`, the user can't change `readonly` and `allow_ddl`
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user
from changing only specific settings, for details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Setting `readonly = 1` prohibits the user from changing any settings. There is a way to prohibit the user from changing only specific settings. There is also a way to allow changing only specific settings under the `readonly = 1` restriction. For details, see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Default value: 0

View File

@ -3433,7 +3433,7 @@ Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
Default value: 1.
## input_format_with_names_use_header {#input_format_with_names_use_header}

View File

@ -13,5 +13,5 @@ Syntax:
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
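For instance, a brief sketch of the new `CHANGEABLE_IN_READONLY` clause (the role name and bounds are illustrative):

``` sql
ALTER ROLE analyst
    SETTINGS max_threads = 8 MIN 1 MAX 16 CHANGEABLE_IN_READONLY;
```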

View File

@ -13,5 +13,5 @@ Syntax:
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -11,7 +11,7 @@ Syntax:
``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## Managing Roles

View File

@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
`ON CLUSTER` clause allows creating settings profiles on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
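As a rough illustration (the cluster name, profile name, and values are hypothetical), creating such a profile on every node of a cluster might look like:

``` sql
CREATE SETTINGS PROFILE IF NOT EXISTS limited ON CLUSTER my_cluster
    SETTINGS max_memory_usage = 10000000000 MIN 1000000000 MAX 20000000000 CHANGEABLE_IN_READONLY;
```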

View File

@ -13,6 +13,6 @@ sidebar_label: ROLE
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,6 +13,6 @@ sidebar_label: SETTINGS PROFILE
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -12,7 +12,7 @@ sidebar_label: "Роль"
```sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [, name2 ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## Управление ролями {#managing-roles}

View File

@ -13,7 +13,7 @@ sidebar_label: "Профиль настроек"
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
Секция `ON CLUSTER` позволяет создавать профили на кластере, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md).

View File

@ -500,7 +500,7 @@ ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name]
[IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}]
[[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
### 说明 {#alter-user-dscr}
@ -540,7 +540,7 @@ ALTER USER user DEFAULT ROLE ALL EXCEPT role1, role2
``` sql
ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
## 修改row policy {#alter-row-policy-statement}
@ -584,7 +584,7 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name]
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] name [ON CLUSTER cluster_name]
[RENAME TO new_name]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
[Original article](https://clickhouse.com/docs/en/query_language/alter/) <!--hide-->

View File

@ -13,5 +13,5 @@ sidebar_label: 角色
``` sql
ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```

View File

@ -13,5 +13,5 @@ sidebar_label: 配置文件设置
``` sql
ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -30,6 +30,10 @@ overrides:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-client/config.xml
dst: /etc/clickhouse-client/config.xml

View File

@ -20,6 +20,10 @@ description: |
debugging symbols for clickhouse-common-static
This package contains the debugging symbols for clickhouse-common.
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug

View File

@ -26,6 +26,10 @@ description: |
that allows generating analytical data reports in real time.
This package provides common files for both clickhouse server and client
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/bin/clickhouse
dst: /usr/bin/clickhouse

View File

@ -14,6 +14,10 @@ description: |
debugging symbols for clickhouse-keeper
This package contains the debugging symbols for clickhouse-keeper.
deb:
fields:
Source: clickhouse
contents:
- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug

View File

@ -22,6 +22,9 @@ description: |
Static clickhouse-keeper binary
A stand-alone clickhouse-keeper package
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-keeper

View File

@ -37,6 +37,10 @@ overrides:
depends:
- clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
deb:
fields:
Source: clickhouse
contents:
- src: root/etc/clickhouse-server
dst: /etc/clickhouse-server

View File

@ -622,9 +622,13 @@ void LocalServer::processConfig()
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
loadMetadata(global_context);
startupSystemTables();
DatabaseCatalog::instance().loadDatabases();
if (!config().has("only-system-tables"))
{
loadMetadata(global_context);
DatabaseCatalog::instance().loadDatabases();
}
LOG_DEBUG(log, "Loaded metadata.");
}
@ -715,6 +719,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("only-system-tables", "attach only system tables from specified path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
;
}
@ -743,6 +748,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("table-structure", options["structure"].as<std::string>());
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());

View File

@ -1381,7 +1381,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->setConfigReloadCallback([&]()
{
main_config_reloader->reload();
access_control.reload();
access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY);
});
/// Limit on total number of concurrently executed queries.
@ -1474,6 +1474,23 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
if (dns_cache_updater)
dns_cache_updater->start();
SCOPE_EXIT({
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
@ -1627,20 +1644,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
#if defined(OS_LINUX)
auto tasks_stats_provider = TasksStatsCounters::findBestAvailableProvider();
if (tasks_stats_provider == TasksStatsCounters::MetricsProvider::None)
@ -1705,8 +1708,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
main_config_reloader->start();
access_control.startPeriodicReloading();
if (dns_cache_updater)
dns_cache_updater->start();
{
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",

View File

@ -65,7 +65,7 @@
For example, as below:
{"date_time":"1650918987.180175","thread_name":"#1","thread_id":"254545","level":"Trace","query_id":"","logger_name":"BaseDaemon","message":"Received signal 2","source_file":"../base/daemon/BaseDaemon.cpp; virtual void SignalListener::run()","source_line":"192"}
To enable JSON logging support, please uncomment the entire <formatting> tag below.
a) You can modify key names by changing the values inside the <names> tag.
For example, to change DATE_TIME to MY_DATE_TIME, you can do it like this:
<date_time>MY_DATE_TIME</date_time>
@ -661,6 +661,13 @@
executed by any user. You can change this behaviour by setting this to true.
If it's set to true then this query requires "GRANT SELECT ON information_schema.<table>" just like as for ordinary tables. -->
<select_from_information_schema_requires_grant>false</select_from_information_schema_requires_grant>
<!-- By default, for backward compatibility a settings profile constraint for a specific setting inherits every unset field from
the previous profile. You can change this behaviour by setting this to true.
If it's set to true, then a constraint for a specific setting in a settings profile completely cancels all
actions of the previous constraint (defined in other profiles) for the same setting, including fields that are not set by the new constraint.
It also enables the 'changeable_in_readonly' constraint type. -->
<settings_constraints_replace_previous>false</settings_constraints_replace_previous>
</access_control_improvements>
<!-- Default profile of settings. -->

View File

@ -171,6 +171,7 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", false));
setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", false));
setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", false));
setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false));
addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_);
}
@ -390,9 +391,9 @@ void AccessControl::addStoragesFromMainConfig(
}
void AccessControl::reload()
void AccessControl::reload(ReloadMode reload_mode)
{
MultipleAccessStorage::reload();
MultipleAccessStorage::reload(reload_mode);
changes_notifier->sendNotifications();
}

View File

@ -99,8 +99,8 @@ public:
const String & config_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
/// Reloads and updates entities in this storage. This function is used to implement SYSTEM RELOAD CONFIG.
void reload() override;
/// Reloads and updates all access entities.
void reload(ReloadMode reload_mode) override;
using OnChangedHandler = std::function<void(const UUID & /* id */, const AccessEntityPtr & /* new or changed entity, null if removed */)>;
@ -158,6 +158,9 @@ public:
void setSelectFromInformationSchemaRequiresGrant(bool enable) { select_from_information_schema_requires_grant = enable; }
bool doesSelectFromInformationSchemaRequireGrant() const { return select_from_information_schema_requires_grant; }
void setSettingsConstraintsReplacePrevious(bool enable) { settings_constraints_replace_previous = enable; }
bool doesSettingsConstraintsReplacePrevious() const { return settings_constraints_replace_previous; }
std::shared_ptr<const ContextAccess> getContextAccess(
const UUID & user_id,
const std::vector<UUID> & current_roles,
@ -223,6 +226,7 @@ private:
std::atomic_bool on_cluster_queries_require_cluster_grant = false;
std::atomic_bool select_from_system_db_requires_grant = false;
std::atomic_bool select_from_information_schema_requires_grant = false;
std::atomic_bool settings_constraints_replace_previous = false;
};
}

View File

@ -143,6 +143,7 @@ enum class AccessType
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \
M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \

View File

@ -15,6 +15,7 @@
#include <Poco/JSON/Stringifier.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <base/range.h>
#include <filesystem>
#include <fstream>
@ -182,8 +183,8 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String
if (should_rebuild_lists)
{
rebuildLists();
writeLists();
LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
reloadAllAndRebuildLists();
}
}
@ -224,63 +225,57 @@ bool DiskAccessStorage::isPathEqual(const String & directory_path_) const
}
void DiskAccessStorage::clear()
{
entries_by_id.clear();
for (auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
}
bool DiskAccessStorage::readLists()
{
clear();
std::vector<std::tuple<UUID, String, AccessEntityType>> ids_names_types;
bool ok = true;
for (auto type : collections::range(AccessEntityType::MAX))
{
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto file_path = getListFilePath(directory_path, type);
if (!std::filesystem::exists(file_path))
{
LOG_WARNING(getLogger(), "File {} doesn't exist", file_path);
ok = false;
break;
return false;
}
try
{
for (const auto & [id, name] : readListFile(file_path))
{
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = name;
entries_by_name[entry.name] = &entry;
}
ids_names_types.emplace_back(id, name, type);
}
catch (...)
{
tryLogCurrentException(getLogger(), "Could not read " + file_path);
ok = false;
break;
return false;
}
}
if (!ok)
clear();
return ok;
entries_by_id.clear();
for (auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
for (auto & [id, name, type] : ids_names_types)
{
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = std::move(name);
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
entries_by_name[entry.name] = &entry;
}
return true;
}
bool DiskAccessStorage::writeLists()
void DiskAccessStorage::writeLists()
{
if (failed_to_write_lists)
return false; /// We don't try to write list files after the first fail.
/// The next restart of the server will invoke rebuilding of the list files.
return; /// We don't try to write list files after the first fail.
/// The next restart of the server will invoke rebuilding of the list files.
if (types_of_lists_to_write.empty())
return true;
return;
for (const auto & type : types_of_lists_to_write)
{
@ -299,14 +294,13 @@ bool DiskAccessStorage::writeLists()
tryLogCurrentException(getLogger(), "Could not write " + file_path);
failed_to_write_lists = true;
types_of_lists_to_write.clear();
return false;
return;
}
}
/// The list files were successfully written; we don't need the 'need_rebuild_lists.mark' file any longer.
std::filesystem::remove(getNeedRebuildListsMarkFilePath(directory_path));
types_of_lists_to_write.clear();
return true;
}
@ -364,10 +358,9 @@ void DiskAccessStorage::stopListsWritingThread()
/// Reads and parses all the "<id>.sql" files from a specified directory
/// and then saves the files "users.list", "roles.list", etc. to the same directory.
bool DiskAccessStorage::rebuildLists()
void DiskAccessStorage::reloadAllAndRebuildLists()
{
LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
clear();
std::vector<std::pair<UUID, AccessEntityPtr>> all_entities;
for (const auto & directory_entry : std::filesystem::directory_iterator(directory_path))
{
@ -386,21 +379,55 @@ bool DiskAccessStorage::rebuildLists()
if (!entity)
continue;
const String & name = entity->getName();
auto type = entity->getType();
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
entry.name = name;
entry.entity = entity;
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
entries_by_name[entry.name] = &entry;
all_entities.emplace_back(id, entity);
}
setAllInMemory(all_entities);
for (auto type : collections::range(AccessEntityType::MAX))
types_of_lists_to_write.insert(type);
return true;
failed_to_write_lists = false; /// Try again writing lists.
writeLists();
}
void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities)
{
/// Remove conflicting entities from the specified list.
auto entities_without_conflicts = all_entities;
clearConflictsInEntitiesList(entities_without_conflicts, getLogger());
/// Remove entities which are not used anymore.
boost::container::flat_set<UUID> ids_to_keep;
ids_to_keep.reserve(entities_without_conflicts.size());
for (const auto & [id, _] : entities_without_conflicts)
ids_to_keep.insert(id);
removeAllExceptInMemory(ids_to_keep);
/// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false);
}
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
{
for (auto it = entries_by_id.begin(); it != entries_by_id.end();)
{
const auto & id = it->first;
++it; /// We must go to the next element in the map `entries_by_id` here because otherwise removeNoLock() can invalidate our iterator.
if (!ids_to_keep.contains(id))
removeNoLock(id, /* throw_if_not_exists */ true, /* write_on_disk= */ false);
}
}
void DiskAccessStorage::reload(ReloadMode reload_mode)
{
if (reload_mode != ReloadMode::ALL)
return;
std::lock_guard lock{mutex};
reloadAllAndRebuildLists();
}
@ -471,21 +498,21 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
std::optional<UUID> DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
{
UUID id = generateRandomID();
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists))
if (insertWithID(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true))
return id;
return std::nullopt;
}
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
{
std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, write_on_disk);
}
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
{
const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType();
@ -497,6 +524,9 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto it_by_name = entries_by_name.find(name);
bool name_collision = (it_by_name != entries_by_name.end());
UUID id_by_name;
if (name_collision)
id_by_name = it_by_name->second->id;
if (name_collision && !replace_if_exists)
{
@ -507,19 +537,57 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
}
auto it_by_id = entries_by_id.find(id);
if (it_by_id != entries_by_id.end())
bool id_collision = (it_by_id != entries_by_id.end());
if (id_collision && !replace_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
if (throw_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
}
else
return false;
}
scheduleWriteLists(type);
writeAccessEntityToDisk(id, *new_entity);
if (write_on_disk)
scheduleWriteLists(type);
if (name_collision && replace_if_exists)
removeNoLock(it_by_name->second->id, /* throw_if_not_exists = */ false);
/// Remove collisions if necessary.
if (name_collision && (id_by_name != id))
{
assert(replace_if_exists);
removeNoLock(id_by_name, /* throw_if_not_exists= */ false, write_on_disk);
}
if (id_collision)
{
assert(replace_if_exists);
auto & existing_entry = it_by_id->second;
if (existing_entry.type == new_entity->getType())
{
if (!existing_entry.entity || (*existing_entry.entity != *new_entity))
{
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
if (existing_entry.name != new_entity->getName())
{
entries_by_name.erase(existing_entry.name);
[[maybe_unused]] bool inserted = entries_by_name.emplace(new_entity->getName(), &existing_entry).second;
assert(inserted);
}
existing_entry.entity = new_entity;
changes_notifier.onEntityUpdated(id, new_entity);
}
return true;
}
removeNoLock(id, /* throw_if_not_exists= */ false, write_on_disk);
}
/// Do insertion.
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
auto & entry = entries_by_id[id];
entry.id = id;
entry.type = type;
@ -535,11 +603,11 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
bool DiskAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists)
{
std::lock_guard lock{mutex};
return removeNoLock(id, throw_if_not_exists);
return removeNoLock(id, throw_if_not_exists, /* write_on_disk= */ true);
}
bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk)
{
auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
@ -556,8 +624,11 @@ bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
if (readonly)
throwReadonlyCannotRemove(type, entry.name);
scheduleWriteLists(type);
deleteAccessEntityOnDisk(id);
if (write_on_disk)
{
scheduleWriteLists(type);
deleteAccessEntityOnDisk(id);
}
/// Do removing.
UUID removed_id = id;
@ -573,11 +644,11 @@ bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists)
bool DiskAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists)
{
std::lock_guard lock{mutex};
return updateNoLock(id, update_func, throw_if_not_exists);
return updateNoLock(id, update_func, throw_if_not_exists, /* write_on_disk= */ true);
}
bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists)
bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk)
{
auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
@ -613,10 +684,13 @@ bool DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_
{
if (entries_by_name.contains(new_name))
throwNameCollisionCannotRename(type, old_name, new_name);
scheduleWriteLists(type);
if (write_on_disk)
scheduleWriteLists(type);
}
writeAccessEntityToDisk(id, *new_entity);
if (write_on_disk)
writeAccessEntityToDisk(id, *new_entity);
entry.entity = new_entity;
if (name_changed)
@ -668,7 +742,7 @@ void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : entities)
insertWithID(id, entity, replace_if_exists, throw_if_exists);
insertWithID(id, entity, replace_if_exists, throw_if_exists, /* write_on_disk= */ true);
});
}

View File

@ -27,6 +27,8 @@ public:
void setReadOnly(bool readonly_) { readonly = readonly_; }
bool isReadOnly() const override { return readonly; }
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; }
@ -41,19 +43,20 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
void clear();
bool readLists();
bool writeLists();
void scheduleWriteLists(AccessEntityType type);
bool rebuildLists();
bool readLists() TSA_REQUIRES(mutex);
void writeLists() TSA_REQUIRES(mutex);
void scheduleWriteLists(AccessEntityType type) TSA_REQUIRES(mutex);
void reloadAllAndRebuildLists() TSA_REQUIRES(mutex);
void setAllInMemory(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities) TSA_REQUIRES(mutex);
void removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
void listsWritingThreadFunc();
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
void stopListsWritingThread();
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk);
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
AccessEntityPtr readAccessEntityFromDisk(const UUID & id) const;
void writeAccessEntityToDisk(const UUID & id, const IAccessEntity & entity) const;
@ -69,13 +72,22 @@ private:
};
String directory_path;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)];
boost::container::flat_set<AccessEntityType> types_of_lists_to_write;
bool failed_to_write_lists = false; /// Whether writing of the list files has been failed since the recent restart of the server.
ThreadFromGlobalPool lists_writing_thread; /// List files are written in a separate thread.
std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit.
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
boost::container::flat_set<AccessEntityType> types_of_lists_to_write TSA_GUARDED_BY(mutex);
/// Whether writing of the list files has failed since the most recent restart of the server.
bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false;
/// List files are written in a separate thread.
ThreadFromGlobalPool lists_writing_thread;
/// Signals `lists_writing_thread` to exit.
std::condition_variable lists_writing_thread_should_exit;
bool lists_writing_thread_is_waiting = false;
AccessChangesNotifier & changes_notifier;
std::atomic<bool> readonly;
std::atomic<bool> backup_allowed;

View File

@ -13,6 +13,7 @@
#include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm_ext/erase.hpp>
@ -562,6 +563,62 @@ UUID IAccessStorage::generateRandomID()
}
void IAccessStorage::clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const Poco::Logger * log_)
{
std::unordered_map<UUID, size_t> positions_by_id;
std::unordered_map<std::string_view, size_t> positions_by_type_and_name[static_cast<size_t>(AccessEntityType::MAX)];
std::vector<size_t> positions_to_remove;
for (size_t pos = 0; pos != entities.size(); ++pos)
{
const auto & [id, entity] = entities[pos];
if (auto it = positions_by_id.find(id); it == positions_by_id.end())
{
positions_by_id[id] = pos;
}
else if (it->second != pos)
{
/// Conflict: same ID is used for multiple entities. We will ignore them.
positions_to_remove.emplace_back(pos);
positions_to_remove.emplace_back(it->second);
}
std::string_view entity_name = entity->getName();
auto & positions_by_name = positions_by_type_and_name[static_cast<size_t>(entity->getType())];
if (auto it = positions_by_name.find(entity_name); it == positions_by_name.end())
{
positions_by_name[entity_name] = pos;
}
else if (it->second != pos)
{
/// Conflict: same name and type are used for multiple entities. We will ignore them.
positions_to_remove.emplace_back(pos);
positions_to_remove.emplace_back(it->second);
}
}
if (positions_to_remove.empty())
return;
std::sort(positions_to_remove.begin(), positions_to_remove.end());
positions_to_remove.erase(std::unique(positions_to_remove.begin(), positions_to_remove.end()), positions_to_remove.end());
for (size_t pos : positions_to_remove)
{
LOG_WARNING(
log_,
"Skipping {} (id={}) due to conflicts with other access entities",
entities[pos].second->formatTypeWithName(),
toString(entities[pos].first));
}
/// Remove conflicting entities.
for (size_t pos : positions_to_remove | boost::adaptors::reversed) /// Must remove in reverse order.
entities.erase(entities.begin() + pos);
}
Poco::Logger * IAccessStorage::getLogger() const
{
Poco::Logger * ptr = log.load();

View File

@ -42,15 +42,26 @@ public:
/// Returns true if this entity is readonly.
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
/// Reloads and updates entities in this storage. This function is used to implement SYSTEM RELOAD CONFIG.
virtual void reload() {}
/// Starts periodic reloading and update of entities in this storage.
/// Starts periodic reloading and updating of entities in this storage.
virtual void startPeriodicReloading() {}
/// Stops periodic reloading and update of entities in this storage.
/// Stops periodic reloading and updating of entities in this storage.
virtual void stopPeriodicReloading() {}
enum class ReloadMode
{
/// Try to reload all access storages (including users.xml, the local (disk) access storage, and the replicated (in ZooKeeper) access storage).
/// This mode is invoked by the SYSTEM RELOAD USERS command.
ALL,
/// Only reloads users.xml
/// This mode is invoked by the SYSTEM RELOAD CONFIG command.
USERS_CONFIG_ONLY,
};
/// Makes this storage reload and update access entities right now.
virtual void reload(ReloadMode /* reload_mode */) {}
/// Returns the identifiers of all the entities of a specified type contained in the storage.
std::vector<UUID> findAll(AccessEntityType type) const;
@ -177,6 +188,7 @@ protected:
static UUID generateRandomID();
Poco::Logger * getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const Poco::Logger * log_);
[[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -5,7 +5,6 @@
#include <base/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
namespace DB
@ -90,6 +89,9 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto it_by_name = entries_by_name.find(name);
bool name_collision = (it_by_name != entries_by_name.end());
UUID id_by_name;
if (name_collision)
id_by_name = it_by_name->second->id;
if (name_collision && !replace_if_exists)
{
@ -100,16 +102,43 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
}
auto it_by_id = entries_by_id.find(id);
if (it_by_id != entries_by_id.end())
bool id_collision = (it_by_id != entries_by_id.end());
if (id_collision && !replace_if_exists)
{
const auto & existing_entry = it_by_id->second;
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
if (throw_if_exists)
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
else
return false;
}
if (name_collision && replace_if_exists)
/// Remove collisions if necessary.
if (name_collision && (id_by_name != id))
{
const auto & existing_entry = *(it_by_name->second);
removeNoLock(existing_entry.id, /* throw_if_not_exists = */ false);
assert(replace_if_exists);
removeNoLock(id_by_name, /* throw_if_not_exists= */ true);
}
if (id_collision)
{
assert(replace_if_exists);
auto & existing_entry = it_by_id->second;
if (existing_entry.entity->getType() == new_entity->getType())
{
if (*existing_entry.entity != *new_entity)
{
if (existing_entry.entity->getName() != new_entity->getName())
{
entries_by_name.erase(existing_entry.entity->getName());
[[maybe_unused]] bool inserted = entries_by_name.emplace(new_entity->getName(), &existing_entry).second;
assert(inserted);
}
existing_entry.entity = new_entity;
changes_notifier.onEntityUpdated(id, new_entity);
}
return true;
}
removeNoLock(id, /* throw_if_not_exists= */ true);
}
/// Do insertion.
@ -201,6 +230,29 @@ bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat
}
void MemoryAccessStorage::removeAllExcept(const std::vector<UUID> & ids_to_keep)
{
std::lock_guard lock{mutex};
removeAllExceptNoLock(ids_to_keep);
}
void MemoryAccessStorage::removeAllExceptNoLock(const std::vector<UUID> & ids_to_keep)
{
removeAllExceptNoLock(boost::container::flat_set<UUID>{ids_to_keep.begin(), ids_to_keep.end()});
}
void MemoryAccessStorage::removeAllExceptNoLock(const boost::container::flat_set<UUID> & ids_to_keep)
{
for (auto it = entries_by_id.begin(); it != entries_by_id.end();)
{
const auto & id = it->first;
++it; /// We must go to the next element in the map `entries_by_id` here because otherwise removeNoLock() can invalidate our iterator.
if (!ids_to_keep.contains(id))
removeNoLock(id, /* throw_if_not_exists */ true);
}
}
void MemoryAccessStorage::setAll(const std::vector<AccessEntityPtr> & all_entities)
{
std::vector<std::pair<UUID, AccessEntityPtr>> entities_with_ids;
@ -215,61 +267,20 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
{
std::lock_guard lock{mutex};
boost::container::flat_set<UUID> not_used_ids;
std::vector<UUID> conflicting_ids;
/// Remove conflicting entities from the specified list.
auto entities_without_conflicts = all_entities;
clearConflictsInEntitiesList(entities_without_conflicts, getLogger());
/// Get the list of currently used IDs. Later we will remove those of them which are not used anymore.
for (const auto & id : entries_by_id | boost::adaptors::map_keys)
not_used_ids.emplace(id);
/// Get the list of conflicting IDs and update the list of currently used ones.
for (const auto & [id, entity] : all_entities)
{
auto it = entries_by_id.find(id);
if (it != entries_by_id.end())
{
not_used_ids.erase(id); /// ID is used.
Entry & entry = it->second;
if (entry.entity->getType() != entity->getType())
conflicting_ids.emplace_back(id); /// Conflict: same ID, different type.
}
const auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(entity->getType())];
auto it2 = entries_by_name.find(entity->getName());
if (it2 != entries_by_name.end())
{
Entry & entry = *(it2->second);
if (entry.id != id)
conflicting_ids.emplace_back(entry.id); /// Conflict: same name and type, different ID.
}
}
/// Remove entities which are not used anymore and which are in conflict with new entities.
boost::container::flat_set<UUID> ids_to_remove = std::move(not_used_ids);
boost::range::copy(conflicting_ids, std::inserter(ids_to_remove, ids_to_remove.end()));
for (const auto & id : ids_to_remove)
removeNoLock(id, /* throw_if_not_exists = */ false);
/// Remove entities which are not used anymore.
boost::container::flat_set<UUID> ids_to_keep;
ids_to_keep.reserve(entities_without_conflicts.size());
for (const auto & [id, _] : entities_without_conflicts)
ids_to_keep.insert(id);
removeAllExceptNoLock(ids_to_keep);
/// Insert or update entities.
for (const auto & [id, entity] : all_entities)
{
auto it = entries_by_id.find(id);
if (it != entries_by_id.end())
{
if (*(it->second.entity) != *entity)
{
const AccessEntityPtr & changed_entity = entity;
updateNoLock(id,
[&changed_entity](const AccessEntityPtr &) { return changed_entity; },
/* throw_if_not_exists = */ true);
}
}
else
{
insertNoLock(id, entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
}
}
for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
}

View File

@ -22,6 +22,15 @@ public:
const char * getStorageType() const override { return STORAGE_TYPE; }
/// Inserts an entity with a specified ID.
/// If `replace_if_exists == true` it can replace an existing entry with such ID and also remove an existing entry
/// with such name & type.
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
/// Removes all entities except the specified list `ids_to_keep`.
/// The function skips IDs not contained in the storage.
void removeAllExcept(const std::vector<UUID> & ids_to_keep);
/// Sets all entities at once.
void setAll(const std::vector<AccessEntityPtr> & all_entities);
void setAll(const std::vector<std::pair<UUID, AccessEntityPtr>> & all_entities);
@ -39,11 +48,13 @@ private:
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertWithID(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists);
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) TSA_REQUIRES(mutex);
void removeAllExceptNoLock(const std::vector<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
void removeAllExceptNoLock(const boost::container::flat_set<UUID> & ids_to_keep) TSA_REQUIRES(mutex);
struct Entry
{
UUID id;
@ -54,6 +65,6 @@ private:
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex); /// We want to search entries both by ID and by the pair of name and type.
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
const bool backup_allowed = false;
};
}

View File

@ -223,13 +223,6 @@ bool MultipleAccessStorage::isReadOnly(const UUID & id) const
}
void MultipleAccessStorage::reload()
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
storage->reload();
}
void MultipleAccessStorage::startPeriodicReloading()
{
auto storages = getStoragesInternal();
@ -244,6 +237,13 @@ void MultipleAccessStorage::stopPeriodicReloading()
storage->stopPeriodicReloading();
}
void MultipleAccessStorage::reload(ReloadMode reload_mode)
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
storage->reload(reload_mode);
}
std::optional<UUID> MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
{

View File

@ -25,9 +25,9 @@ public:
bool isReadOnly() const override;
bool isReadOnly(const UUID & id) const override;
void reload() override;
void startPeriodicReloading() override;
void stopPeriodicReloading() override;
void reload(ReloadMode reload_mode) override;
void setStorages(const std::vector<StoragePtr> & storages);
void addStorage(const StoragePtr & new_storage);

View File

@ -301,4 +301,5 @@ std::vector<QuotaUsage> QuotaCache::getAllQuotasUsage() const
}
return all_usage;
}
}

View File

@ -24,8 +24,8 @@ namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NO_ZOOKEEPER;
extern const int BAD_ARGUMENTS;
extern const int NO_ZOOKEEPER;
}
static UUID parseUUID(const String & text)
@ -46,6 +46,7 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
, zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
, watched_queue(std::make_shared<ConcurrentBoundedQueue<UUID>>(std::numeric_limits<size_t>::max()))
, memory_storage(storage_name_, changes_notifier_, false)
, changes_notifier(changes_notifier_)
, backup_allowed(allow_backup_)
{
@ -59,7 +60,7 @@ ReplicatedAccessStorage::ReplicatedAccessStorage(
if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path;
initializeZookeeper();
initZooKeeperWithRetries(/* max_retries= */ 2);
}
ReplicatedAccessStorage::~ReplicatedAccessStorage()
@ -121,15 +122,14 @@ bool ReplicatedAccessStorage::insertWithID(const UUID & id, const AccessEntityPt
const String & name = new_entity->getName();
LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
if (!ok)
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
refreshEntity(zookeeper, id);
return true;
}
@ -163,51 +163,79 @@ bool ReplicatedAccessStorage::insertZooKeeper(
if (res == Coordination::Error::ZNODEEXISTS)
{
if (!throw_if_exists && !replace_if_exists)
return false; /// Couldn't insert a new entity.
if (throw_if_exists)
{
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
throwNameCollisionCannotInsert(type, name);
}
}
assert(replace_if_exists);
Coordination::Requests replace_ops;
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// The UUID already exists, simply fail.
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// The UUID is already associated with some existing entity, we will get rid of the conflicting entity first.
/// This itself could fail if the conflicting entity disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
Coordination::Stat stat;
String existing_entity_definition = zookeeper->get(entity_path, &stat);
auto existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
const String & existing_entity_name = existing_entity->getName();
const AccessEntityType existing_entity_type = existing_entity->getType();
const AccessEntityTypeInfo existing_entity_type_info = AccessEntityTypeInfo::get(existing_entity_type);
const String existing_name_path = zookeeper_path + "/" + existing_entity_type_info.unique_char + "/" + escapeForFileName(existing_entity_name);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else if (replace_if_exists)
{
/// The name already exists for this type.
/// If asked to, we need to replace the existing entity.
if (existing_name_path != name_path)
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_name_path, -1));
/// First get the uuid of the existing entity
/// This itself could fail if the conflicting name disappears in the meantime.
/// If that happens, then we'll just retry from the start.
Coordination::Stat name_stat;
String existing_entity_uuid = zookeeper->get(name_path, &name_stat);
const String existing_entity_path = zookeeper_path + "/uuid/" + existing_entity_uuid;
Coordination::Requests replace_ops;
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_entity_path, -1));
replace_ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
replace_ops.emplace_back(zkutil::makeSetRequest(name_path, entity_uuid, name_stat.version));
/// If this fails, then we'll just retry from the start.
zookeeper->multi(replace_ops);
/// Everything's fine, the new entity has been inserted instead of an existing entity.
return true;
replace_ops.emplace_back(zkutil::makeSetRequest(entity_path, new_entity_definition, stat.version));
}
else
{
/// Couldn't insert the new entity because there is an existing entity with such name.
if (throw_if_exists)
throwNameCollisionCannotInsert(type, name);
else
return false;
replace_ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
}
if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
{
/// The name is already associated with some existing entity, we will get rid of the conflicting entity first.
/// This itself could fail if the conflicting entity disappears in the meantime.
/// If that happens, then we'll just retry from the start.
Coordination::Stat stat;
String existing_entity_uuid = zookeeper->get(name_path, &stat);
const String existing_entity_path = zookeeper_path + "/uuid/" + existing_entity_uuid;
if (existing_entity_path != entity_path)
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_entity_path, -1));
replace_ops.emplace_back(zkutil::makeSetRequest(name_path, entity_uuid, stat.version));
}
else
{
replace_ops.emplace_back(zkutil::makeCreateRequest(name_path, entity_uuid, zkutil::CreateMode::Persistent));
}
/// If this fails, then we'll just retry from the start.
zookeeper->multi(replace_ops);
/// Everything's fine, the new entity has been inserted instead of an existing entity.
return true;
}
/// If this fails, then we'll just retry from the start.
@ -221,7 +249,7 @@ bool ReplicatedAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exis
{
LOG_DEBUG(getLogger(), "Removing entity {}", toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&] { ok = removeZooKeeper(zookeeper, id, throw_if_not_exists); });
@ -273,15 +301,14 @@ bool ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & upd
{
LOG_DEBUG(getLogger(), "Updating entity {}", toString(id));
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
bool ok = false;
retryOnZooKeeperUserError(10, [&] { ok = updateZooKeeper(zookeeper, id, update_func, throw_if_not_exists); });
if (!ok)
return false;
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
refreshEntity(zookeeper, id);
return true;
}
@ -349,50 +376,110 @@ void ReplicatedAccessStorage::runWatchingThread()
{
LOG_DEBUG(getLogger(), "Started watching thread");
setThreadName("ReplACLWatch");
while (watching)
{
bool refreshed = false;
try
{
if (!initialized)
initializeZookeeper();
if (refresh())
changes_notifier.sendNotifications();
initZooKeeperIfNeeded();
refreshed = refresh();
}
catch (...)
{
tryLogCurrentException(getLogger(), "Unexpected error, will try to restart worker thread:");
tryLogCurrentException(getLogger(), "Will try to restart watching thread after error");
resetAfterError();
sleepForSeconds(5);
continue;
}
if (refreshed)
{
try
{
changes_notifier.sendNotifications();
}
catch (...)
{
tryLogCurrentException(getLogger(), "Error while sending notifications");
}
}
}
}
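The loop above keeps two failure domains apart: an error while talking to ZooKeeper resets the cached session and backs off for five seconds, while an error while sending notifications is only logged, so a misbehaving subscriber can no longer force a full re-initialization. A condensed, self-contained sketch of that structure (the callable parameters are placeholders, not names from this commit):

#include <atomic>
#include <chrono>
#include <thread>

/// refresh() talks to the coordination service and may throw; notify() informs
/// subscribers and may throw independently of the refresh outcome.
template <typename Refresh, typename Reset, typename Notify, typename LogError>
void watchLoop(std::atomic<bool> & running, Refresh refresh, Reset reset, Notify notify, LogError log_error)
{
    while (running)
    {
        bool refreshed = false;
        try
        {
            refreshed = refresh();
        }
        catch (...)
        {
            reset();                     /// Drop the cached session; reconnect on the next iteration.
            std::this_thread::sleep_for(std::chrono::seconds(5));
            continue;
        }

        if (!refreshed)
            continue;

        try
        {
            notify();                    /// A failing subscriber must not reset the session.
        }
        catch (...)
        {
            log_error();
        }
    }
}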
void ReplicatedAccessStorage::resetAfterError()
{
initialized = false;
UUID id;
while (watched_queue->tryPop(id)) {}
std::lock_guard lock{mutex};
for (const auto type : collections::range(AccessEntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
entries_by_id.clear();
/// Make watching thread reinitialize ZooKeeper and reread everything.
std::lock_guard lock{cached_zookeeper_mutex};
cached_zookeeper = nullptr;
}
void ReplicatedAccessStorage::initializeZookeeper()
void ReplicatedAccessStorage::initZooKeeperWithRetries(size_t max_retries)
{
assert(!initialized);
auto zookeeper = get_zookeeper();
for (size_t attempt = 0; attempt < max_retries; ++attempt)
{
try
{
initZooKeeperIfNeeded();
break; /// If we're here the initialization has been successful.
}
catch (const Exception & e)
{
bool need_another_attempt = false;
if (!zookeeper)
throw Exception("Can't have Replicated access without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
if (const auto * coordination_exception = dynamic_cast<const Coordination::Exception *>(&e);
coordination_exception && Coordination::isHardwareError(coordination_exception->code))
{
/// In case of a network error we'll try to initialize again.
LOG_ERROR(getLogger(), "Initialization failed. Error: {}", e.message());
need_another_attempt = (attempt + 1 < max_retries);
}
createRootNodes(zookeeper);
if (!need_another_attempt)
throw;
}
}
}
refreshEntities(zookeeper);
void ReplicatedAccessStorage::initZooKeeperIfNeeded()
{
getZooKeeper();
}
initialized = true;
zkutil::ZooKeeperPtr ReplicatedAccessStorage::getZooKeeper()
{
std::lock_guard lock{cached_zookeeper_mutex};
return getZooKeeperNoLock();
}
zkutil::ZooKeeperPtr ReplicatedAccessStorage::getZooKeeperNoLock()
{
if (!cached_zookeeper || cached_zookeeper->expired())
{
auto zookeeper = get_zookeeper();
if (!zookeeper)
throw Exception("Can't have Replicated access without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
/// It's possible that we connected to a different [Zoo]Keeper instance,
/// so we may read slightly stale state.
zookeeper->sync(zookeeper_path);
createRootNodes(zookeeper);
refreshEntities(zookeeper, /* all= */ true);
cached_zookeeper = zookeeper;
}
return cached_zookeeper;
}
void ReplicatedAccessStorage::reload(ReloadMode reload_mode)
{
if (reload_mode != ReloadMode::ALL)
return;
/// Reinitialize ZooKeeper and reread everything.
std::lock_guard lock{cached_zookeeper_mutex};
cached_zookeeper = nullptr;
getZooKeeperNoLock();
}
void ReplicatedAccessStorage::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper)
@ -414,10 +501,10 @@ bool ReplicatedAccessStorage::refresh()
if (!watched_queue->tryPop(id, /* timeout_ms: */ 10000))
return false;
auto zookeeper = get_zookeeper();
auto zookeeper = getZooKeeper();
if (id == UUIDHelpers::Nil)
refreshEntities(zookeeper);
refreshEntities(zookeeper, /* all= */ false);
else
refreshEntity(zookeeper, id);
@ -425,10 +512,16 @@ bool ReplicatedAccessStorage::refresh()
}
void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper)
void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper, bool all)
{
LOG_DEBUG(getLogger(), "Refreshing entities list");
if (all)
{
/// It doesn't make sense to keep the queue because we will reread everything in this function.
watched_queue->clear();
}
const String zookeeper_uuids_path = zookeeper_path + "/uuid";
auto watch_entities_list = [watched_queue = watched_queue](const Coordination::WatchResponse &)
{
@ -437,185 +530,129 @@ void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zooke
Coordination::Stat stat;
const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list);
std::unordered_set<UUID> entity_uuids;
std::vector<UUID> entity_uuids;
entity_uuids.reserve(entity_uuid_strs.size());
for (const String & entity_uuid_str : entity_uuid_strs)
entity_uuids.insert(parseUUID(entity_uuid_str));
entity_uuids.emplace_back(parseUUID(entity_uuid_str));
std::lock_guard lock{mutex};
std::vector<UUID> entities_to_remove;
/// Locally remove entities that were removed from ZooKeeper
for (const auto & pair : entries_by_id)
if (all)
{
const UUID & entity_uuid = pair.first;
if (!entity_uuids.contains(entity_uuid))
entities_to_remove.push_back(entity_uuid);
/// all=true means we read & parse all access entities from ZooKeeper.
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
for (const auto & uuid : entity_uuids)
{
if (auto entity = tryReadEntityFromZooKeeper(zookeeper, uuid))
entities.emplace_back(uuid, entity);
}
memory_storage.setAll(entities);
}
for (const auto & entity_uuid : entities_to_remove)
removeEntityNoLock(entity_uuid);
/// Locally add entities that were added to ZooKeeper
for (const auto & entity_uuid : entity_uuids)
else
{
const auto it = entries_by_id.find(entity_uuid);
if (it == entries_by_id.end())
refreshEntityNoLock(zookeeper, entity_uuid);
/// all=false means we read & parse only new access entities from ZooKeeper.
memory_storage.removeAllExcept(entity_uuids);
for (const auto & uuid : entity_uuids)
{
if (!memory_storage.exists(uuid))
refreshEntityNoLock(zookeeper, uuid);
}
}
LOG_DEBUG(getLogger(), "Refreshing entities list finished");
}
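refreshEntities now distinguishes a full rebuild (all=true: drop the pending watch queue and hand the complete list to memory_storage.setAll) from an incremental reconciliation (all=false: remove IDs that disappeared, fetch only IDs that are new). The same reconciliation idea on a plain map, as a minimal sketch with simplified types (this is not the MemoryAccessStorage API):

#include <functional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using Id = std::string;
using Entity = std::string;

/// Keep only the ids that still exist remotely and fetch definitions
/// only for ids we have not seen yet.
void reconcile(std::unordered_map<Id, Entity> & local,
               const std::vector<Id> & remote_ids,
               const std::function<Entity(const Id &)> & fetch)
{
    std::unordered_set<Id> keep(remote_ids.begin(), remote_ids.end());

    for (auto it = local.begin(); it != local.end();)
    {
        if (keep.contains(it->first))
            ++it;
        else
            it = local.erase(it);         /// Entity vanished remotely.
    }

    for (const auto & id : remote_ids)
        if (!local.contains(id))
            local.emplace(id, fetch(id)); /// Only new entities are read remotely.
}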
void ReplicatedAccessStorage::refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id));
auto entity = tryReadEntityFromZooKeeper(zookeeper, id);
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id);
if (entity)
setEntityNoLock(id, entity);
else
removeEntityNoLock(id);
}
void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id));
auto entity = tryReadEntityFromZooKeeper(zookeeper, id);
if (entity)
setEntityNoLock(id, entity);
else
removeEntityNoLock(id);
}
AccessEntityPtr ReplicatedAccessStorage::tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const
{
const auto watch_entity = [watched_queue = watched_queue, id](const Coordination::WatchResponse & response)
{
if (response.type == Coordination::Event::CHANGED)
[[maybe_unused]] bool push_result = watched_queue->push(id);
};
Coordination::Stat entity_stat;
const String entity_path = zookeeper_path + "/uuid/" + toString(id);
String entity_definition;
const bool exists = zookeeper->tryGetWatch(entity_path, entity_definition, &entity_stat, watch_entity);
if (exists)
bool exists = zookeeper->tryGetWatch(entity_path, entity_definition, &entity_stat, watch_entity);
if (!exists)
return nullptr;
try
{
const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, entity_path);
setEntityNoLock(id, entity);
return deserializeAccessEntity(entity_definition, entity_path);
}
else
catch (...)
{
removeEntityNoLock(id);
tryLogCurrentException(getLogger(), "Error while reading the definition of " + toString(id));
return nullptr;
}
}
void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntityPtr & entity)
{
LOG_DEBUG(getLogger(), "Setting id {} to entity named {}", toString(id), entity->getName());
const AccessEntityType type = entity->getType();
const String & name = entity->getName();
/// If the type+name already exists and is a different entity, remove old entity
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
if (auto it = entries_by_name.find(name); it != entries_by_name.end() && it->second->id != id)
{
removeEntityNoLock(it->second->id);
}
/// If the entity already exists under a different type+name, remove old type+name
bool existed_before = false;
if (auto it = entries_by_id.find(id); it != entries_by_id.end())
{
existed_before = true;
const AccessEntityPtr & existing_entity = it->second.entity;
const AccessEntityType existing_type = existing_entity->getType();
const String & existing_name = existing_entity->getName();
if (existing_type != type || existing_name != name)
{
auto & existing_entries_by_name = entries_by_name_and_type[static_cast<size_t>(existing_type)];
existing_entries_by_name.erase(existing_name);
}
}
auto & entry = entries_by_id[id];
entry.id = id;
entry.entity = entity;
entries_by_name[name] = &entry;
if (initialized)
{
if (existed_before)
changes_notifier.onEntityUpdated(id, entity);
else
changes_notifier.onEntityAdded(id, entity);
}
memory_storage.insertWithID(id, entity, /* replace_if_exists= */ true, /* throw_if_exists= */ false);
}
void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id)
{
LOG_DEBUG(getLogger(), "Removing entity with id {}", toString(id));
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
{
LOG_DEBUG(getLogger(), "Id {} not found, ignoring removal", toString(id));
return;
}
const Entry & entry = it->second;
const AccessEntityType type = entry.entity->getType();
const String & name = entry.entity->getName();
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto name_it = entries_by_name.find(name);
if (name_it == entries_by_name.end())
LOG_WARNING(getLogger(), "Entity {} not found in names, ignoring removal of name", toString(id));
else if (name_it->second != &(it->second))
LOG_WARNING(getLogger(), "Name {} not pointing to entity {}, ignoring removal of name", name, toString(id));
else
entries_by_name.erase(name);
UUID removed_id = id;
entries_by_id.erase(id);
LOG_DEBUG(getLogger(), "Removed entity with id {}", toString(id));
changes_notifier.onEntityRemoved(removed_id, type);
memory_storage.remove(id, /* throw_if_not_exists= */ false);
}
std::optional<UUID> ReplicatedAccessStorage::findImpl(AccessEntityType type, const String & name) const
{
std::lock_guard lock{mutex};
const auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto it = entries_by_name.find(name);
if (it == entries_by_name.end())
return {};
const Entry * entry = it->second;
return entry->id;
return memory_storage.find(type, name);
}
std::vector<UUID> ReplicatedAccessStorage::findAllImpl(AccessEntityType type) const
{
std::lock_guard lock{mutex};
std::vector<UUID> result;
result.reserve(entries_by_id.size());
for (const auto & [id, entry] : entries_by_id)
if (entry.entity->isTypeOf(type))
result.emplace_back(id);
return result;
return memory_storage.findAll(type);
}
bool ReplicatedAccessStorage::exists(const UUID & id) const
{
std::lock_guard lock{mutex};
return entries_by_id.contains(id);
return memory_storage.exists(id);
}
AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const
{
std::lock_guard lock{mutex};
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
{
if (throw_if_not_exists)
throwNotFound(id);
else
return nullptr;
}
const Entry & entry = it->second;
return entry.entity;
return memory_storage.read(id, throw_if_not_exists);
}

View File

@ -1,20 +1,13 @@
#pragma once
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ThreadPool.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Access/IAccessStorage.h>
#include <Access/MemoryAccessStorage.h>
namespace DB
@ -34,6 +27,7 @@ public:
void startPeriodicReloading() override { startWatchingThread(); }
void stopPeriodicReloading() override { stopWatchingThread(); }
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;
@ -43,9 +37,10 @@ public:
private:
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
const zkutil::GetZooKeeper get_zookeeper;
std::atomic<bool> initialized = false;
zkutil::ZooKeeperPtr cached_zookeeper TSA_GUARDED_BY(cached_zookeeper_mutex);
std::mutex cached_zookeeper_mutex;
std::atomic<bool> watching = false;
ThreadFromGlobalPool watching_thread;
@ -60,7 +55,10 @@ private:
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
void initializeZookeeper();
void initZooKeeperWithRetries(size_t max_retries);
void initZooKeeperIfNeeded();
zkutil::ZooKeeperPtr getZooKeeper();
zkutil::ZooKeeperPtr getZooKeeperNoLock() TSA_REQUIRES(cached_zookeeper_mutex);
void createRootNodes(const zkutil::ZooKeeperPtr & zookeeper);
void startWatchingThread();
@ -70,27 +68,21 @@ private:
void resetAfterError();
bool refresh();
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper, bool all);
void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) TSA_REQUIRES(mutex);
AccessEntityPtr tryReadEntityFromZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) const;
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity) TSA_REQUIRES(mutex);
void removeEntityNoLock(const UUID & id) TSA_REQUIRES(mutex);
struct Entry
{
UUID id;
AccessEntityPtr entity;
};
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id TSA_GUARDED_BY(mutex);
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(AccessEntityType::MAX)] TSA_GUARDED_BY(mutex);
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);
AccessChangesNotifier & changes_notifier;
bool backup_allowed = false;
const bool backup_allowed = false;
};
}

View File

@ -35,88 +35,43 @@ void SettingsConstraints::clear()
constraints.clear();
}
void SettingsConstraints::setMinValue(std::string_view setting_name, const Field & min_value)
void SettingsConstraints::set(const String & setting_name, const Field & min_value, const Field & max_value, SettingConstraintWritability writability)
{
getConstraintRef(setting_name).min_value = Settings::castValueUtil(setting_name, min_value);
auto & constraint = constraints[setting_name];
if (!min_value.isNull())
constraint.min_value = Settings::castValueUtil(setting_name, min_value);
if (!max_value.isNull())
constraint.max_value = Settings::castValueUtil(setting_name, max_value);
constraint.writability = writability;
}
Field SettingsConstraints::getMinValue(std::string_view setting_name) const
void SettingsConstraints::get(const Settings & current_settings, std::string_view setting_name, Field & min_value, Field & max_value, SettingConstraintWritability & writability) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
return ptr->min_value;
else
return {};
}
void SettingsConstraints::setMaxValue(std::string_view setting_name, const Field & max_value)
{
getConstraintRef(setting_name).max_value = Settings::castValueUtil(setting_name, max_value);
}
Field SettingsConstraints::getMaxValue(std::string_view setting_name) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
return ptr->max_value;
else
return {};
}
void SettingsConstraints::setReadOnly(std::string_view setting_name, bool read_only)
{
getConstraintRef(setting_name).read_only = read_only;
}
bool SettingsConstraints::isReadOnly(std::string_view setting_name) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
return ptr->read_only;
else
return false;
}
void SettingsConstraints::set(std::string_view setting_name, const Field & min_value, const Field & max_value, bool read_only)
{
auto & ref = getConstraintRef(setting_name);
ref.min_value = Settings::castValueUtil(setting_name, min_value);
ref.max_value = Settings::castValueUtil(setting_name, max_value);
ref.read_only = read_only;
}
void SettingsConstraints::get(std::string_view setting_name, Field & min_value, Field & max_value, bool & read_only) const
{
const auto * ptr = tryGetConstraint(setting_name);
if (ptr)
{
min_value = ptr->min_value;
max_value = ptr->max_value;
read_only = ptr->read_only;
}
else
{
min_value = Field{};
max_value = Field{};
read_only = false;
}
auto checker = getChecker(current_settings, setting_name);
min_value = checker.constraint.min_value;
max_value = checker.constraint.max_value;
writability = checker.constraint.writability;
}
void SettingsConstraints::merge(const SettingsConstraints & other)
{
for (const auto & [other_name, other_constraint] : other.constraints)
if (access_control->doesSettingsConstraintsReplacePrevious())
{
auto & constraint = getConstraintRef(other_name);
if (!other_constraint.min_value.isNull())
constraint.min_value = other_constraint.min_value;
if (!other_constraint.max_value.isNull())
constraint.max_value = other_constraint.max_value;
if (other_constraint.read_only)
constraint.read_only = true;
for (const auto & [other_name, other_constraint] : other.constraints)
constraints[other_name] = other_constraint;
}
else
{
for (const auto & [other_name, other_constraint] : other.constraints)
{
auto & constraint = constraints[other_name];
if (!other_constraint.min_value.isNull())
constraint.min_value = other_constraint.min_value;
if (!other_constraint.max_value.isNull())
constraint.max_value = other_constraint.max_value;
if (other_constraint.writability == SettingConstraintWritability::CONST)
constraint.writability = SettingConstraintWritability::CONST; // NOTE: In this mode <readonly/> flag cannot be overridden to be false
}
}
}
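merge() now has two modes controlled by settings_constraints_replace_previous: either the other profile's constraint replaces the stored one wholesale, or the fields are merged one by one, in which case a CONST writability is sticky and cannot be relaxed back to WRITABLE. A standalone sketch of the field-wise branch with simplified types (not the real Constraint/Field types):

#include <string>
#include <unordered_map>

enum class Writability { WRITABLE, CONST, CHANGEABLE_IN_READONLY };

struct Range
{
    bool has_min = false;
    bool has_max = false;
    long long min = 0;
    long long max = 0;
    Writability writability = Writability::WRITABLE;
};

/// Field-wise merge (the settings_constraints_replace_previous = false branch):
/// a bound is overwritten only when the other profile actually defines it,
/// and CONST is never downgraded.
void mergeFieldWise(std::unordered_map<std::string, Range> & mine,
                    const std::unordered_map<std::string, Range> & other)
{
    for (const auto & [name, other_range] : other)
    {
        auto & range = mine[name];
        if (other_range.has_min) { range.has_min = true; range.min = other_range.min; }
        if (other_range.has_max) { range.has_max = true; range.max = other_range.max; }
        if (other_range.writability == Writability::CONST)
            range.writability = Writability::CONST;
    }
}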
@ -180,26 +135,6 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
}
};
bool cannot_compare = false;
auto less = [&](const Field & left, const Field & right)
{
cannot_compare = false;
if (reaction == THROW_ON_VIOLATION)
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
else
{
try
{
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
}
catch (...)
{
cannot_compare = true;
return false;
}
}
};
if (reaction == THROW_ON_VIOLATION)
{
try
@ -239,115 +174,119 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
return false;
}
if (!current_settings.allow_ddl && setting_name == "allow_ddl")
return getChecker(current_settings, setting_name).check(change, new_value, reaction);
}
bool SettingsConstraints::Checker::check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const
{
const String & setting_name = change.name;
auto less_or_cannot_compare = [=](const Field & left, const Field & right)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify 'allow_ddl' setting when DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
else
return false;
}
/** The `readonly` value is understood as follows:
* 0 - everything allowed.
* 1 - only read queries can be made; you can not change the settings.
* 2 - You can only do read queries and you can change the settings, except for the `readonly` setting.
*/
if (current_settings.readonly == 1)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify '" + setting_name + "' setting in readonly mode", ErrorCodes::READONLY);
else
return false;
}
if (current_settings.readonly > 1 && setting_name == "readonly")
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Cannot modify 'readonly' setting in readonly mode", ErrorCodes::READONLY);
else
return false;
}
const Constraint * constraint = tryGetConstraint(setting_name);
if (constraint)
{
if (constraint->read_only)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
const Field & min_value = constraint->min_value;
const Field & max_value = constraint->max_value;
if (!min_value.isNull() && !max_value.isNull() && (less(max_value, min_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
if (!min_value.isNull() && (less(new_value, min_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
try
{
throw Exception(
"Setting " + setting_name + " shouldn't be less than " + applyVisitor(FieldVisitorToString(), constraint->min_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
return applyVisitor(FieldVisitorAccurateLess{}, left, right);
}
else
change.value = min_value;
}
if (!max_value.isNull() && (less(max_value, new_value) || cannot_compare))
{
if (reaction == THROW_ON_VIOLATION)
catch (...)
{
throw Exception(
"Setting " + setting_name + " shouldn't be greater than " + applyVisitor(FieldVisitorToString(), constraint->max_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
return true;
}
else
change.value = max_value;
}
};
if (!explain.empty())
{
if (reaction == THROW_ON_VIOLATION)
throw Exception(explain, code);
else
return false;
}
if (constraint.writability == SettingConstraintWritability::CONST)
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
const auto & min_value = constraint.min_value;
const auto & max_value = constraint.max_value;
if (!min_value.isNull() && !max_value.isNull() && less_or_cannot_compare(max_value, min_value))
{
if (reaction == THROW_ON_VIOLATION)
throw Exception("Setting " + setting_name + " should not be changed", ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
else
return false;
}
if (!min_value.isNull() && less_or_cannot_compare(new_value, min_value))
{
if (reaction == THROW_ON_VIOLATION)
{
throw Exception(
"Setting " + setting_name + " shouldn't be less than " + applyVisitor(FieldVisitorToString(), min_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
else
change.value = min_value;
}
if (!max_value.isNull() && less_or_cannot_compare(max_value, new_value))
{
if (reaction == THROW_ON_VIOLATION)
{
throw Exception(
"Setting " + setting_name + " shouldn't be greater than " + applyVisitor(FieldVisitorToString(), max_value),
ErrorCodes::SETTING_CONSTRAINT_VIOLATION);
}
else
change.value = max_value;
}
return true;
}
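check() keeps both reaction modes of the old code: under THROW_ON_VIOLATION a violated bound raises SETTING_CONSTRAINT_VIOLATION (or the stored explain/code pair for the readonly and allow_ddl cases), while under CLAMP_ON_VIOLATION the requested value is silently pinned into [min, max]. A tiny numeric illustration of that decision, detached from Field and Settings (the types here are simplifications, not the real API):

#include <cstdint>
#include <optional>
#include <stdexcept>

enum class Reaction { Throw, Clamp };

/// Returns the value that will actually be applied.
int64_t applyConstraint(int64_t requested,
                        std::optional<int64_t> min_value,
                        std::optional<int64_t> max_value,
                        Reaction reaction)
{
    if (min_value && requested < *min_value)
    {
        if (reaction == Reaction::Throw)
            throw std::runtime_error("setting shouldn't be less than the minimum");
        return *min_value;               /// CLAMP_ON_VIOLATION: pin to the lower bound.
    }
    if (max_value && *max_value < requested)
    {
        if (reaction == Reaction::Throw)
            throw std::runtime_error("setting shouldn't be greater than the maximum");
        return *max_value;               /// CLAMP_ON_VIOLATION: pin to the upper bound.
    }
    return requested;                    /// Inside the allowed range.
}

/// Example: with min=1 and max=8, requesting 32 under Clamp yields 8.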
SettingsConstraints::Constraint & SettingsConstraints::getConstraintRef(std::string_view setting_name)
SettingsConstraints::Checker SettingsConstraints::getChecker(const Settings & current_settings, std::string_view setting_name) const
{
if (!current_settings.allow_ddl && setting_name == "allow_ddl")
return Checker("Cannot modify 'allow_ddl' setting when DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
/** The `readonly` value is understood as follows:
* 0 - no read-only restrictions.
* 1 - only read requests, as well as changing settings with the `changeable_in_readonly` flag.
* 2 - only read requests, as well as changing settings, except for the `readonly` setting.
*/
if (current_settings.readonly > 1 && setting_name == "readonly")
return Checker("Cannot modify 'readonly' setting in readonly mode", ErrorCodes::READONLY);
auto it = constraints.find(setting_name);
if (it == constraints.end())
if (current_settings.readonly == 1)
{
auto setting_name_ptr = std::make_shared<const String>(setting_name);
Constraint new_constraint;
new_constraint.setting_name = setting_name_ptr;
it = constraints.emplace(*setting_name_ptr, std::move(new_constraint)).first;
if (it == constraints.end() || it->second.writability != SettingConstraintWritability::CHANGEABLE_IN_READONLY)
return Checker("Cannot modify '" + String(setting_name) + "' setting in readonly mode", ErrorCodes::READONLY);
}
return it->second;
else // For both readonly=0 and readonly=2
{
if (it == constraints.end())
return Checker(); // Allowed
}
return Checker(it->second);
}
const SettingsConstraints::Constraint * SettingsConstraints::tryGetConstraint(std::string_view setting_name) const
{
auto it = constraints.find(setting_name);
if (it == constraints.end())
return nullptr;
return &it->second;
}
bool SettingsConstraints::Constraint::operator==(const Constraint & other) const
{
return (read_only == other.read_only) && (min_value == other.min_value) && (max_value == other.max_value)
&& (*setting_name == *other.setting_name);
return writability == other.writability && min_value == other.min_value && max_value == other.max_value;
}
bool operator ==(const SettingsConstraints & left, const SettingsConstraints & right)
{
return (left.constraints == right.constraints);
return left.constraints == right.constraints;
}
}

View File

@ -1,9 +1,9 @@
#pragma once
#include <Access/SettingsProfileElement.h>
#include <Common/SettingsChanges.h>
#include <unordered_map>
namespace Poco::Util
{
class AbstractConfiguration;
@ -35,18 +35,22 @@ class AccessControl;
* <max>20000000000</max>
* </max_memory_usage>
* <force_index_by_date>
* <readonly/>
* <const/>
* </force_index_by_date>
* <max_threads>
* <changeable_in_readonly/>
* </max_threads>
* </constraints>
* </user_profile>
* </profiles>
*
* This class also checks that we are not in the read-only mode.
* If a setting cannot be changed due to the read-only mode, this class throws an exception.
* The value of `readonly` value is understood as follows:
* 0 - everything allowed.
* 1 - only read queries can be made; you can not change the settings.
* 2 - you can only do read queries and you can change the settings, except for the `readonly` setting.
* The value of `readonly` is understood as follows:
* 0 - not read-only mode, no additional checks.
* 1 - only read queries, as well as changing settings with the <changeable_in_readonly/> flag.
* 2 - only read queries and you can change the settings, except for the `readonly` setting.
*
*/
class SettingsConstraints
{
@ -61,17 +65,8 @@ public:
void clear();
bool empty() const { return constraints.empty(); }
void setMinValue(std::string_view setting_name, const Field & min_value);
Field getMinValue(std::string_view setting_name) const;
void setMaxValue(std::string_view setting_name, const Field & max_value);
Field getMaxValue(std::string_view setting_name) const;
void setReadOnly(std::string_view setting_name, bool read_only);
bool isReadOnly(std::string_view setting_name) const;
void set(std::string_view setting_name, const Field & min_value, const Field & max_value, bool read_only);
void get(std::string_view setting_name, Field & min_value, Field & max_value, bool & read_only) const;
void set(const String & setting_name, const Field & min_value, const Field & max_value, SettingConstraintWritability writability);
void get(const Settings & current_settings, std::string_view setting_name, Field & min_value, Field & max_value, SettingConstraintWritability & writability) const;
void merge(const SettingsConstraints & other);
@ -87,10 +82,15 @@ public:
friend bool operator !=(const SettingsConstraints & left, const SettingsConstraints & right) { return !(left == right); }
private:
enum ReactionOnViolation
{
THROW_ON_VIOLATION,
CLAMP_ON_VIOLATION,
};
struct Constraint
{
std::shared_ptr<const String> setting_name;
bool read_only = false;
SettingConstraintWritability writability = SettingConstraintWritability::WRITABLE;
Field min_value;
Field max_value;
@ -98,18 +98,53 @@ private:
bool operator !=(const Constraint & other) const { return !(*this == other); }
};
enum ReactionOnViolation
struct Checker
{
THROW_ON_VIOLATION,
CLAMP_ON_VIOLATION,
Constraint constraint;
String explain;
int code = 0;
// Allows everything
Checker() = default;
// Forbidden with explanation
Checker(const String & explain_, int code_)
: constraint{.writability = SettingConstraintWritability::CONST}
, explain(explain_)
, code(code_)
{}
// Allow or forbid depending on range defined by constraint, also used to return stored constraint
explicit Checker(const Constraint & constraint_)
: constraint(constraint_)
{}
// Perform checking
bool check(SettingChange & change, const Field & new_value, ReactionOnViolation reaction) const;
};
struct StringHash
{
using is_transparent = void;
size_t operator()(std::string_view txt) const
{
return std::hash<std::string_view>{}(txt);
}
size_t operator()(const String & txt) const
{
return std::hash<String>{}(txt);
}
};
bool checkImpl(const Settings & current_settings, SettingChange & change, ReactionOnViolation reaction) const;
Constraint & getConstraintRef(std::string_view setting_name);
const Constraint * tryGetConstraint(std::string_view setting_name) const;
Checker getChecker(const Settings & current_settings, std::string_view setting_name) const;
std::unordered_map<std::string_view, Constraint> constraints;
const AccessControl * access_control = nullptr;
// Special container for heterogeneous lookups: to avoid `String` construction during `find(std::string_view)`
using Constraints = std::unordered_map<String, Constraint, StringHash, std::equal_to<>>;
Constraints constraints;
const AccessControl * access_control;
};
}
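The new Constraints container pairs a transparent hash (note the is_transparent typedef) with std::equal_to<> so that find(std::string_view) performs a heterogeneous lookup without materializing a temporary String key; this relies on the C++20 unordered-container overloads. A self-contained illustration of the same trick with standard types only:

#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_map>

struct TransparentStringHash
{
    using is_transparent = void;         /// Opt in to heterogeneous lookup.
    size_t operator()(std::string_view txt) const { return std::hash<std::string_view>{}(txt); }
    size_t operator()(const std::string & txt) const { return std::hash<std::string>{}(txt); }
};

int main()
{
    std::unordered_map<std::string, int, TransparentStringHash, std::equal_to<>> constraints;
    constraints["max_threads"] = 8;

    std::string_view name = "max_threads";
    auto it = constraints.find(name);    /// No temporary std::string is constructed here.
    return it != constraints.end() ? 0 : 1;
}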

View File

@ -3,6 +3,7 @@
#include <Access/AccessControl.h>
#include <Access/SettingsProfile.h>
#include <Core/Settings.h>
#include <Common/SettingConstraintWritability.h>
#include <Common/SettingsChanges.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -18,6 +19,10 @@ namespace
constexpr const char ALLOW_BACKUP_SETTING_NAME[] = "allow_backup";
}
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
SettingsProfileElement::SettingsProfileElement(const ASTSettingsProfileElement & ast)
{
@ -46,17 +51,20 @@ void SettingsProfileElement::init(const ASTSettingsProfileElement & ast, const A
{
setting_name = ast.setting_name;
/// Optionally check if a setting with that name is allowed.
if (access_control)
{
/// Check if a setting with that name is allowed.
if (setting_name != ALLOW_BACKUP_SETTING_NAME)
access_control->checkSettingNameIsAllowed(setting_name);
/// Check if a CHANGEABLE_IN_READONLY is allowed.
if (ast.writability == SettingConstraintWritability::CHANGEABLE_IN_READONLY && !access_control->doesSettingsConstraintsReplacePrevious())
throw Exception("CHANGEABLE_IN_READONLY for " + setting_name + " is not allowed unless settings_constraints_replace_previous is enabled", ErrorCodes::NOT_IMPLEMENTED);
}
value = ast.value;
min_value = ast.min_value;
max_value = ast.max_value;
readonly = ast.readonly;
writability = ast.writability;
if (!value.isNull())
value = Settings::castValueUtil(setting_name, value);
@ -80,7 +88,7 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toAST() const
ast->value = value;
ast->min_value = min_value;
ast->max_value = max_value;
ast->readonly = readonly;
ast->writability = writability;
return ast;
}
@ -101,7 +109,7 @@ std::shared_ptr<ASTSettingsProfileElement> SettingsProfileElement::toASTWithName
ast->value = value;
ast->min_value = min_value;
ast->max_value = max_value;
ast->readonly = readonly;
ast->writability = writability;
return ast;
}
@ -205,17 +213,12 @@ SettingsConstraints SettingsProfileElements::toSettingsConstraints(const AccessC
{
SettingsConstraints res{access_control};
for (const auto & elem : *this)
{
if (!elem.setting_name.empty() && (elem.setting_name != ALLOW_BACKUP_SETTING_NAME))
{
if (!elem.min_value.isNull())
res.setMinValue(elem.setting_name, elem.min_value);
if (!elem.max_value.isNull())
res.setMaxValue(elem.setting_name, elem.max_value);
if (elem.readonly)
res.setReadOnly(elem.setting_name, *elem.readonly);
}
}
if (!elem.setting_name.empty() && elem.setting_name != ALLOW_BACKUP_SETTING_NAME)
res.set(
elem.setting_name,
elem.min_value,
elem.max_value,
elem.writability ? *elem.writability : SettingConstraintWritability::WRITABLE);
return res;
}
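toSettingsConstraints is now the only writer of the new interface: each profile element becomes a single set() call carrying min, max and writability at once (null min/max fields are simply skipped by set, as shown in SettingsConstraints.cpp above), instead of the three separate setMinValue/setMaxValue/setReadOnly calls of the old code.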

View File

@ -2,6 +2,7 @@
#include <Core/Field.h>
#include <Core/UUID.h>
#include <Common/SettingConstraintWritability.h>
#include <optional>
#include <unordered_map>
#include <vector>
@ -25,9 +26,9 @@ struct SettingsProfileElement
Field value;
Field min_value;
Field max_value;
std::optional<bool> readonly;
std::optional<SettingConstraintWritability> writability;
auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, readonly); }
auto toTuple() const { return std::tie(parent_profile, setting_name, value, min_value, max_value, writability); }
friend bool operator==(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return lhs.toTuple() == rhs.toTuple(); }
friend bool operator!=(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return !(lhs == rhs); }
friend bool operator <(const SettingsProfileElement & lhs, const SettingsProfileElement & rhs) { return lhs.toTuple() < rhs.toTuple(); }

View File

@ -441,17 +441,32 @@ namespace
String path_to_name = path_to_constraints + "." + setting_name;
config.keys(path_to_name, constraint_types);
size_t writability_count = 0;
for (const String & constraint_type : constraint_types)
{
if (constraint_type == "min")
profile_element.min_value = Settings::stringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type));
else if (constraint_type == "max")
profile_element.max_value = Settings::stringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type));
else if (constraint_type == "readonly")
profile_element.readonly = true;
else if (constraint_type == "readonly" || constraint_type == "const")
{
writability_count++;
profile_element.writability = SettingConstraintWritability::CONST;
}
else if (constraint_type == "changeable_in_readonly")
{
writability_count++;
if (access_control.doesSettingsConstraintsReplacePrevious())
profile_element.writability = SettingConstraintWritability::CHANGEABLE_IN_READONLY;
else
throw Exception("Setting changeable_in_readonly for " + setting_name + " is not allowed unless settings_constraints_replace_previous is enabled", ErrorCodes::NOT_IMPLEMENTED);
}
else
throw Exception("Setting " + constraint_type + " value for " + setting_name + " isn't supported", ErrorCodes::NOT_IMPLEMENTED);
}
if (writability_count > 1)
throw Exception("Not more than one constraint writability specifier (const/readonly/changeable_in_readonly) is allowed for " + setting_name, ErrorCodes::NOT_IMPLEMENTED);
profile_elements.push_back(std::move(profile_element));
}
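For reference, the configuration shape this loop parses (taken from the SettingsConstraints.h comment earlier in this commit) is shown below; it is embedded in a C++ raw string only to keep the examples in one language, and the variable name is illustrative. At most one writability specifier (readonly, const or changeable_in_readonly) is accepted per setting, and changeable_in_readonly additionally requires settings_constraints_replace_previous.

/// Illustrative users.xml constraints block, not part of this commit.
constexpr const char * example_constraints = R"(
<constraints>
    <max_memory_usage>
        <min>200000</min>
        <max>20000000000</max>
    </max_memory_usage>
    <force_index_by_date>
        <const/>
    </force_index_by_date>
    <max_threads>
        <changeable_in_readonly/>
    </max_threads>
</constraints>
)";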
@ -635,13 +650,6 @@ void UsersConfigAccessStorage::load(
/* already_loaded = */ false);
}
void UsersConfigAccessStorage::reload()
{
std::lock_guard lock{load_mutex};
if (config_reloader)
config_reloader->reload();
}
void UsersConfigAccessStorage::startPeriodicReloading()
{
std::lock_guard lock{load_mutex};
@ -656,6 +664,13 @@ void UsersConfigAccessStorage::stopPeriodicReloading()
config_reloader->stop();
}
void UsersConfigAccessStorage::reload(ReloadMode /* reload_mode */)
{
std::lock_guard lock{load_mutex};
if (config_reloader)
config_reloader->reload();
}
std::optional<UUID> UsersConfigAccessStorage::findImpl(AccessEntityType type, const String & name) const
{
return memory_storage.find(type, name);

View File

@ -38,9 +38,9 @@ public:
const String & preprocessed_dir = {},
const zkutil::GetZooKeeper & get_zookeeper_function = {});
void reload() override;
void startPeriodicReloading() override;
void stopPeriodicReloading() override;
void reload(ReloadMode reload_mode) override;
bool exists(const UUID & id) const override;

View File

@ -64,6 +64,11 @@ public:
return nested_func->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
@ -79,6 +84,11 @@ public:
nested_func->destroy(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
nested_func->destroyUpToState(place);
}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -225,6 +225,12 @@ public:
nested_func->destroy(getNestedPlace(place));
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
this->data(place).~Data();
nested_func->destroyUpToState(getNestedPlace(place));
}
String getName() const override
{
return nested_func->getName() + "Distinct";
@ -245,6 +251,21 @@ public:
return nested_func->isState();
}
bool isVersioned() const override
{
return nested_func->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
}
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
};
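Several hunks in this commit add the same forwarding overrides (isState, isVersioned, getVersionFromRevision, getDefaultVersion, destroyUpToState) to combinators that wrap a nested function; without them the wrapper would answer these questions about itself instead of about the wrapped function, and -State serialization through the combinator could pick the wrong format version. A stripped-down, self-contained sketch of that wrapper shape (interface reduced to the version queries, names hypothetical):

#include <cstddef>
#include <memory>

struct IAggFunc
{
    virtual ~IAggFunc() = default;
    virtual bool isVersioned() const { return false; }
    virtual size_t getDefaultVersion() const { return 0; }
    virtual size_t getVersionFromRevision(size_t /*revision*/) const { return 0; }
};

/// A combinator delegates version queries to the function it wraps.
struct ForwardingCombinator : IAggFunc
{
    explicit ForwardingCombinator(std::shared_ptr<IAggFunc> nested_) : nested(std::move(nested_)) {}

    bool isVersioned() const override { return nested->isVersioned(); }
    size_t getDefaultVersion() const override { return nested->getDefaultVersion(); }
    size_t getVersionFromRevision(size_t revision) const override { return nested->getVersionFromRevision(revision); }

    std::shared_ptr<IAggFunc> nested;
};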

View File

@ -66,6 +66,7 @@ private:
if (old_size < new_size)
{
char * old_state = state.array_of_aggregate_datas;
char * new_state = arena.alignedAlloc(
new_size * nested_size_of_data,
nested_func->alignOfData());
@ -134,23 +135,43 @@ public:
return nested_func->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
}
void destroy(AggregateDataPtr __restrict place) const noexcept override
template <bool up_to_state>
void destroyImpl(AggregateDataPtr __restrict place) const noexcept
{
AggregateFunctionForEachData & state = data(place);
char * nested_state = state.array_of_aggregate_datas;
for (size_t i = 0; i < state.dynamic_array_size; ++i)
{
nested_func->destroy(nested_state);
if constexpr (up_to_state)
nested_func->destroyUpToState(nested_state);
else
nested_func->destroy(nested_state);
nested_state += nested_size_of_data;
}
}
void destroy(AggregateDataPtr __restrict place) const noexcept override
{
destroyImpl<false>(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
destroyImpl<true>(place);
}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -71,6 +71,11 @@ public:
return nested_func->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
@ -86,6 +91,11 @@ public:
nested_func->destroy(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
nested_func->destroyUpToState(place);
}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -84,6 +84,26 @@ private:
using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionMap<KeyType>>;
public:
bool isState() const override
{
return nested_func->isState();
}
bool isVersioned() const override
{
return nested_func->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_func->getDefaultVersion();
}
AggregateFunctionMap(AggregateFunctionPtr nested, const DataTypes & types) : Base(types, nested->getParameters()), nested_func(nested)
{
if (types.empty())
@ -187,6 +207,32 @@ public:
}
}
template <bool up_to_state>
void destroyImpl(AggregateDataPtr __restrict place) const noexcept
{
AggregateFunctionMapCombinatorData<KeyType> & state = Base::data(place);
for (const auto & [key, nested_place] : state.merged_maps)
{
if constexpr (up_to_state)
nested_func->destroyUpToState(nested_place);
else
nested_func->destroy(nested_place);
}
state.~Data();
}
void destroy(AggregateDataPtr __restrict place) const noexcept override
{
destroyImpl<false>(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
destroyImpl<true>(place);
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & merged_maps = this->data(place).merged_maps;

View File

@ -80,6 +80,11 @@ public:
nested_func->destroy(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
nested_func->destroyUpToState(place);
}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();
@ -126,6 +131,11 @@ public:
}
AggregateFunctionPtr getNestedFunction() const override { return nested_func; }
bool isState() const override
{
return nested_func->isState();
}
};
}

View File

@ -114,6 +114,11 @@ public:
nested_function->destroy(nestedPlace(place));
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
nested_function->destroyUpToState(nestedPlace(place));
}
bool hasTrivialDestructor() const override
{
return nested_function->hasTrivialDestructor();
@ -189,6 +194,21 @@ public:
return nested_function->isState();
}
bool isVersioned() const override
{
return nested_function->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_function->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_function->getDefaultVersion();
}
AggregateFunctionPtr getNestedFunction() const override { return nested_function; }
#if USE_EMBEDDED_COMPILER
@ -468,9 +488,15 @@ public:
}
}
bool found_one = false;
/// We can have 0 nullable filters if we don't skip nulls
if (nullable_filters.size() == 0)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlace(row_begin, row_end, this->nestedPlace(place), nested_columns, arena, -1);
return;
}
chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
bool found_one = false;
if (nullable_filters.size() == 1)
{
/// We can avoid making copies of the only filter but we still need to check that there is data to be added

View File

@ -98,6 +98,11 @@ public:
nested_function->destroy(place);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
nested_function->destroyUpToState(place);
}
void add(
AggregateDataPtr __restrict place,
const IColumn ** columns,

View File

@ -91,6 +91,21 @@ public:
return nested_function->isState();
}
bool isVersioned() const override
{
return nested_function->isVersioned();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_function->getVersionFromRevision(revision);
}
size_t getDefaultVersion() const override
{
return nested_function->getDefaultVersion();
}
bool allocatesMemoryInArena() const override
{
return nested_function->allocatesMemoryInArena();
@ -134,6 +149,12 @@ public:
nested_function->destroy(place + i * size_of_data);
}
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override
{
for (size_t i = 0; i < total; ++i)
nested_function->destroyUpToState(place + i * size_of_data);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
Key key;

View File

@ -56,10 +56,22 @@ public:
return nested_func->getDefaultVersion();
}
bool isState() const override
{
return nested_func->isState();
}
size_t getVersionFromRevision(size_t revision) const override
{
return nested_func->getVersionFromRevision(revision);
}
void create(AggregateDataPtr __restrict place) const override { nested_func->create(place); }
void destroy(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroy(place); }
void destroyUpToState(AggregateDataPtr __restrict place) const noexcept override { nested_func->destroyUpToState(place); }
bool hasTrivialDestructor() const override { return nested_func->hasTrivialDestructor(); }
size_t sizeOfData() const override { return nested_func->sizeOfData(); }

View File

@ -69,6 +69,8 @@ public:
nested_func->destroy(place);
}
void destroyUpToState(AggregateDataPtr __restrict) const noexcept override {}
bool hasTrivialDestructor() const override
{
return nested_func->hasTrivialDestructor();

View File

@ -113,6 +113,17 @@ public:
/// Delete data for aggregation.
virtual void destroy(AggregateDataPtr __restrict place) const noexcept = 0;
/// Delete all combinator states that were used after the -State combinator.
/// For example, for uniqArrayStateForEachMap(...) it will destroy
/// the states that were created by the Map and ForEach combinators.
/// It's needed because the ColumnAggregateFunction in the result will be
/// responsible only for destroying the states that were created
/// by the aggregate function and the combinators before the -State combinator.
virtual void destroyUpToState(AggregateDataPtr __restrict place) const noexcept
{
destroy(place);
}
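The default above is correct for plain aggregate functions, where destroying "up to -State" is the same as destroying everything. Combinators such as -ForEach and -Map (see their hunks in this commit) override it through a shared destroyImpl<bool> that walks the nested states and either destroys them fully or only up to the nested -State. A condensed, self-contained sketch of that idiom (names and data layout are hypothetical):

#include <cstddef>
#include <memory>

struct INested
{
    virtual ~INested() = default;
    virtual void destroy(char * place) const noexcept = 0;
    virtual void destroyUpToState(char * place) const noexcept { destroy(place); }
};

struct ForEachLikeCombinator
{
    std::shared_ptr<INested> nested;
    size_t nested_size_of_data = 0;

    /// One helper serves both entry points: the template flag decides whether the
    /// nested state is destroyed fully or only up to the nested -State combinator.
    template <bool up_to_state>
    void destroyImpl(char * array_of_states, size_t count) const noexcept
    {
        char * nested_state = array_of_states;
        for (size_t i = 0; i < count; ++i)
        {
            if constexpr (up_to_state)
                nested->destroyUpToState(nested_state);
            else
                nested->destroy(nested_state);
            nested_state += nested_size_of_data;
        }
    }

    void destroy(char * states, size_t count) const noexcept { destroyImpl<false>(states, count); }
    void destroyUpToState(char * states, size_t count) const noexcept { destroyImpl<true>(states, count); }
};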
/// It is not necessary to delete data.
virtual bool hasTrivialDestructor() const = 0;
@ -277,8 +288,7 @@ public:
Arena * arena) const = 0;
/** Insert result of aggregate function into result column with batch size.
* If destroy_place_after_insert is true, then the implementation of this method
* must destroy the aggregate place if inserting the state into the result column was successful.
* The implementation of this method will destroy the aggregate place up to the -State combinator if inserting the state into the result column was successful.
* All places that were not inserted must be destroyed if there was an exception during insert into the result column.
*/
virtual void insertResultIntoBatch(
@ -287,8 +297,7 @@ public:
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const = 0;
Arena * arena) const = 0;
/** Destroy batch of aggregate places.
*/
@ -612,8 +621,7 @@ public:
AggregateDataPtr * places,
size_t place_offset,
IColumn & to,
Arena * arena,
bool destroy_place_after_insert) const override
Arena * arena) const override
{
size_t batch_index = row_begin;
@ -622,9 +630,9 @@ public:
for (; batch_index < row_end; ++batch_index)
{
static_cast<const Derived *>(this)->insertResultInto(places[batch_index] + place_offset, to, arena);
if (destroy_place_after_insert)
static_cast<const Derived *>(this)->destroy(places[batch_index] + place_offset);
/// For -State aggregate functions, ownership of the aggregate place is passed to the result column after insert,
/// so we need to destroy all states up to the state of the -State combinator.
static_cast<const Derived *>(this)->destroyUpToState(places[batch_index] + place_offset);
}
}
catch (...)

View File

@ -162,7 +162,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
};
callback(res);
res->forEachSubcolumn(callback);
res->forEachSubcolumnRecursively(callback);
for (auto * val : data)
func->insertResultInto(val, *res, &column_aggregate_func.createOrGetArena());

View File

@ -157,6 +157,14 @@ public:
callback(data);
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
callback(offsets);
offsets->forEachSubcolumnRecursively(callback);
callback(data);
data->forEachSubcolumnRecursively(callback);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_concrete = typeid_cast<const ColumnArray *>(&rhs))

View File

@ -235,6 +235,12 @@ public:
callback(data);
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
callback(data);
data->forEachSubcolumnRecursively(callback);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_concrete = typeid_cast<const ColumnConst *>(&rhs))

View File

@ -173,6 +173,19 @@ public:
callback(dictionary.getColumnUniquePtr());
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
callback(idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
/// Column doesn't own dictionary if it's shared.
if (!dictionary.isShared())
{
callback(dictionary.getColumnUniquePtr());
dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback);
}
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_low_cardinality = typeid_cast<const ColumnLowCardinality *>(&rhs))

View File

@ -278,6 +278,12 @@ void ColumnMap::forEachSubcolumn(ColumnCallback callback)
callback(nested);
}
void ColumnMap::forEachSubcolumnRecursively(ColumnCallback callback)
{
callback(nested);
nested->forEachSubcolumnRecursively(callback);
}
bool ColumnMap::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_map = typeid_cast<const ColumnMap *>(&rhs))

View File

@ -89,6 +89,7 @@ public:
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;

View File

@ -136,6 +136,14 @@ public:
callback(null_map);
}
void forEachSubcolumnRecursively(ColumnCallback callback) override
{
callback(nested_column);
nested_column->forEachSubcolumnRecursively(callback);
callback(null_map);
null_map->forEachSubcolumnRecursively(callback);
}
bool structureEquals(const IColumn & rhs) const override
{
if (const auto * rhs_nullable = typeid_cast<const ColumnNullable *>(&rhs))

View File

@ -671,6 +671,18 @@ void ColumnObject::forEachSubcolumn(ColumnCallback callback)
callback(part);
}
void ColumnObject::forEachSubcolumnRecursively(ColumnCallback callback)
{
for (auto & entry : subcolumns)
{
for (auto & part : entry->data.data)
{
callback(part);
part->forEachSubcolumnRecursively(callback);
}
}
}
void ColumnObject::insert(const Field & field)
{
const auto & object = field.get<const Object &>();

View File

@ -211,6 +211,7 @@ public:
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;

View File

@ -750,6 +750,14 @@ void ColumnSparse::forEachSubcolumn(ColumnCallback callback)
callback(offsets);
}
void ColumnSparse::forEachSubcolumnRecursively(ColumnCallback callback)
{
callback(values);
values->forEachSubcolumnRecursively(callback);
callback(offsets);
offsets->forEachSubcolumnRecursively(callback);
}
const IColumn::Offsets & ColumnSparse::getOffsetsData() const
{
return assert_cast<const ColumnUInt64 &>(*offsets).getData();

View File

@ -140,6 +140,7 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;

View File

@ -501,6 +501,15 @@ void ColumnTuple::forEachSubcolumn(ColumnCallback callback)
callback(column);
}
void ColumnTuple::forEachSubcolumnRecursively(ColumnCallback callback)
{
for (auto & column : columns)
{
callback(column);
column->forEachSubcolumnRecursively(callback);
}
}
bool ColumnTuple::structureEquals(const IColumn & rhs) const
{
if (const auto * rhs_tuple = typeid_cast<const ColumnTuple *>(&rhs))

View File

@ -97,6 +97,7 @@ public:
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) override;
void forEachSubcolumnRecursively(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;

View File

@ -113,6 +113,15 @@ public:
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
void forEachSubcolumnRecursively(IColumn::ColumnCallback callback) override
{
callback(column_holder);
column_holder->forEachSubcolumnRecursively(callback);
reverse_index.setColumn(getRawColumnPtr());
if (is_nullable)
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnUnique *>(&rhs))

View File

@ -414,6 +414,9 @@ public:
using ColumnCallback = std::function<void(WrappedPtr&)>;
virtual void forEachSubcolumn(ColumnCallback) {}
/// Similar to forEachSubcolumn but it also makes recursive calls.
virtual void forEachSubcolumnRecursively(ColumnCallback) {}
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
[[nodiscard]] virtual bool structureEquals(const IColumn &) const
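The new IColumn::forEachSubcolumnRecursively hook lets callers visit the whole nesting of a column (e.g. a Nullable inside an Array inside a Tuple), not only the direct subcolumns. A minimal caller-side sketch, not part of the diff; the helper name and the counting logic are illustrative only:

#include <Columns/IColumn.h>
#include <cstddef>

/// Hypothetical helper: counts every nested subcolumn of `column`
/// by relying on the new recursive traversal.
static size_t countSubcolumnsRecursively(DB::IColumn & column)
{
    size_t count = 0;
    column.forEachSubcolumnRecursively([&](auto & /*subcolumn*/) { ++count; });
    return count;
}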

View File

@ -301,6 +301,30 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
\
M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.") \
M(S3CopyObject, "Number of S3 API CopyObject calls.") \
M(S3ListObjects, "Number of S3 API ListObjects calls.") \
M(S3HeadObject, "Number of S3 API HeadObject calls.") \
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.") \
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.") \
M(S3UploadPart, "Number of S3 API UploadPart calls.") \
M(S3AbortMultipartUpload, "Number of S3 API AbortMultipartUpload calls.") \
M(S3CompleteMultipartUpload, "Number of S3 API CompleteMultipartUpload calls.") \
M(S3PutObject, "Number of S3 API PutObject calls.") \
M(S3GetObject, "Number of S3 API GetObject calls.") \
\
M(DiskS3DeleteObjects, "Number of DiskS3 API DeleteObject(s) calls.") \
M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \
M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \
M(DiskS3HeadObject, "Number of DiskS3 API HeadObject calls.") \
M(DiskS3CreateMultipartUpload, "Number of DiskS3 API CreateMultipartUpload calls.") \
M(DiskS3UploadPartCopy, "Number of DiskS3 API UploadPartCopy calls.") \
M(DiskS3UploadPart, "Number of DiskS3 API UploadPart calls.") \
M(DiskS3AbortMultipartUpload, "Number of DiskS3 API AbortMultipartUpload calls.") \
M(DiskS3CompleteMultipartUpload, "Number of DiskS3 API CompleteMultipartUpload calls.") \
M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \
M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \
\
M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
@ -411,6 +435,7 @@ The server successfully detected this situation and will download merged part fr
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
namespace ProfileEvents
{

View File

@ -0,0 +1,23 @@
#pragma once
namespace DB
{
enum class SettingConstraintWritability
{
// Default. Setting can be changed within the specified range only in `readonly=0` or `readonly=2` mode.
WRITABLE,
// Setting cannot be changed at all.
// Either READONLY or CONST keyword in SQL syntax can be used (<readonly/> or <const/> in config.xml) to enable this.
// NOTE: name `CONST` is chosen to avoid confusion with `readonly` setting.
CONST,
// Setting can be changed within specified range, regardless of `readonly` setting value.
CHANGEABLE_IN_READONLY,
MAX
};
}
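As a reading aid only (not part of the patch), here is a sketch of what each writability mode is intended to permit; the function name and the exact enforcement logic are assumptions derived from the comments above:

#include <cstdint>
// #include "SettingConstraintWritability.h"  /// header path assumed

/// Hypothetical illustration of the three modes; `readonly_level` is the value
/// of the `readonly` setting in the current context (0, 1 or 2).
inline bool isChangeAllowed(DB::SettingConstraintWritability writability, uint64_t readonly_level)
{
    switch (writability)
    {
        case DB::SettingConstraintWritability::WRITABLE:
            return readonly_level != 1;   /// changeable within the range in readonly=0 or readonly=2 mode
        case DB::SettingConstraintWritability::CONST:
            return false;                 /// never changeable
        case DB::SettingConstraintWritability::CHANGEABLE_IN_READONLY:
            return true;                  /// changeable within the range regardless of readonly
        case DB::SettingConstraintWritability::MAX:
            return false;
    }
    return false;
}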

View File

@ -86,6 +86,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
@ -352,7 +353,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, max_ast_elements, 50000, "Maximum size of query syntax tree in number of nodes. Checked after parsing.", 0) \
M(UInt64, max_expanded_ast_elements, 500000, "Maximum size of query syntax tree in number of nodes after expansion of aliases and the asterisk.", 0) \
\
M(UInt64, readonly, 0, "0 - everything is allowed. 1 - only read requests. 2 - only read requests, as well as changing settings, except for the 'readonly' setting.", 0) \
M(UInt64, readonly, 0, "0 - no read-only restrictions. 1 - only read requests, as well as changing explicitly allowed settings. 2 - only read requests, as well as changing settings, except for the 'readonly' setting.", 0) \
\
M(UInt64, max_rows_in_set, 0, "Maximum size of the set (in number of elements) resulting from the execution of the IN section.", 0) \
M(UInt64, max_bytes_in_set, 0, "Maximum size of the set (in bytes in memory) resulting from the execution of the IN section.", 0) \

View File

@ -185,6 +185,7 @@ void DatabaseOnDisk::createTable(
if (create.attach_short_syntax)
{
/// Metadata already exists, table was detached
assert(fs::exists(getObjectMetadataPath(table_name)));
removeDetachedPermanentlyFlag(local_context, table_name, table_metadata_path, true);
attachTable(local_context, table_name, table, getTableDataPath(create));
return;

View File

@ -1259,4 +1259,24 @@ void DatabaseReplicated::createTableRestoredFromBackup(
}
}
bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const
{
if (query_context->getClientInfo().is_replicated_database_internal)
return false;
/// Some ALTERs are not replicated on database level
if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
{
return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter();
}
/// DROP DATABASE is not replicated
if (const auto * drop = query_ptr->as<const ASTDropQuery>())
{
return drop->table.get();
}
return true;
}
}
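For orientation (not part of the diff), the intended call pattern is roughly: the interpreter first asks the database whether the statement should be replicated and only then enqueues it into the replicated DDL queue. The wrapper below is a hypothetical sketch, with caller-side names assumed:

#include <Databases/IDatabase.h>   /// header path assumed

/// Hypothetical call site: replicate the DDL statement only when the database says so.
DB::BlockIO executeMaybeReplicated(const DB::DatabasePtr & database, const DB::ASTPtr & query, DB::ContextPtr context)
{
    if (database->shouldReplicateQuery(context, query))
        return database->tryEnqueueReplicatedDDL(query, context, /* internal */ false);
    return {};   /// otherwise the query is executed locally (not shown here)
}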

View File

@ -46,7 +46,7 @@ public:
/// Try to execute DDL query on the current host as the initial query. If the query succeeds,
/// then it will be executed on all replicas.
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal = false);
BlockIO tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, bool internal) override;
bool hasReplicationThread() const override { return true; }
@ -75,6 +75,8 @@ public:
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;
bool shouldReplicateQuery(const ContextPtr & query_context, const ASTPtr & query_ptr) const override;
friend struct DatabaseReplicatedTask;
friend class DatabaseReplicatedDDLWorker;
private:

View File

@ -8,6 +8,7 @@
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/ThreadPool.h>
#include <QueryPipeline/BlockIO.h>
#include <ctime>
#include <functional>
@ -338,6 +339,13 @@ public:
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName());
}
virtual bool shouldReplicateQuery(const ContextPtr & /*query_context*/, const ASTPtr & /*query_ptr*/) const { return false; }
virtual BlockIO tryEnqueueReplicatedDDL(const ASTPtr & /*query*/, ContextPtr /*query_context*/, [[maybe_unused]] bool internal = false) /// NOLINT
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not have replicated DDL queue", getEngineName());
}
/// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup.
virtual std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const;

View File

@ -71,6 +71,7 @@ public:
virtual const String & getName() const = 0;
/// Reserve the specified number of bytes.
/// Returns a valid reservation or nullptr on failure.
virtual ReservationPtr reserve(UInt64 bytes) = 0;
virtual ~Space() = default;

View File

@ -11,12 +11,16 @@ namespace ProfileEvents
{
extern const Event CachedWriteBufferCacheWriteBytes;
extern const Event CachedWriteBufferCacheWriteMicroseconds;
extern const Event FileSegmentWriteMicroseconds;
}
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
class SwapHelper
@ -31,6 +35,178 @@ namespace
};
}
FileSegmentRangeWriter::FileSegmentRangeWriter(
FileCache * cache_,
const FileSegment::Key & key_,
std::shared_ptr<FilesystemCacheLog> cache_log_,
const String & query_id_,
const String & source_path_)
: cache(cache_)
, key(key_)
, cache_log(cache_log_)
, query_id(query_id_)
, source_path(source_path_)
, current_file_segment_it(file_segments_holder.file_segments.end())
{
}
bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, bool is_persistent)
{
if (finalized)
return false;
auto & file_segments = file_segments_holder.file_segments;
if (current_file_segment_it == file_segments.end())
{
current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
}
else
{
auto file_segment = *current_file_segment_it;
assert(file_segment->getCurrentWriteOffset() == current_file_segment_write_offset);
if (current_file_segment_write_offset != offset)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot write file segment at offset {}, because current write offset is: {}",
offset, current_file_segment_write_offset);
}
if (file_segment->range().size() == file_segment->getDownloadedSize())
{
completeFileSegment(*file_segment);
current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
}
}
auto & file_segment = *current_file_segment_it;
auto downloader = file_segment->getOrSetDownloader();
if (downloader != FileSegment::getCallerId())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. ({})", file_segment->getInfoForLog());
SCOPE_EXIT({
if (file_segment->isDownloader())
file_segment->completePartAndResetDownloader();
});
bool reserved = file_segment->reserve(size);
if (!reserved)
{
file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
appendFilesystemCacheLog(*file_segment);
LOG_DEBUG(
&Poco::Logger::get("FileSegmentRangeWriter"),
"Unsuccessful space reservation attempt (size: {}, file segment info: {}",
size, file_segment->getInfoForLog());
return false;
}
try
{
file_segment->write(data, size, offset);
}
catch (...)
{
file_segment->completePartAndResetDownloader();
throw;
}
file_segment->completePartAndResetDownloader();
current_file_segment_write_offset += size;
return true;
}
void FileSegmentRangeWriter::finalize()
{
if (finalized)
return;
auto & file_segments = file_segments_holder.file_segments;
if (file_segments.empty() || current_file_segment_it == file_segments.end())
return;
completeFileSegment(**current_file_segment_it);
finalized = true;
}
FileSegmentRangeWriter::~FileSegmentRangeWriter()
{
try
{
if (!finalized)
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
FileSegments::iterator FileSegmentRangeWriter::allocateFileSegment(size_t offset, bool is_persistent)
{
/**
* Allocate a new file segment starting at `offset`.
* File segment capacity will equal `max_file_segment_size`, but actual size is 0.
*/
std::lock_guard cache_lock(cache->mutex);
CreateFileSegmentSettings create_settings
{
.is_persistent = is_persistent,
};
/// We set max_file_segment_size as the size to be downloaded;
/// if we have less to write, the file segment will be resized in the complete() method.
auto file_segment = cache->createFileSegmentForDownload(
key, offset, cache->max_file_segment_size, create_settings, cache_lock);
return file_segments_holder.add(std::move(file_segment));
}
void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_segment)
{
if (cache_log)
{
auto file_segment_range = file_segment.range();
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;
FilesystemCacheLogElement elem
{
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
.query_id = query_id,
.source_file_path = source_path,
.file_segment_range = { file_segment_range.left, file_segment_right_bound },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},
.profile_counters = nullptr,
};
cache_log->add(elem);
}
}
void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
{
/// File segment can be detached if space reservation failed.
if (file_segment.isDetached())
return;
file_segment.completeWithoutState();
appendFilesystemCacheLog(file_segment);
}
CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
std::unique_ptr<WriteBuffer> impl_,
FileCachePtr cache_,
@ -47,7 +223,6 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile(
, is_persistent_cache_file(is_persistent_cache_file_)
, query_id(query_id_)
, enable_cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log)
, cache_log(Context::getGlobalContextInstance()->getFilesystemCacheLog())
{
}
@ -82,8 +257,11 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size)
if (!cache_writer)
{
cache_writer = std::make_unique<FileSegmentRangeWriter>(
cache.get(), key, [this](const FileSegment & file_segment) { appendFilesystemCacheLog(file_segment); });
std::shared_ptr<FilesystemCacheLog> cache_log;
if (enable_cache_log)
cache_log = Context::getGlobalContextInstance()->getFilesystemCacheLog();
cache_writer = std::make_unique<FileSegmentRangeWriter>(cache.get(), key, cache_log, query_id, source_path);
}
Stopwatch watch(CLOCK_MONOTONIC);
@ -119,37 +297,9 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size)
ProfileEvents::increment(ProfileEvents::CachedWriteBufferCacheWriteBytes, size);
ProfileEvents::increment(ProfileEvents::CachedWriteBufferCacheWriteMicroseconds, watch.elapsedMicroseconds());
current_file_segment_counters.increment(
ProfileEvents::FileSegmentWriteMicroseconds, watch.elapsedMicroseconds());
cache_in_error_state_or_disabled = false;
}
void CachedOnDiskWriteBufferFromFile::appendFilesystemCacheLog(const FileSegment & file_segment)
{
if (cache_log)
{
auto file_segment_range = file_segment.range();
FilesystemCacheLogElement elem
{
.event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()),
.query_id = query_id,
.source_file_path = source_path,
.file_segment_range = { file_segment_range.left, file_segment_range.right },
.requested_range = {},
.cache_type = FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE,
.file_segment_size = file_segment_range.size(),
.read_from_cache_attempted = false,
.read_buffer_id = {},
.profile_counters = std::make_shared<ProfileEvents::Counters::Snapshot>(current_file_segment_counters.getPartiallyAtomicSnapshot()),
};
current_file_segment_counters.reset();
cache_log->add(elem);
}
}
void CachedOnDiskWriteBufferFromFile::finalizeImpl()
{
try

View File

@ -13,11 +13,57 @@ class Logger;
namespace DB
{
/**
* We eventually want to write some amount of data whose size is not known until the very end.
* Therefore we allocate file segments lazily. Each file segment is assigned a capacity
* of max_file_segment_size, but reserved_size remains 0 until the call to tryReserve().
* Once the current file segment is full (has reached max_file_segment_size), we allocate a
* new file segment. All allocated file segments reside in the file segments holder.
* If at the end of all writes the last file segment is not full, it is resized.
*/
class FileSegmentRangeWriter
{
public:
FileSegmentRangeWriter(
FileCache * cache_, const FileSegment::Key & key_,
std::shared_ptr<FilesystemCacheLog> cache_log_, const String & query_id_, const String & source_path_);
/**
* Write a range of file segments. Allocate file segment of `max_file_segment_size` and write to
* it until it is full and then allocate next file segment.
*/
bool write(const char * data, size_t size, size_t offset, bool is_persistent);
void finalize();
~FileSegmentRangeWriter();
private:
FileSegments::iterator allocateFileSegment(size_t offset, bool is_persistent);
void appendFilesystemCacheLog(const FileSegment & file_segment);
void completeFileSegment(FileSegment & file_segment);
FileCache * cache;
FileSegment::Key key;
std::shared_ptr<FilesystemCacheLog> cache_log;
String query_id;
String source_path;
FileSegmentsHolder file_segments_holder{};
FileSegments::iterator current_file_segment_it;
size_t current_file_segment_write_offset = 0;
bool finalized = false;
};
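A minimal usage sketch of the writer (not part of the diff; the surrounding variables, the nullptr cache log and the header path are assumptions):

#include <Disks/IO/CachedOnDiskWriteBufferFromFile.h>   /// header path assumed

/// Hypothetical write-through helper: cache a single chunk of data under `key`.
void cacheChunk(DB::FileCache * cache, const DB::FileSegment::Key & key, const char * data, size_t size)
{
    DB::FileSegmentRangeWriter writer(cache, key, /* cache_log */ nullptr, /* query_id */ "", /* source_path */ "");

    /// write() returns false if space could not be reserved; the caller then simply skips caching.
    if (writer.write(data, size, /* offset */ 0, /* is_persistent */ false))
        writer.finalize();
}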
/**
* Write buffer for filesystem caching on write operations.
*/
class FileSegmentRangeWriter;
class CachedOnDiskWriteBufferFromFile final : public WriteBufferFromFileDecorator
{
public:
@ -36,7 +82,6 @@ public:
private:
void cacheData(char * data, size_t size);
void appendFilesystemCacheLog(const FileSegment & file_segment);
Poco::Logger * log;
@ -49,11 +94,9 @@ private:
const String query_id;
bool enable_cache_log;
std::shared_ptr<FilesystemCacheLog> cache_log;
bool cache_in_error_state_or_disabled = false;
ProfileEvents::Counters current_file_segment_counters;
std::unique_ptr<FileSegmentRangeWriter> cache_writer;
};

View File

@ -61,6 +61,7 @@ ReadSettings IObjectStorage::patchSettings(const ReadSettings & read_settings) c
std::unique_lock lock{throttlers_mutex};
ReadSettings settings{read_settings};
settings.remote_throttler = remote_read_throttler;
settings.for_object_storage = true;
return settings;
}
@ -69,6 +70,7 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
std::unique_lock lock{throttlers_mutex};
WriteSettings settings{write_settings};
settings.remote_throttler = remote_write_throttler;
settings.for_object_storage = true;
return settings;
}

View File

@ -1,4 +1,5 @@
#include <Disks/ObjectStorages/S3/S3ObjectStorage.h>
#include <Common/ProfileEvents.h>
#if USE_AWS_S3
@ -31,6 +32,28 @@
#include <Common/logger_useful.h>
#include <Common/MultiVersion.h>
namespace ProfileEvents
{
extern const Event S3DeleteObjects;
extern const Event S3HeadObject;
extern const Event S3ListObjects;
extern const Event S3CopyObject;
extern const Event S3CreateMultipartUpload;
extern const Event S3UploadPartCopy;
extern const Event S3AbortMultipartUpload;
extern const Event S3CompleteMultipartUpload;
extern const Event DiskS3DeleteObjects;
extern const Event DiskS3HeadObject;
extern const Event DiskS3ListObjects;
extern const Event DiskS3CopyObject;
extern const Event DiskS3CreateMultipartUpload;
extern const Event DiskS3UploadPartCopy;
extern const Event DiskS3AbortMultipartUpload;
extern const Event DiskS3CompleteMultipartUpload;
}
namespace DB
{
@ -90,12 +113,27 @@ void logIfError(const Aws::Utils::Outcome<Result, Error> & response, std::functi
std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path */)
{
return getRandomASCIIString(32);
/// Path to store the new S3 object.
/// Total length is 32 a-z characters for enough randomness.
/// First 3 characters are used as a prefix for
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-object-key-naming-pattern/
constexpr size_t key_name_total_size = 32;
constexpr size_t key_name_prefix_size = 3;
/// Path to store new S3 object.
return fmt::format("{}/{}",
getRandomASCIIString(key_name_prefix_size),
getRandomASCIIString(key_name_total_size - key_name_prefix_size));
}
Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
{
auto client_ptr = client.get();
ProfileEvents::increment(ProfileEvents::S3HeadObject);
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket_from);
request.SetKey(key);
@ -219,6 +257,8 @@ void S3ObjectStorage::listPrefix(const std::string & path, RelativePathsWithSize
Aws::S3::Model::ListObjectsV2Outcome outcome;
do
{
ProfileEvents::increment(ProfileEvents::S3ListObjects);
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);
outcome = client_ptr->ListObjectsV2(request);
throwIfError(outcome);
@ -239,6 +279,8 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis
{
auto client_ptr = client.get();
ProfileEvents::increment(ProfileEvents::S3DeleteObjects);
ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects);
Aws::S3::Model::DeleteObjectRequest request;
request.SetBucket(bucket);
request.SetKey(object.absolute_path);
@ -284,6 +326,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(current_chunk);
ProfileEvents::increment(ProfileEvents::S3DeleteObjects);
ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects);
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
@ -357,6 +402,9 @@ void S3ObjectStorage::copyObjectImpl(
std::optional<ObjectAttributes> metadata) const
{
auto client_ptr = client.get();
ProfileEvents::increment(ProfileEvents::S3CopyObject);
ProfileEvents::increment(ProfileEvents::DiskS3CopyObject);
Aws::S3::Model::CopyObjectRequest request;
request.SetCopySource(src_bucket + "/" + src_key);
request.SetBucket(dst_bucket);
@ -405,6 +453,8 @@ void S3ObjectStorage::copyObjectMultipartImpl(
String multipart_upload_id;
{
ProfileEvents::increment(ProfileEvents::S3CreateMultipartUpload);
ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload);
Aws::S3::Model::CreateMultipartUploadRequest request;
request.SetBucket(dst_bucket);
request.SetKey(dst_key);
@ -423,6 +473,8 @@ void S3ObjectStorage::copyObjectMultipartImpl(
size_t upload_part_size = settings_ptr->s3_settings.min_upload_part_size;
for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
{
ProfileEvents::increment(ProfileEvents::S3UploadPartCopy);
ProfileEvents::increment(ProfileEvents::DiskS3UploadPartCopy);
Aws::S3::Model::UploadPartCopyRequest part_request;
part_request.SetCopySource(src_bucket + "/" + src_key);
part_request.SetBucket(dst_bucket);
@ -434,6 +486,8 @@ void S3ObjectStorage::copyObjectMultipartImpl(
auto outcome = client_ptr->UploadPartCopy(part_request);
if (!outcome.IsSuccess())
{
ProfileEvents::increment(ProfileEvents::S3AbortMultipartUpload);
ProfileEvents::increment(ProfileEvents::DiskS3AbortMultipartUpload);
Aws::S3::Model::AbortMultipartUploadRequest abort_request;
abort_request.SetBucket(dst_bucket);
abort_request.SetKey(dst_key);
@ -448,6 +502,8 @@ void S3ObjectStorage::copyObjectMultipartImpl(
}
{
ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload);
ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload);
Aws::S3::Model::CompleteMultipartUploadRequest req;
req.SetBucket(dst_bucket);
req.SetKey(dst_key);

View File

@ -39,6 +39,7 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
rw_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold);
rw_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size);
rw_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", context->getSettingsRef().s3_check_objects_after_upload);
rw_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", context->getSettingsRef().s3_max_unexpected_write_error_retries);
return std::make_unique<S3ObjectStorageSettings>(
rw_settings,

View File

@ -3,6 +3,7 @@
#include <string>
#include <Disks/ObjectStorages/IObjectStorage_fwd.h>
namespace DB
{

View File

@ -82,6 +82,9 @@ private:
ReservationPtr reserve(uint64_t bytes)
{
ReservationPtr reservation = disk->reserve(bytes);
if (!reservation)
return {};
/// Not just subtract bytes, but update the value,
/// since some reservations may be done directly via IDisk, or not by ClickHouse.
free_size = reservation->getUnreservedSpace();

View File

@ -2,6 +2,7 @@
#include <Columns/ColumnString.h>
#include <Poco/UTF8Encoding.h>
#include <Common/UTF8Helpers.h>
#include <base/defines.h>
#ifdef __SSE2__
#include <emmintrin.h>
@ -89,9 +90,11 @@ struct LowerUpperUTF8Impl
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
if (data.empty())
return;
res_data.resize(data.size());
res_offsets.assign(offsets);
array(data.data(), data.data() + data.size(), res_data.data());
array(data.data(), data.data() + data.size(), offsets, res_data.data());
}
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
@ -164,8 +167,11 @@ private:
static constexpr auto ascii_upper_bound = '\x7f';
static constexpr auto flip_case_mask = 'A' ^ 'a';
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
{
auto offset_it = offsets.begin();
const UInt8 * begin = src;
#ifdef __SSE2__
static constexpr auto bytes_sse = sizeof(__m128i);
const auto * src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse;
@ -213,10 +219,17 @@ private:
else
{
/// UTF-8
const auto * expected_end = src + bytes_sse;
size_t offset_from_begin = src - begin;
while (offset_from_begin >= *offset_it)
++offset_it;
/// Do not allow one row to influence another (since a row may have an invalid sequence and break the next one)
const UInt8 * row_end = begin + *offset_it;
chassert(row_end >= src);
const UInt8 * expected_end = std::min(src + bytes_sse, row_end);
while (src < expected_end)
toCase(src, src_end, dst);
toCase(src, expected_end, dst);
/// adjust src_end_sse by pushing it forward or backward
const auto diff = src - expected_end;
@ -229,10 +242,22 @@ private:
}
}
}
/// Find which offset src has now
while (offset_it != offsets.end() && static_cast<size_t>(src - begin) >= *offset_it)
++offset_it;
#endif
/// handle remaining symbols
/// handle remaining symbols row by row (to avoid bad UTF-8 symbols from one row influencing another)
while (src < src_end)
toCase(src, src_end, dst);
{
const UInt8 * row_end = begin + *offset_it;
chassert(row_end >= src);
while (src < row_end)
toCase(src, row_end, dst);
++offset_it;
}
}
};

View File

@ -24,6 +24,8 @@ namespace ProfileEvents
extern const Event ReadBufferFromS3Bytes;
extern const Event ReadBufferFromS3RequestsErrors;
extern const Event ReadBufferSeekCancelConnection;
extern const Event S3GetObject;
extern const Event DiskS3GetObject;
}
namespace DB
@ -248,7 +250,7 @@ size_t ReadBufferFromS3::getFileSize()
if (file_size)
return *file_size;
auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id);
auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id, true, read_settings.for_object_storage);
file_size = object_size;
return *file_size;
@ -315,6 +317,10 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
offset);
}
ProfileEvents::increment(ProfileEvents::S3GetObject);
if (read_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3GetObject);
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
if (outcome.IsSuccess())

View File

@ -105,6 +105,9 @@ struct ReadSettings
size_t http_retry_max_backoff_ms = 1600;
bool http_skip_not_found_url_for_globs = true;
/// Monitoring
bool for_object_storage = false; // to choose which profile events should be incremented
ReadSettings adjustBufferSize(size_t file_size) const
{
ReadSettings res = *this;

View File

@ -35,6 +35,12 @@
# include <fstream>
namespace ProfileEvents
{
extern const Event S3HeadObject;
extern const Event DiskS3HeadObject;
}
namespace DB
{
@ -812,8 +818,12 @@ namespace S3
}
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error)
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
{
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (for_disk_s3)
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
Aws::S3::Model::HeadObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);
@ -835,9 +845,9 @@ namespace S3
return {};
}
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error)
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
{
return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error).size;
return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error, for_disk_s3).size;
}
}

View File

@ -124,9 +124,9 @@ struct ObjectInfo
time_t last_modification_time = 0;
};
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true);
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true);
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
}
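As a usage note (an assumed call site, not shown in the diff), callers acting on behalf of DiskS3 pass `for_disk_s3 = true` so that both the generic S3HeadObject counter and the DiskS3HeadObject counter are incremented:

#include <IO/S3Common.h>   /// header path assumed
#include <memory>

/// Hypothetical wrapper used by disk code: size lookup that also bumps DiskS3* profile events.
size_t getDiskObjectSize(const std::shared_ptr<const Aws::S3::S3Client> & client,
                         const DB::String & bucket, const DB::String & key)
{
    return DB::S3::getObjectSize(client, bucket, key, /* version_id */ "",
                                 /* throw_on_error */ true, /* for_disk_s3 */ true);
}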

View File

@ -1,4 +1,5 @@
#include <Common/config.h>
#include <Common/ProfileEvents.h>
#if USE_AWS_S3
@ -24,6 +25,19 @@
namespace ProfileEvents
{
extern const Event WriteBufferFromS3Bytes;
extern const Event S3WriteBytes;
extern const Event S3HeadObject;
extern const Event S3CreateMultipartUpload;
extern const Event S3CompleteMultipartUpload;
extern const Event S3UploadPart;
extern const Event S3PutObject;
extern const Event DiskS3HeadObject;
extern const Event DiskS3CreateMultipartUpload;
extern const Event DiskS3CompleteMultipartUpload;
extern const Event DiskS3UploadPart;
extern const Event DiskS3PutObject;
}
namespace DB
@ -167,10 +181,15 @@ void WriteBufferFromS3::finalizeImpl()
{
LOG_TRACE(log, "Checking object {} exists after upload", key);
Aws::S3::Model::HeadObjectRequest request;
request.SetBucket(bucket);
request.SetKey(key);
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
auto response = client_ptr->HeadObject(request);
if (!response.IsSuccess())
@ -192,6 +211,10 @@ void WriteBufferFromS3::createMultipartUpload()
if (object_metadata.has_value())
req.SetMetadata(object_metadata.value());
ProfileEvents::increment(ProfileEvents::S3CreateMultipartUpload);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload);
auto outcome = client_ptr->CreateMultipartUpload(req);
if (outcome.IsSuccess())
@ -303,6 +326,10 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re
void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
{
ProfileEvents::increment(ProfileEvents::S3UploadPart);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3UploadPart);
auto outcome = client_ptr->UploadPart(task.req);
if (outcome.IsSuccess())
@ -340,16 +367,33 @@ void WriteBufferFromS3::completeMultipartUpload()
req.SetMultipartUpload(multipart_upload);
auto outcome = client_ptr->CompleteMultipartUpload(req);
if (outcome.IsSuccess())
LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size());
else
size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL);
for (size_t i = 0; i < max_retry; ++i)
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload);
auto outcome = client_ptr->CompleteMultipartUpload(req);
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size());
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For an unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
/// BTW, NO_SUCH_UPLOAD is an expected error and we shouldn't retry it
LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size());
}
else
{
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
}
}
}
@ -429,15 +473,30 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req)
void WriteBufferFromS3::processPutRequest(const PutObjectTask & task)
{
auto outcome = client_ptr->PutObject(task.req);
bool with_pool = static_cast<bool>(schedule);
if (outcome.IsSuccess())
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool);
else
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
size_t max_retry = std::max(s3_settings.max_unexpected_write_error_retries, 1UL);
for (size_t i = 0; i < max_retry; ++i)
{
ProfileEvents::increment(ProfileEvents::S3PutObject);
if (write_settings.for_object_storage)
ProfileEvents::increment(ProfileEvents::DiskS3PutObject);
auto outcome = client_ptr->PutObject(task.req);
bool with_pool = static_cast<bool>(schedule);
if (outcome.IsSuccess())
{
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool);
break;
}
else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
{
/// For an unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests
LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool);
}
else
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
}
}
void WriteBufferFromS3::waitForReadyBackGroundTasks()

View File

@ -15,6 +15,9 @@ struct WriteSettings
bool enable_filesystem_cache_on_write_operations = false;
bool enable_filesystem_cache_log = false;
bool is_file_cache_persistent = false;
/// Monitoring
bool for_object_storage = false; // to choose which profile events should be incremented
};
}

View File

@ -1,8 +1,9 @@
#include <memory>
#include <Common/quoteString.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Core/ColumnNumbers.h>
#include <Core/ColumnWithTypeAndName.h>
@ -22,9 +23,12 @@
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Columns/ColumnSet.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnSet.h>
#include <Storages/StorageSet.h>
@ -87,6 +91,22 @@ static size_t getTypeDepth(const DataTypePtr & type)
return 0;
}
/// Applies stricter rules than convertFieldToType:
/// Doesn't allow:
/// - loss of precision with `Decimals`
static bool convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type, Field & result_value)
{
result_value = convertFieldToType(from_value, to_type);
if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType()))
return applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value);
return true;
}
/// `convertFieldToTypeStrict` is used to prevent unexpected results in case of a conversion with loss of precision.
/// Example: `SELECT 33.3 :: Decimal(9, 1) AS a WHERE a IN (33.33 :: Decimal(9, 2))`
/// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty.
/// We cannot add values to the set that don't represent any possible value of the filtered column's type.
template<typename Collection>
static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in)
{
@ -103,10 +123,11 @@ static Block createBlockFromCollection(const Collection & collection, const Data
{
if (columns_num == 1)
{
auto field = convertFieldToType(value, *types[0]);
Field field;
bool is_conversion_ok = convertFieldToTypeStrict(value, *types[0], field);
bool need_insert_null = transform_null_in && types[0]->isNullable();
if (!field.isNull() || need_insert_null)
columns[0]->insert(std::move(field));
if (is_conversion_ok && (!field.isNull() || need_insert_null))
columns[0]->insert(field);
}
else
{
@ -127,7 +148,10 @@ static Block createBlockFromCollection(const Collection & collection, const Data
size_t i = 0;
for (; i < tuple_size; ++i)
{
tuple_values[i] = convertFieldToType(tuple[i], *types[i]);
bool is_conversion_ok = convertFieldToTypeStrict(tuple[i], *types[i], tuple_values[i]);
if (!is_conversion_ok)
break;
bool need_insert_null = transform_null_in && types[i]->isNullable();
if (tuple_values[i].isNull() && !need_insert_null)
break;

View File

@ -50,19 +50,15 @@ OutputBlockColumns prepareOutputBlockColumns(
if (aggregate_functions[i]->isState())
{
/// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states.
if (auto * column_aggregate_func = typeid_cast<ColumnAggregateFunction *>(final_aggregate_columns[i].get()))
for (auto & pool : aggregates_pools)
column_aggregate_func->addArena(pool);
/// Aggregate state can be wrapped into array if aggregate function ends with -Resample combinator.
final_aggregate_columns[i]->forEachSubcolumn(
[&aggregates_pools](auto & subcolumn)
{
if (auto * column_aggregate_func = typeid_cast<ColumnAggregateFunction *>(subcolumn.get()))
for (auto & pool : aggregates_pools)
column_aggregate_func->addArena(pool);
});
auto callback = [&](auto & subcolumn)
{
/// The ColumnAggregateFunction column captures the shared ownership of the arena with aggregate function states.
if (auto * column_aggregate_func = typeid_cast<ColumnAggregateFunction *>(subcolumn.get()))
for (auto & pool : aggregates_pools)
column_aggregate_func->addArena(pool);
};
callback(final_aggregate_columns[i]);
final_aggregate_columns[i]->forEachSubcolumnRecursively(callback);
}
}
}

View File

@ -1754,8 +1754,11 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
* It is also tricky, because there are aggregate functions with "-State" modifier.
* When we call "insertResultInto" for them, they insert a pointer to the state to ColumnAggregateFunction
* and ColumnAggregateFunction will take ownership of this state.
* So, for aggregate functions with "-State" modifier, the state must not be destroyed
* after it has been transferred to ColumnAggregateFunction.
* So, for aggregate functions with the "-State" modifier, only the states of the combinators that are used
* after -State will be destroyed after the result has been transferred to ColumnAggregateFunction.
* For example, if we have the function `uniqStateForEachMap`, after aggregation we should destroy all states that
* were created by the combinators `-ForEach` and `-Map`, because the resulting ColumnAggregateFunction will be
* responsible only for the destruction of the states created by the `uniq` function.
* But we should mark that the data no longer owns these states.
*/
@ -1778,8 +1781,8 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
/** Destroy states that are no longer needed. This loop does not throw.
*
* Don't destroy states for "-State" aggregate functions,
* because the ownership of this state is transferred to ColumnAggregateFunction
* For functions with the -State combinator we destroy only the states of the combinators that are used
* after -State, because the ownership of the remaining states is transferred to ColumnAggregateFunction
* and ColumnAggregateFunction will take care.
*
* But it's only for states that have been transferred to ColumnAggregateFunction
@ -1787,10 +1790,10 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
*/
for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i)
{
/// If ownership was not transferred to ColumnAggregateFunction.
if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState()))
aggregate_functions[destroy_i]->destroy(
mapped + offsets_of_aggregate_states[destroy_i]);
if (destroy_i < insert_i)
aggregate_functions[destroy_i]->destroyUpToState(mapped + offsets_of_aggregate_states[destroy_i]);
else
aggregate_functions[destroy_i]->destroy(mapped + offsets_of_aggregate_states[destroy_i]);
}
/// Mark the cell as destroyed so it will not be destroyed in destructor.
@ -1855,12 +1858,7 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
size_t destroy_index = aggregate_functions_destroy_index;
++aggregate_functions_destroy_index;
/// For State AggregateFunction ownership of aggregate place is passed to result column after insert
bool is_state = aggregate_functions[destroy_index]->isState();
bool destroy_place_after_insert = !is_state;
aggregate_functions[destroy_index]->insertResultIntoBatch(
0, places.size(), places.data(), offset, *final_aggregate_column, arena, destroy_place_after_insert);
aggregate_functions[destroy_index]->insertResultIntoBatch(0, places.size(), places.data(), offset, *final_aggregate_column, arena);
}
}
catch (...)

View File

@ -125,12 +125,6 @@ size_t FileSegment::getDownloadedSizeUnlocked(std::unique_lock<std::mutex> & /*
return downloaded_size;
}
size_t FileSegment::getRemainingSizeToDownload() const
{
std::unique_lock segment_lock(mutex);
return range().size() - getDownloadedSizeUnlocked(segment_lock);
}
bool FileSegment::isDownloaded() const
{
std::lock_guard segment_lock(mutex);
@ -836,186 +830,4 @@ String FileSegmentsHolder::toString()
return ranges;
}
FileSegmentRangeWriter::FileSegmentRangeWriter(
FileCache * cache_,
const FileSegment::Key & key_,
OnCompleteFileSegmentCallback && on_complete_file_segment_func_)
: cache(cache_)
, key(key_)
, current_file_segment_it(file_segments_holder.file_segments.end())
, on_complete_file_segment_func(on_complete_file_segment_func_)
{
}
FileSegments::iterator FileSegmentRangeWriter::allocateFileSegment(size_t offset, bool is_persistent)
{
/**
* Allocate a new file segment starting `offset`.
* File segment capacity will equal `max_file_segment_size`, but actual size is 0.
*/
std::lock_guard cache_lock(cache->mutex);
CreateFileSegmentSettings create_settings
{
.is_persistent = is_persistent,
};
/// We set max_file_segment_size to be downloaded,
/// if we have less size to write, file segment will be resized in complete() method.
auto file_segment = cache->createFileSegmentForDownload(
key, offset, cache->max_file_segment_size, create_settings, cache_lock);
return file_segments_holder.add(std::move(file_segment));
}
void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
{
/**
* Complete the file segment based on the downloaded size.
*/
/// File segment can be detached if space reservation failed.
if (file_segment.isDetached())
return;
size_t current_downloaded_size = file_segment.getDownloadedSize();
/// file_segment->complete(DOWNLOADED) is not enough, because file segment capacity
/// was initially set with a margin as `max_file_segment_size`. => We need to always
/// resize to actual size after download finished.
if (current_downloaded_size != file_segment.range().size())
{
/// Current file segment is downloaded as a part of write-through cache
/// and therefore cannot be concurrently accessed. Nevertheless, it can be
/// accessed by cache system tables if someone read from them,
/// therefore we need a mutex.
std::unique_lock segment_lock(file_segment.mutex);
assert(current_downloaded_size <= file_segment.range().size());
file_segment.segment_range = FileSegment::Range(
file_segment.segment_range.left,
file_segment.segment_range.left + current_downloaded_size - 1);
file_segment.reserved_size = current_downloaded_size;
file_segment.setDownloadedUnlocked(segment_lock);
}
file_segment.completeWithoutState();
on_complete_file_segment_func(file_segment);
}
bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset, bool is_persistent)
{
/**
* Write a range of file segments. Allocate file segment of `max_file_segment_size` and write to
* it until it is full and then allocate next file segment.
*/
if (finalized)
return false;
auto & file_segments = file_segments_holder.file_segments;
if (current_file_segment_it == file_segments.end())
{
current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
}
else
{
if (current_file_segment_write_offset != offset)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot write file segment at offset {}, because current write offset is: {}",
offset, current_file_segment_write_offset);
}
size_t current_write_offset = (*current_file_segment_it)->getCurrentWriteOffset();
auto current_file_segment = *current_file_segment_it;
if (current_file_segment->getRemainingSizeToDownload() == 0)
{
completeFileSegment(*current_file_segment);
current_file_segment_it = allocateFileSegment(current_file_segment_write_offset, is_persistent);
}
else if (current_write_offset != offset)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot file segment download offset {} does not match current write offset {}",
current_write_offset, offset);
}
}
auto & file_segment = *current_file_segment_it;
auto downloader = file_segment->getOrSetDownloader();
if (downloader != FileSegment::getCallerId())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to set a downloader. ({})", file_segment->getInfoForLog());
SCOPE_EXIT({
if (file_segment->isDownloader())
{
file_segment->completePartAndResetDownloader();
}
});
bool reserved = file_segment->reserve(size);
if (!reserved)
{
file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
on_complete_file_segment_func(*file_segment);
LOG_DEBUG(
&Poco::Logger::get("FileSegmentRangeWriter"),
"Unsuccessful space reservation attempt (size: {}, file segment info: {}",
size, file_segment->getInfoForLog());
return false;
}
try
{
file_segment->write(data, size, offset);
}
catch (...)
{
file_segment->completePartAndResetDownloader();
throw;
}
file_segment->completePartAndResetDownloader();
current_file_segment_write_offset += size;
return true;
}
void FileSegmentRangeWriter::finalize()
{
if (finalized)
return;
auto & file_segments = file_segments_holder.file_segments;
if (file_segments.empty() || current_file_segment_it == file_segments.end())
return;
completeFileSegment(**current_file_segment_it);
finalized = true;
}
FileSegmentRangeWriter::~FileSegmentRangeWriter()
{
try
{
if (!finalized)
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}

View File

@ -217,8 +217,6 @@ public:
void resetRemoteFileReader();
size_t getRemainingSizeToDownload() const;
private:
size_t getFirstNonDownloadedOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
size_t getCurrentWriteOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
@ -327,47 +325,4 @@ struct FileSegmentsHolder : private boost::noncopyable
FileSegments file_segments{};
};
/**
* We want to write eventually some size, which is not known until the very end.
* Therefore we allocate file segments lazily. Each file segment is assigned capacity
* of max_file_segment_size, but reserved_size remains 0, until call to tryReserve().
* Once current file segment is full (reached max_file_segment_size), we allocate a
* new file segment. All allocated file segments resize in file segments holder.
* If at the end of all writes, the last file segment is not full, then it is resized.
*/
class FileSegmentRangeWriter
{
public:
using OnCompleteFileSegmentCallback = std::function<void(const FileSegment & file_segment)>;
FileSegmentRangeWriter(
FileCache * cache_,
const FileSegment::Key & key_,
/// A callback which is called right after each file segment is completed.
/// It is used to write into filesystem cache log.
OnCompleteFileSegmentCallback && on_complete_file_segment_func_);
~FileSegmentRangeWriter();
bool write(const char * data, size_t size, size_t offset, bool is_persistent);
void finalize();
private:
FileSegments::iterator allocateFileSegment(size_t offset, bool is_persistent);
void completeFileSegment(FileSegment & file_segment);
FileCache * cache;
FileSegment::Key key;
FileSegmentsHolder file_segments_holder;
FileSegments::iterator current_file_segment_it;
size_t current_file_segment_write_offset = 0;
bool finalized = false;
OnCompleteFileSegmentCallback on_complete_file_segment_func;
};
}

Some files were not shown because too many files have changed in this diff.