Merge remote-tracking branch 'blessed/master' into 02294_decimal_second_errors

Raúl Marín 2024-01-23 20:24:39 +01:00
commit a9f6dfa74d
111 changed files with 1202 additions and 2647 deletions

View File

@@ -249,10 +249,13 @@ stop_logs_replication
 successfuly_saved=0
 for table in query_log zookeeper_log trace_log transactions_info_log
 do
-    clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst || successfuly_saved=$((successfuly_saved+$?))
+    clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst
+    successfuly_saved=$?
     if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-        clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst || successfuly_saved=$((successfuly_saved+$?))
-        clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst || successfuly_saved=$((successfuly_saved+$?))
+        clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst
+        successfuly_saved=$((successfuly_saved | $?))
+        clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst
+        successfuly_saved=$((successfuly_saved | $?))
     fi
 done

View File

@@ -11,6 +11,7 @@ sidebar_label: 2023
 * Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default to avoid possible parsing errors when sample data contains strings that looks like a number. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)).
+* IPv6 bloom filter indexes created prior to March 2023 are not compatible with current version and have to be rebuilt. [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
 #### New Feature
 * Added new type of authentication based on SSH keys. It works only for Native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)).

View File

@@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
-    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
-    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
+    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
+    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
     ...
     INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
     INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@@ -56,7 +56,7 @@ ORDER BY expr
     [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
     [WHERE conditions]
     [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
-[SETTINGS name=value, ...]
+[SETTINGS name = value, ...]
 ```
 For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md).
@@ -620,7 +620,7 @@ The `TTL` clause can't be used for key columns.
 #### Creating a table with `TTL`:
 ``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
 (
     d DateTime,
     a Int TTL d + INTERVAL 1 MONTH,
@@ -635,7 +635,7 @@ ORDER BY d;
 #### Adding TTL to a column of an existing table
 ``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
     MODIFY COLUMN
     c String TTL d + INTERVAL 1 DAY;
 ```
@@ -643,7 +643,7 @@ ALTER TABLE example_table
 #### Altering TTL of the column
 ``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
     MODIFY COLUMN
     c String TTL d + INTERVAL 1 MONTH;
 ```
@@ -681,7 +681,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i
 #### Creating a table with `TTL`:
 ``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
 (
     d DateTime,
     a Int
@@ -697,7 +697,7 @@ TTL d + INTERVAL 1 MONTH DELETE,
 #### Altering `TTL` of the table:
 ``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
     MODIFY TTL d + INTERVAL 1 DAY;
 ```
@@ -1366,7 +1366,7 @@ In this sample configuration:
 The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
 ``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
 (
     a Int64 STATISTIC(tdigest),
     b Float64
@@ -1378,8 +1378,8 @@ ORDER BY a
 We can also manipulate statistics with `ALTER` statements.
 ```sql
-ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
-ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
+ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
+ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
 ```
 These lightweight statistics aggregate information about distribution of values in columns.
@@ -1390,3 +1390,42 @@ They can be used for query optimization when we enable `set allow_statistic_opti
 - `tdigest`
     Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
+
+## Column-level Settings {#column-level-settings}
+
+Certain MergeTree settings can be overridden at column level:
+
+- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
+- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
+
+Example:
+
+```sql
+CREATE TABLE tab
+(
+    id Int64,
+    document String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)
+)
+ENGINE = MergeTree
+ORDER BY id
+```
+
+Column-level settings can be modified or removed using [ALTER MODIFY COLUMN](/docs/en/sql-reference/statements/alter/column.md), for example:
+
+- Remove `SETTINGS` from the column declaration:
+
+```sql
+ALTER TABLE tab MODIFY COLUMN document REMOVE SETTINGS;
+```
+
+- Modify a setting:
+
+```sql
+ALTER TABLE tab MODIFY COLUMN document MODIFY SETTING min_compress_block_size = 8192;
+```
+
+- Reset one or more settings and remove the setting declaration from the column expression in the table's CREATE query:
+
+```sql
+ALTER TABLE tab MODIFY COLUMN document RESET SETTING min_compress_block_size;
+```

View File

@@ -10,7 +10,7 @@ Columns:
 - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
 - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
 - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
-- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
+- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
 - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
 **Example**

View File

@@ -23,10 +23,11 @@ The following actions are supported:
 - [RENAME COLUMN](#rename-column) — Renames an existing column.
 - [CLEAR COLUMN](#clear-column) — Resets column values.
 - [COMMENT COLUMN](#comment-column) — Adds a text comment to the column.
-- [MODIFY COLUMN](#modify-column) — Changes column's type, default expression and TTL.
+- [MODIFY COLUMN](#modify-column) — Changes column's type, default expression, TTL, and column settings.
 - [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties.
+- [MODIFY COLUMN MODIFY SETTING](#modify-column-modify-setting) — Changes column settings.
+- [MODIFY COLUMN RESET SETTING](#modify-column-reset-setting) — Resets column settings.
 - [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing.
 These actions are described in detail below.
 ## ADD COLUMN
@@ -75,7 +76,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,
 Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.
 :::tip
 You can't delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error.
 :::
@@ -208,7 +209,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved
 ## MODIFY COLUMN REMOVE
-Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
+Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTING`.
 Syntax:
@@ -228,6 +229,43 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
 - [REMOVE TTL](ttl.md).
+
+## MODIFY COLUMN MODIFY SETTING
+
+Modifies a column setting.
+
+Syntax:
+
+```sql
+ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...;
+```
+
+**Example**
+
+Modify column's `max_compress_block_size` to `1MB`:
+
+```sql
+ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING max_compress_block_size = 1048576;
+```
+
+## MODIFY COLUMN RESET SETTING
+
+Resets a column setting and removes the setting declaration from the column expression in the table's CREATE query.
+
+Syntax:
+
+```sql
+ALTER TABLE table_name MODIFY COLUMN RESET SETTING name,...;
+```
+
+**Example**
+
+Reset column setting `max_compress_block_size` to its default value:
+
+```sql
+ALTER TABLE table_name MODIFY COLUMN RESET SETTING max_compress_block_size;
+```
+
 ## MATERIALIZE COLUMN
 Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
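As a minimal sketch of the statement this last section names (table and column names are hypothetical):

```sql
-- Hypothetical example: rewrite existing parts so column `c` is physically stored.
ALTER TABLE tab MATERIALIZE COLUMN c;
```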

View File

@@ -112,7 +112,7 @@ Note that:
 For the query to run successfully, the following conditions must be met:
 - Both tables must have the same structure.
-- Both tables must have the same order by key and the same primary key.
+- Both tables must have the same partition key, the same order by key and the same primary key.
 - Both tables must have the same indices and projections.
 - Both tables must have the same storage policy.
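These conditions govern copying partitions between tables; a minimal sketch, assuming hypothetical tables `t1` and `t2` that satisfy all four conditions above:

```sql
-- Hypothetical sketch: replace partition 202401 of t2 with the one from t1.
ALTER TABLE t2 REPLACE PARTITION 202401 FROM t1;
```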

View File

@@ -1900,6 +1900,7 @@ try
     /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
     async_metrics.start();
+    global_context->setAsynchronousMetrics(&async_metrics);
     main_config_reloader->start();
     access_control.startPeriodicReloading();

View File

@@ -170,6 +170,7 @@ enum class AccessType
     M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \
     M(SYSTEM_RELOAD_FUNCTION, "SYSTEM RELOAD FUNCTIONS, RELOAD FUNCTION, RELOAD FUNCTIONS", GLOBAL, SYSTEM_RELOAD) \
     M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\
+    M(SYSTEM_RELOAD_ASYNCHRONOUS_METRICS, "RELOAD ASYNCHRONOUS METRICS", GLOBAL, SYSTEM_RELOAD) \
     M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \
     M(SYSTEM_RESTART_DISK, "SYSTEM RESTART DISK", GLOBAL, SYSTEM) \
     M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP MERGES, START MERGES", TABLE, SYSTEM) \
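The new access type implies a matching SQL surface; a hedged sketch, with the statement and grant text derived from the alias string `"RELOAD ASYNCHRONOUS METRICS"` in the macro above (the user name is hypothetical):

```sql
-- Derived from the macro's alias string; assumed, not shown in this hunk.
GRANT SYSTEM RELOAD ASYNCHRONOUS METRICS ON *.* TO metrics_admin;
SYSTEM RELOAD ASYNCHRONOUS METRICS;
```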

View File

@@ -10,14 +10,6 @@
 namespace DB
 {
-/** Rewrites `sum(column +/- literal)` into two individual functions
-  * `sum(column)` and `literal * count(column)`.
-  * sum(column + literal) -> sum(column) + literal * count(column)
-  * sum(literal + column) -> literal * count(column) + sum(column)
-  * sum(column - literal) -> sum(column) - literal * count(column)
-  * sum(literal - column) -> literal * count(column) - sum(column)
-  */
 namespace
 {
@@ -29,6 +21,9 @@ public:
     void enterImpl(QueryTreeNodePtr & node)
     {
+        if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
+            return;
+
         static const std::unordered_set<String> func_supported = {
             "plus",
             "minus"

View File

@@ -5,6 +5,14 @@
 namespace DB
 {
+/**
+ * Rewrites `sum(column +/- literal)` into two individual functions
+ * `sum(column)` and `literal * count(column)`.
+ * sum(column + literal) -> sum(column) + literal * count(column)
+ * sum(literal + column) -> literal * count(column) + sum(column)
+ * sum(column - literal) -> sum(column) - literal * count(column)
+ * sum(literal - column) -> literal * count(column) - sum(column)
+ */
 class RewriteSumFunctionWithSumAndCountPass final : public IQueryTreePass
 {
 public:
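As a worked illustration of the rewrite the moved comment documents (column `x`, literal `10`, and table `t` are hypothetical):

```sql
-- The pass rewrites the first form into the second, per the comment above.
SELECT sum(x + 10) FROM t;
-- becomes
SELECT sum(x) + 10 * count(x) FROM t;
```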

View File

@@ -98,7 +98,7 @@ AsynchronousMetrics::AsynchronousMetrics(
 }
 #if defined(OS_LINUX)
-void AsynchronousMetrics::openSensors()
+void AsynchronousMetrics::openSensors() TSA_REQUIRES(data_mutex)
 {
     LOG_TRACE(log, "Scanning /sys/class/thermal");
@@ -136,7 +136,7 @@ void AsynchronousMetrics::openSensors()
     }
 }
-void AsynchronousMetrics::openBlockDevices()
+void AsynchronousMetrics::openBlockDevices() TSA_REQUIRES(data_mutex)
 {
     LOG_TRACE(log, "Scanning /sys/block");
@@ -163,7 +163,7 @@ void AsynchronousMetrics::openBlockDevices()
     }
 }
-void AsynchronousMetrics::openEDAC()
+void AsynchronousMetrics::openEDAC() TSA_REQUIRES(data_mutex)
 {
     LOG_TRACE(log, "Scanning /sys/devices/system/edac");
@@ -194,7 +194,7 @@ void AsynchronousMetrics::openEDAC()
     }
 }
-void AsynchronousMetrics::openSensorsChips()
+void AsynchronousMetrics::openSensorsChips() TSA_REQUIRES(data_mutex)
 {
     LOG_TRACE(log, "Scanning /sys/class/hwmon");
@@ -281,7 +281,7 @@ void AsynchronousMetrics::stop()
     try
     {
         {
-            std::lock_guard lock{mutex};
+            std::lock_guard lock(thread_mutex);
             quit = true;
         }
@@ -306,11 +306,14 @@ AsynchronousMetrics::~AsynchronousMetrics()
 AsynchronousMetricValues AsynchronousMetrics::getValues() const
 {
-    std::lock_guard lock{mutex};
+    std::lock_guard lock(data_mutex);
     return values;
 }
-static auto get_next_update_time(std::chrono::seconds update_period)
+namespace
+{
+
+auto get_next_update_time(std::chrono::seconds update_period)
 {
     using namespace std::chrono;
@@ -334,6 +337,8 @@ static auto get_next_update_time(std::chrono::seconds update_period)
     return time_next;
 }
+}
+
 void AsynchronousMetrics::run()
 {
     setThreadName("AsyncMetrics");
@@ -344,9 +349,9 @@ void AsynchronousMetrics::run()
         {
             // Wait first, so that the first metric collection is also on even time.
-            std::unique_lock lock{mutex};
+            std::unique_lock lock(thread_mutex);
             if (wait_cond.wait_until(lock, next_update_time,
-                [this] { return quit; }))
+                [this] TSA_REQUIRES(thread_mutex) { return quit; }))
             {
                 break;
             }
@@ -364,6 +369,9 @@ void AsynchronousMetrics::run()
 }
 #if USE_JEMALLOC
+namespace
+{
+
 uint64_t updateJemallocEpoch()
 {
     uint64_t value = 0;
@@ -373,7 +381,7 @@ uint64_t updateJemallocEpoch()
 }
 template <typename Value>
-static Value saveJemallocMetricImpl(
+Value saveJemallocMetricImpl(
     AsynchronousMetricValues & values,
     const std::string & jemalloc_full_name,
     const std::string & clickhouse_full_name)
@@ -386,7 +394,7 @@ static Value saveJemallocMetricImpl(
 }
 template<typename Value>
-static Value saveJemallocMetric(AsynchronousMetricValues & values,
+Value saveJemallocMetric(AsynchronousMetricValues & values,
     const std::string & metric_name)
 {
     return saveJemallocMetricImpl<Value>(values,
@@ -395,13 +403,15 @@ static Value saveJemallocMetric(AsynchronousMetricValues & values,
 }
 template<typename Value>
-static Value saveAllArenasMetric(AsynchronousMetricValues & values,
+Value saveAllArenasMetric(AsynchronousMetricValues & values,
     const std::string & metric_name)
 {
     return saveJemallocMetricImpl<Value>(values,
         fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, metric_name),
         fmt::format("jemalloc.arenas.all.{}", metric_name));
 }
+
+}
 #endif
@@ -547,21 +557,23 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet
 #endif
-void AsynchronousMetrics::update(TimePoint update_time)
+void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
 {
     Stopwatch watch;
     AsynchronousMetricValues new_values;
+    std::lock_guard lock(data_mutex);
+
     auto current_time = std::chrono::system_clock::now();
-    auto time_after_previous_update = current_time - previous_update_time;
+    auto time_since_previous_update = current_time - previous_update_time;
     previous_update_time = update_time;
     double update_interval = 0.;
     if (first_run)
         update_interval = update_period.count();
     else
-        update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
+        update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_since_previous_update).count() / 1e6;
     new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
     /// This is also a good indicator of system responsiveness.
@@ -815,7 +827,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
         if (-1 == hz)
             throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ");
-        double multiplier = 1.0 / hz / (std::chrono::duration_cast<std::chrono::nanoseconds>(time_after_previous_update).count() / 1e9);
+        double multiplier = 1.0 / hz / (std::chrono::duration_cast<std::chrono::nanoseconds>(time_since_previous_update).count() / 1e9);
         size_t num_cpus = 0;
         ProcStatValuesOther current_other_values{};
@@ -1572,7 +1584,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
     /// Add more metrics as you wish.
-    updateImpl(new_values, update_time, current_time);
+    updateImpl(update_time, current_time, force_update, first_run, new_values);
     new_values["AsynchronousMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics)." };
@@ -1581,7 +1593,6 @@ void AsynchronousMetrics::update(TimePoint update_time)
     first_run = false;
     // Finally, update the current metrics.
-    std::lock_guard lock(mutex);
     values = new_values;
 }

View File

@@ -56,8 +56,13 @@ struct ProtocolServerMetrics
   */
 class AsynchronousMetrics
 {
+protected:
+    using Duration = std::chrono::seconds;
+    using TimePoint = std::chrono::system_clock::time_point;
+
 public:
     using ProtocolServerMetricsFunc = std::function<std::vector<ProtocolServerMetrics>()>;
     AsynchronousMetrics(
         int update_period_seconds,
         const ProtocolServerMetricsFunc & protocol_server_metrics_func_);
@@ -69,62 +74,66 @@ public:
     void stop();
+    void update(TimePoint update_time, bool force_update = false);
+
     /// Returns copy of all values.
     AsynchronousMetricValues getValues() const;
 protected:
-    using Duration = std::chrono::seconds;
-    using TimePoint = std::chrono::system_clock::time_point;
-
     const Duration update_period;
-    /// Some values are incremental and we have to calculate the difference.
-    /// On first run we will only collect the values to subtract later.
-    bool first_run = true;
-    TimePoint previous_update_time;
-
     Poco::Logger * log;
 private:
-    virtual void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) = 0;
+    virtual void updateImpl(TimePoint update_time, TimePoint current_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values) = 0;
     virtual void logImpl(AsynchronousMetricValues &) {}
     ProtocolServerMetricsFunc protocol_server_metrics_func;
-    mutable std::mutex mutex;
+    std::unique_ptr<ThreadFromGlobalPool> thread;
+
+    mutable std::mutex thread_mutex;
     std::condition_variable wait_cond;
-    bool quit {false};
-    AsynchronousMetricValues values;
+    bool quit TSA_GUARDED_BY(thread_mutex) = false;
+
+    mutable std::mutex data_mutex;
+
+    /// Some values are incremental and we have to calculate the difference.
+    /// On first run we will only collect the values to subtract later.
+    bool first_run TSA_GUARDED_BY(data_mutex) = true;
+    TimePoint previous_update_time TSA_GUARDED_BY(data_mutex);
+
+    AsynchronousMetricValues values TSA_GUARDED_BY(data_mutex);
 #if defined(OS_LINUX) || defined(OS_FREEBSD)
-    MemoryStatisticsOS memory_stat;
+    MemoryStatisticsOS memory_stat TSA_GUARDED_BY(data_mutex);
 #endif
 #if defined(OS_LINUX)
-    std::optional<ReadBufferFromFilePRead> meminfo;
-    std::optional<ReadBufferFromFilePRead> loadavg;
-    std::optional<ReadBufferFromFilePRead> proc_stat;
-    std::optional<ReadBufferFromFilePRead> cpuinfo;
-    std::optional<ReadBufferFromFilePRead> file_nr;
-    std::optional<ReadBufferFromFilePRead> uptime;
-    std::optional<ReadBufferFromFilePRead> net_dev;
-    std::optional<ReadBufferFromFilePRead> cgroupmem_limit_in_bytes;
-    std::optional<ReadBufferFromFilePRead> cgroupmem_usage_in_bytes;
-    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_period;
-    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota;
-    std::optional<ReadBufferFromFilePRead> cgroupcpu_max;
-    std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal;
+    std::optional<ReadBufferFromFilePRead> meminfo TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> loadavg TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> proc_stat TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cpuinfo TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> file_nr TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> uptime TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> net_dev TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupmem_limit_in_bytes TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupmem_usage_in_bytes TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_period TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_max TSA_GUARDED_BY(data_mutex);
+    std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal TSA_GUARDED_BY(data_mutex);
     std::unordered_map<String /* device name */,
         std::unordered_map<String /* label name */,
-            std::unique_ptr<ReadBufferFromFilePRead>>> hwmon_devices;
+            std::unique_ptr<ReadBufferFromFilePRead>>> hwmon_devices TSA_GUARDED_BY(data_mutex);
     std::vector<std::pair<
         std::unique_ptr<ReadBufferFromFilePRead> /* correctable errors */,
-        std::unique_ptr<ReadBufferFromFilePRead> /* uncorrectable errors */>> edac;
-    std::unordered_map<String /* device name */, std::unique_ptr<ReadBufferFromFilePRead>> block_devs;
+        std::unique_ptr<ReadBufferFromFilePRead> /* uncorrectable errors */>> edac TSA_GUARDED_BY(data_mutex);
+    std::unordered_map<String /* device name */, std::unique_ptr<ReadBufferFromFilePRead>> block_devs TSA_GUARDED_BY(data_mutex);
     /// TODO: socket statistics.
@@ -154,9 +163,9 @@ private:
         ProcStatValuesOther operator-(const ProcStatValuesOther & other) const;
     };
-    ProcStatValuesCPU proc_stat_values_all_cpus{};
-    ProcStatValuesOther proc_stat_values_other{};
-    std::vector<ProcStatValuesCPU> proc_stat_values_per_cpu;
+    ProcStatValuesCPU proc_stat_values_all_cpus TSA_GUARDED_BY(data_mutex) {};
+    ProcStatValuesOther proc_stat_values_other TSA_GUARDED_BY(data_mutex) {};
+    std::vector<ProcStatValuesCPU> proc_stat_values_per_cpu TSA_GUARDED_BY(data_mutex);
     /// https://www.kernel.org/doc/Documentation/block/stat.txt
     struct BlockDeviceStatValues
@@ -181,7 +190,7 @@ private:
         BlockDeviceStatValues operator-(const BlockDeviceStatValues & other) const;
     };
-    std::unordered_map<String /* device name */, BlockDeviceStatValues> block_device_stats;
+    std::unordered_map<String /* device name */, BlockDeviceStatValues> block_device_stats TSA_GUARDED_BY(data_mutex);
     struct NetworkInterfaceStatValues
     {
@@ -197,9 +206,9 @@ private:
         NetworkInterfaceStatValues operator-(const NetworkInterfaceStatValues & other) const;
     };
-    std::unordered_map<String /* device name */, NetworkInterfaceStatValues> network_interface_stats;
+    std::unordered_map<String /* device name */, NetworkInterfaceStatValues> network_interface_stats TSA_GUARDED_BY(data_mutex);
-    Stopwatch block_devices_rescan_delay;
+    Stopwatch block_devices_rescan_delay TSA_GUARDED_BY(data_mutex);
     void openSensors();
     void openBlockDevices();
@@ -207,10 +216,7 @@ private:
     void openEDAC();
 #endif
-    std::unique_ptr<ThreadFromGlobalPool> thread;
-
     void run();
-    void update(TimePoint update_time);
 };
 }

View File

@@ -121,7 +121,7 @@ KeeperAsynchronousMetrics::~KeeperAsynchronousMetrics()
     stop();
 }
-void KeeperAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint /*update_time*/, TimePoint /*current_time*/)
+void KeeperAsynchronousMetrics::updateImpl(TimePoint /*update_time*/, TimePoint /*current_time*/, bool /*force_update*/, bool /*first_run*/, AsynchronousMetricValues & new_values)
 {
 #if USE_NURAFT
     {

View File

@@ -19,7 +19,7 @@ public:
 private:
     ContextPtr context;
-    void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) override;
+    void updateImpl(TimePoint update_time, TimePoint current_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values) override;
 };

View File

@@ -208,6 +208,11 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
         return sm_commit_exec_in_progress_;
     }
+    void setServingRequest(bool value)
+    {
+        serving_req_ = value;
+    }
+
     using nuraft::raft_server::raft_server;
     // peers are initially marked as responding because at least one cycle
@@ -687,6 +692,14 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
             if (req.get_type() != nuraft::msg_type::append_entries_request)
                 break;
+            if (req.log_entries().empty())
+                break;
+
+            /// committing/preprocessing of local logs can take some time
+            /// and we don't want election to start during that time so we
+            /// set serving requests to avoid elections on timeout
+            raft_instance->setServingRequest(true);
+            SCOPE_EXIT(raft_instance->setServingRequest(false));
+
             /// maybe we got snapshot installed
             if (state_machine->last_commit_index() >= last_log_idx_on_disk && !raft_instance->isCommitInProgress())
                 preprocess_logs();

View File

@@ -97,7 +97,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
     {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"},
     {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"},
     {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"},
-    {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}}},
+    {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"},
+    {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}}},
 {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
     {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
     {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},

View File

@@ -4,9 +4,9 @@
 #include <Disks/DiskFactory.h>
 #include <IO/FileEncryptionCommon.h>
 #include <IO/ReadBufferFromEncryptedFile.h>
-#include <IO/ReadBufferFromFileDecorator.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteBufferFromEncryptedFile.h>
+#include <IO/ReadBufferFromEmptyFile.h>
 #include <boost/algorithm/hex.hpp>
 #include <Common/quoteString.h>
 #include <Common/typeid_cast.h>
@@ -374,7 +374,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
     {
         /// File is empty, that's a normal case, see DiskEncrypted::truncateFile().
         /// There is no header so we just return `ReadBufferFromString("")`.
-        return std::make_unique<ReadBufferFromFileDecorator>(std::make_unique<ReadBufferFromString>(std::string_view{}), wrapped_path);
+        return std::make_unique<ReadBufferFromEmptyFile>(wrapped_path);
     }
     auto encryption_settings = current_settings.get();
     FileEncryption::Header header = readHeader(*buffer);

View File

@@ -6,7 +6,6 @@
 #include <Common/Exception.h>
 #include <boost/algorithm/hex.hpp>
 #include <IO/ReadBufferFromEncryptedFile.h>
-#include <IO/ReadBufferFromFileDecorator.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteBufferFromEncryptedFile.h>
 #include <Common/quoteString.h>

View File

@@ -560,8 +560,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
             ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds());
     });
-    OpenTelemetry::SpanHolder span{
-        fmt::format("CachedOnDiskReadBufferFromFile::predownload(key={}, size={})", file_segment.key().toString(), bytes_to_predownload)};
+    OpenTelemetry::SpanHolder span("CachedOnDiskReadBufferFromFile::predownload");
+    span.addAttribute("clickhouse.key", file_segment.key().toString());
+    span.addAttribute("clickhouse.size", bytes_to_predownload);
     if (bytes_to_predownload)
     {

View File

@@ -1,6 +1,6 @@
 #include "ReadBufferFromRemoteFSGather.h"
-#include <IO/SeekableReadBuffer.h>
+#include <IO/ReadBufferFromFileBase.h>
 #include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
 #include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
@@ -61,7 +61,7 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(
         current_object = blobs_to_read.front();
 }
-SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object)
+std::unique_ptr<ReadBufferFromFileBase> ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object)
 {
     if (current_buf && !with_cache)
     {
@@ -78,7 +78,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c
     if (with_cache)
     {
         auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
-        return std::make_shared<CachedOnDiskReadBufferFromFile>(
+        return std::make_unique<CachedOnDiskReadBufferFromFile>(
             object_path,
             cache_key,
             settings.remote_fs_cache,

View File

@@ -53,7 +53,7 @@ public:
     bool isContentCached(size_t offset, size_t size) override;
 private:
-    SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object);
+    std::unique_ptr<ReadBufferFromFileBase> createImplementationBuffer(const StoredObject & object);
     bool nextImpl() override;
@@ -80,7 +80,7 @@ private:
     StoredObject current_object;
     size_t current_buf_idx = 0;
-    SeekableReadBufferPtr current_buf;
+    std::unique_ptr<ReadBufferFromFileBase> current_buf;
     Poco::Logger * log;
 };

View File

@@ -39,7 +39,7 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
     size_t alignment)
 {
     if (file_size.has_value() && !*file_size)
-        return std::make_unique<ReadBufferFromEmptyFile>();
+        return std::make_unique<ReadBufferFromEmptyFile>(filename);
     size_t estimated_size = 0;
     if (read_hint.has_value())

View File

@@ -531,7 +531,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
     const bool file_can_be_empty = !file_size.has_value() || *file_size == 0;
     if (storage_objects.empty() && file_can_be_empty)
-        return std::make_unique<ReadBufferFromEmptyFile>();
+        return std::make_unique<ReadBufferFromEmptyFile>(path);
     return object_storage->readObjects(
         storage_objects,

View File

@@ -228,7 +228,12 @@ public:
     off_t getPosition() override
     {
-        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive");
+        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition is not supported when reading from archive");
+    }
+
+    size_t getFileOffsetOfBufferEnd() const override
+    {
+        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getFileOffsetOfBufferEnd is not supported when reading from archive");
     }
     String getFileName() const override { return handle.getFileName(); }

View File

@@ -15,6 +15,7 @@ namespace ErrorCodes
     extern const int CANNOT_UNPACK_ARCHIVE;
     extern const int LOGICAL_ERROR;
     extern const int SEEK_POSITION_OUT_OF_BOUND;
+    extern const int UNSUPPORTED_METHOD;
     extern const int CANNOT_SEEK_THROUGH_FILE;
 }
@@ -252,6 +253,11 @@ public:
         checkResult(err);
     }
+    size_t getFileOffsetOfBufferEnd() const override
+    {
+        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getFileOffsetOfBufferEnd is not supported when reading from zip archive");
+    }
+
     off_t seek(off_t off, int whence) override
     {
         off_t current_pos = getPosition();

View File

@@ -4,8 +4,7 @@
 namespace DB
 {
-BoundedReadBuffer::BoundedReadBuffer(std::unique_ptr<SeekableReadBuffer> impl_)
-    : ReadBufferFromFileDecorator(std::move(impl_))
+BoundedReadBuffer::BoundedReadBuffer(std::unique_ptr<ReadBufferFromFileBase> impl_) : impl(std::move(impl_))
 {
 }

View File

@@ -1,5 +1,5 @@
 #pragma once
-#include <IO/ReadBufferFromFileDecorator.h>
+#include <IO/ReadBufferFromFileBase.h>
 namespace DB
@@ -7,10 +7,10 @@ namespace DB
 /// A buffer which allows to make an underlying buffer as right bounded,
 /// e.g. the buffer cannot return data beyond offset specified in `setReadUntilPosition`.
-class BoundedReadBuffer : public ReadBufferFromFileDecorator
+class BoundedReadBuffer : public ReadBufferFromFileBase
 {
 public:
-    explicit BoundedReadBuffer(std::unique_ptr<SeekableReadBuffer> impl_);
+    explicit BoundedReadBuffer(std::unique_ptr<ReadBufferFromFileBase> impl_);
     bool supportsRightBoundedReads() const override { return true; }
@@ -23,6 +23,8 @@ public:
     off_t seek(off_t off, int whence) override;
     size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; }
+    String getFileName() const override { return impl->getFileName(); }
+    size_t getFileSize() override { return impl->getFileSize(); }
     /// file_offset_of_buffer_end can differ from impl's file_offset_of_buffer_end
     /// because of resizing of the tail. => Need to also override getPosition() as
@@ -30,6 +32,8 @@ public:
     off_t getPosition() override;
 private:
+    std::unique_ptr<ReadBufferFromFileBase> impl;
+
     std::optional<size_t> read_until_position;
     /// atomic because can be used in log or exception messages while being updated.
     std::atomic<size_t> file_offset_of_buffer_end = 0;

View File

@@ -18,7 +18,6 @@ public:
     /// Returns adjusted position, i.e. returns `3` if the position in the nested buffer is `start_offset + 3`.
     off_t getPosition() override;
     off_t seek(off_t off, int whence) override;
 private:

View File

@@ -92,6 +92,11 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize()
     return getSizeFromFileDescriptor(getFD(), getFileName());
 }
+size_t MMapReadBufferFromFileDescriptor::getFileOffsetOfBufferEnd() const
+{
+    return mapped.getOffset() + mapped.getLength();
+}
+
 size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function<bool(size_t)> &)
 {
     if (offset >= mapped.getLength())

View File

@@ -36,6 +36,8 @@ public:
     std::string getFileName() const override;
+    size_t getFileOffsetOfBufferEnd() const override;
+
     int getFD() const;
     size_t getFileSize() override;

View File

@@ -76,4 +76,9 @@ off_t MMapReadBufferFromFileWithCache::seek(off_t offset, int whence)
     return new_pos;
 }
+size_t MMapReadBufferFromFileWithCache::getFileOffsetOfBufferEnd() const
+{
+    return mapped->getOffset() + mapped->getLength();
+}
+
 }

View File

@@ -19,7 +19,7 @@ public:
     off_t getPosition() override;
     std::string getFileName() const override;
     off_t seek(off_t offset, int whence) override;
+    size_t getFileOffsetOfBufferEnd() const override;
     bool isRegularLocalFile(size_t * /* out_view_offset */) override { return true; }
 private:

View File

@@ -14,12 +14,18 @@ namespace DB
 /// - ThreadPoolReader
 class ReadBufferFromEmptyFile : public ReadBufferFromFileBase
 {
+public:
+    explicit ReadBufferFromEmptyFile(const String & file_name_) : file_name(file_name_) {}
+
 private:
+    String file_name;
+
     bool nextImpl() override { return false; }
-    std::string getFileName() const override { return "<empty>"; }
+    std::string getFileName() const override { return file_name; }
     off_t seek(off_t /*off*/, int /*whence*/) override { return 0; }
     off_t getPosition() override { return 0; }
     size_t getFileSize() override { return 0; }
+    size_t getFileOffsetOfBufferEnd() const override { return 0; }
 };
 }

View File

@@ -101,6 +101,18 @@ bool ReadBufferFromEncryptedFile::nextImpl()
     return true;
 }
+size_t ReadBufferFromEncryptedFile::getFileSize()
+{
+    size_t size = in->getFileSize();
+    return size > FileEncryption::Header::kSize ? size - FileEncryption::Header::kSize : size;
+}
+
+size_t ReadBufferFromEncryptedFile::getFileOffsetOfBufferEnd() const
+{
+    size_t file_offset = in->getFileOffsetOfBufferEnd();
+    return file_offset > FileEncryption::Header::kSize ? file_offset - FileEncryption::Header::kSize : file_offset;
+}
+
 }
 #endif

View File

@@ -27,10 +27,10 @@ public:
     std::string getFileName() const override { return in->getFileName(); }
     void setReadUntilPosition(size_t position) override { in->setReadUntilPosition(position + FileEncryption::Header::kSize); }
     void setReadUntilEnd() override { in->setReadUntilEnd(); }
-    size_t getFileSize() override { return in->getFileSize(); }
+    size_t getFileSize() override;
+    size_t getFileOffsetOfBufferEnd() const override;
 private:
     bool nextImpl() override;

View File

@@ -60,6 +60,12 @@ public:
     /// file offset and what getPosition() returns.
     virtual bool isRegularLocalFile(size_t * /* out_view_offset */ = nullptr) { return false; }
+    /// NOTE: This method should be thread-safe against seek(), since it can be
+    /// used in CachedOnDiskReadBufferFromFile from multiple threads (because
+    /// it first releases the buffer, and then do logging, and so other thread
+    /// can already call seek() which will lead to data-race).
+    virtual size_t getFileOffsetOfBufferEnd() const = 0;
+
 protected:
     std::optional<size_t> file_size;
     ProfileCallback profile_callback;

View File

@@ -1,60 +0,0 @@
-#include <IO/ReadBufferFromFileDecorator.h>
-
-namespace DB
-{
-
-ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_)
-    : ReadBufferFromFileDecorator(std::move(impl_), "")
-{
-}
-
-ReadBufferFromFileDecorator::ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_, const String & file_name_)
-    : impl(std::move(impl_)), file_name(file_name_)
-{
-    swap(*impl);
-}
-
-std::string ReadBufferFromFileDecorator::getFileName() const
-{
-    if (!file_name.empty())
-        return file_name;
-    return getFileNameFromReadBuffer(*impl);
-}
-
-off_t ReadBufferFromFileDecorator::getPosition()
-{
-    swap(*impl);
-    auto position = impl->getPosition();
-    swap(*impl);
-    return position;
-}
-
-off_t ReadBufferFromFileDecorator::seek(off_t off, int whence)
-{
-    swap(*impl);
-    auto result = impl->seek(off, whence);
-    swap(*impl);
-    return result;
-}
-
-bool ReadBufferFromFileDecorator::nextImpl()
-{
-    swap(*impl);
-    auto result = impl->next();
-    swap(*impl);
-    return result;
-}
-
-size_t ReadBufferFromFileDecorator::getFileSize()
-{
-    return getFileSizeFromReadBuffer(*impl);
-}
-
-}

View File

@@ -1,37 +0,0 @@
-#pragma once
-
-#include <IO/ReadBufferFromFileBase.h>
-
-namespace DB
-{
-
-/// Delegates all reads to underlying buffer. Doesn't have own memory.
-class ReadBufferFromFileDecorator : public ReadBufferFromFileBase
-{
-public:
-    explicit ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_);
-    ReadBufferFromFileDecorator(std::unique_ptr<SeekableReadBuffer> impl_, const String & file_name_);
-
-    std::string getFileName() const override;
-
-    off_t getPosition() override;
-
-    off_t seek(off_t off, int whence) override;
-
-    bool nextImpl() override;
-
-    bool isWithFileSize() const { return dynamic_cast<const WithFileSize *>(impl.get()) != nullptr; }
-
-    const ReadBuffer & getWrappedReadBuffer() const { return *impl; }
-    ReadBuffer & getWrappedReadBuffer() { return *impl; }
-
-    size_t getFileSize() override;
-
-protected:
-    std::unique_ptr<SeekableReadBuffer> impl;
-
-    String file_name;
-};
-
-}

View File

@@ -20,7 +20,6 @@ public:
         : SeekableReadBuffer(const_cast<char *>(str.data()), str.size(), 0) {}
     off_t seek(off_t off, int whence) override;
     off_t getPosition() override;
 };

View File

@@ -44,12 +44,6 @@ public:
     virtual String getInfoForLog() { return ""; }
-    /// NOTE: This method should be thread-safe against seek(), since it can be
-    /// used in CachedOnDiskReadBufferFromFile from multiple threads (because
-    /// it first releases the buffer, and then do logging, and so other thread
-    /// can already call seek() which will lead to data-race).
-    virtual size_t getFileOffsetOfBufferEnd() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented"); }
-
     /// If true, setReadUntilPosition() guarantees that eof will be reported at the given position.
     virtual bool supportsRightBoundedReads() const { return false; }

View File

@@ -2,7 +2,6 @@
 #include <IO/ReadBufferFromFile.h>
 #include <IO/CompressedReadBufferWrapper.h>
 #include <IO/ParallelReadBuffer.h>
-#include <IO/ReadBufferFromFileDecorator.h>
 #include <IO/PeekableReadBuffer.h>
 namespace DB
@@ -17,23 +16,15 @@ template <typename T>
 static size_t getFileSize(T & in)
 {
     if (auto * with_file_size = dynamic_cast<WithFileSize *>(&in))
-    {
         return with_file_size->getFileSize();
-    }
     throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size");
 }
 size_t getFileSizeFromReadBuffer(ReadBuffer & in)
 {
-    if (auto * delegate = dynamic_cast<ReadBufferFromFileDecorator *>(&in))
-    {
-        return getFileSize(delegate->getWrappedReadBuffer());
-    }
-    else if (auto * compressed = dynamic_cast<CompressedReadBufferWrapper *>(&in))
-    {
+    if (auto * compressed = dynamic_cast<CompressedReadBufferWrapper *>(&in))
         return getFileSize(compressed->getWrappedReadBuffer());
-    }
     return getFileSize(in);
 }
@@ -52,11 +43,7 @@ std::optional<size_t> tryGetFileSizeFromReadBuffer(ReadBuffer & in)
 bool isBufferWithFileSize(const ReadBuffer & in)
 {
-    if (const auto * delegate = dynamic_cast<const ReadBufferFromFileDecorator *>(&in))
-    {
-        return delegate->isWithFileSize();
-    }
-    else if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
+    if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
     {
         return isBufferWithFileSize(compressed->getWrappedReadBuffer());
     }
@@ -66,11 +53,7 @@ bool isBufferWithFileSize(const ReadBuffer & in)
 size_t getDataOffsetMaybeCompressed(const ReadBuffer & in)
 {
-    if (const auto * delegate = dynamic_cast<const ReadBufferFromFileDecorator *>(&in))
-    {
-        return getDataOffsetMaybeCompressed(delegate->getWrappedReadBuffer());
-    }
-    else if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
+    if (const auto * compressed = dynamic_cast<const CompressedReadBufferWrapper *>(&in))
     {
         return getDataOffsetMaybeCompressed(compressed->getWrappedReadBuffer());
     }

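The helpers above all follow one pattern: peel a wrapping buffer off via dynamic_cast and recurse into the wrapped buffer until something that knows its file size is reached. A minimal standalone sketch of that pattern, with illustrative names rather than the ClickHouse classes:

#include <cstddef>
#include <memory>
#include <stdexcept>

struct Buffer { virtual ~Buffer() = default; };

/// Capability interface: a buffer that knows the size of its backing file.
struct WithSize { virtual std::size_t size() const = 0; virtual ~WithSize() = default; };

struct FileBuffer : Buffer, WithSize
{
    std::size_t file_size;
    explicit FileBuffer(std::size_t s) : file_size(s) {}
    std::size_t size() const override { return file_size; }
};

/// Wrapper that hides the underlying buffer (e.g. decompression).
struct Wrapper : Buffer
{
    std::unique_ptr<Buffer> nested;
    explicit Wrapper(std::unique_ptr<Buffer> n) : nested(std::move(n)) {}
};

std::size_t getSize(Buffer & in)
{
    if (auto * wrapper = dynamic_cast<Wrapper *>(&in))
        return getSize(*wrapper->nested);   /// unwrap and recurse
    if (auto * with_size = dynamic_cast<WithSize *>(&in))
        return with_size->size();
    throw std::runtime_error("Cannot find out file size");
}

int main()
{
    Wrapper w(std::make_unique<FileBuffer>(1024));
    return getSize(w) == 1024 ? 0 : 1;      /// unwraps down to the file buffer
}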
View File

@@ -178,6 +178,11 @@ void WriteBufferFromS3::preFinalize()
 void WriteBufferFromS3::finalizeImpl()
 {
+    OpenTelemetry::SpanHolder span("WriteBufferFromS3::finalizeImpl");
+    span.addAttribute("clickhouse.s3_bucket", bucket);
+    span.addAttribute("clickhouse.s3_key", key);
+    span.addAttribute("clickhouse.total_size", total_size);
+
     LOG_TRACE(limitedLog, "finalizeImpl WriteBufferFromS3. {}.", getShortLogDetails());

     if (!is_prefinalized)
@@ -188,6 +193,8 @@ void WriteBufferFromS3::finalizeImpl()
     task_tracker->waitAll();

+    span.addAttributeIfNotZero("clickhouse.multipart_upload_parts", multipart_tags.size());
+
     if (!multipart_upload_id.empty())
     {
         completeMultipartUpload();

View File

@@ -424,7 +424,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset)
 FileSegment::State FileSegment::wait(size_t offset)
 {
-    OpenTelemetry::SpanHolder span{fmt::format("FileSegment::wait({})", key().toString())};
+    OpenTelemetry::SpanHolder span("FileSegment::wait");
+    span.addAttribute("clickhouse.key", key().toString());
+    span.addAttribute("clickhouse.offset", offset);

     auto lock = lockFileSegment();

View File

@@ -322,7 +322,6 @@ void executeQuery(
 void executeQueryWithParallelReplicas(
     QueryPlan & query_plan,
-    const StorageID & main_table,
     SelectStreamFactory & stream_factory,
     const ASTPtr & query_ast,
     ContextPtr context,
@@ -414,7 +413,6 @@ void executeQueryWithParallelReplicas(
         std::move(coordinator),
         stream_factory.header,
         stream_factory.processed_stage,
-        main_table,
         new_context,
         getThrottler(new_context),
         std::move(scalars),

View File

@@ -70,7 +70,6 @@ void executeQuery(
 void executeQueryWithParallelReplicas(
     QueryPlan & query_plan,
-    const StorageID & main_table,
     SelectStreamFactory & stream_factory,
     const ASTPtr & query_ast,
     ContextPtr context,

View File

@@ -290,6 +290,7 @@ struct ContextSharedPart : boost::noncopyable
     mutable QueryCachePtr query_cache TSA_GUARDED_BY(mutex); /// Cache of query results.
     mutable MarkCachePtr index_mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files of MergeTree indices.
     mutable MMappedFileCachePtr mmap_cache TSA_GUARDED_BY(mutex); /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
+    AsynchronousMetrics * asynchronous_metrics TSA_GUARDED_BY(mutex) = nullptr; /// Points to asynchronous metrics
     ProcessList process_list; /// Executing queries at the moment.
     SessionTracker session_tracker;
     GlobalOvercommitTracker global_overcommit_tracker;
@@ -1644,6 +1645,11 @@ void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name)
         "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name)));
 }

+Context::QueryFactoriesInfo Context::getQueryFactoriesInfo() const
+{
+    return query_factories_info;
+}
+
 void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const
 {
     if (isGlobalContext())
@@ -2860,6 +2866,18 @@ void Context::clearCaches() const
     /// Intentionally not clearing the query cache which is transactionally inconsistent by design.
 }

+void Context::setAsynchronousMetrics(AsynchronousMetrics * asynchronous_metrics_)
+{
+    std::lock_guard lock(shared->mutex);
+    shared->asynchronous_metrics = asynchronous_metrics_;
+}
+
+AsynchronousMetrics * Context::getAsynchronousMetrics() const
+{
+    SharedLockGuard lock(shared->mutex);
+    return shared->asynchronous_metrics;
+}
+
 ThreadPool & Context::getPrefetchThreadpool() const
 {
     callOnce(shared->prefetch_threadpool_initialized, [&] {

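The setter takes the shared mutex exclusively while the getter takes it shared, so a rare registration never serializes the common read path. A self-contained sketch of the same registration pattern using standard library types (SharedLockGuard is ClickHouse's own wrapper; std::shared_lock plays its role here):

#include <cassert>
#include <mutex>
#include <shared_mutex>

struct AsyncMetrics { int value = 0; };

class Registry
{
public:
    void set(AsyncMetrics * m)
    {
        std::lock_guard lock(mutex);        /// exclusive: writers are rare
        metrics = m;
    }

    AsyncMetrics * get() const
    {
        std::shared_lock lock(mutex);       /// shared: many concurrent readers
        return metrics;
    }

private:
    mutable std::shared_mutex mutex;
    AsyncMetrics * metrics = nullptr;       /// not owned; null until registration
};

int main()
{
    Registry registry;
    assert(registry.get() == nullptr);      /// callers must handle the unset case
    AsyncMetrics m;
    registry.set(&m);
    assert(registry.get() == &m);
}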
View File

@@ -70,6 +70,7 @@ class IUserDefinedSQLObjectsStorage;
 class InterserverCredentials;
 using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
 class InterserverIOHandler;
+class AsynchronousMetrics;
 class BackgroundSchedulePool;
 class MergeList;
 class MovesList;
@@ -373,25 +374,6 @@ protected:
     QueryFactoriesInfo(QueryFactoriesInfo && rhs) = delete;

-    QueryFactoriesInfo & operator=(QueryFactoriesInfo rhs)
-    {
-        swap(rhs);
-        return *this;
-    }
-
-    void swap(QueryFactoriesInfo & rhs)
-    {
-        std::swap(aggregate_functions, rhs.aggregate_functions);
-        std::swap(aggregate_function_combinators, rhs.aggregate_function_combinators);
-        std::swap(database_engines, rhs.database_engines);
-        std::swap(data_type_families, rhs.data_type_families);
-        std::swap(dictionaries, rhs.dictionaries);
-        std::swap(formats, rhs.formats);
-        std::swap(functions, rhs.functions);
-        std::swap(storages, rhs.storages);
-        std::swap(table_functions, rhs.table_functions);
-    }
-
     std::unordered_set<std::string> aggregate_functions;
     std::unordered_set<std::string> aggregate_function_combinators;
     std::unordered_set<std::string> database_engines;
@@ -727,7 +709,7 @@ public:
         TableFunction
     };

-    const QueryFactoriesInfo & getQueryFactoriesInfo() const { return query_factories_info; }
+    QueryFactoriesInfo getQueryFactoriesInfo() const;
     void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const;

     /// For table functions s3/file/url/hdfs/input we can use structure from
@@ -1014,6 +996,9 @@ public:
     /// -----------------------------------------------------------------------------------------------------

+    void setAsynchronousMetrics(AsynchronousMetrics * asynchronous_metrics_);
+    AsynchronousMetrics * getAsynchronousMetrics() const;
+
     ThreadPool & getPrefetchThreadpool() const;

     /// Note: prefetchThreadpool is different from threadpoolReader

View File

@@ -11,6 +11,8 @@
 #include <Common/atomicRename.h>
 #include <Common/PoolId.h>
 #include <Common/logger_useful.h>
+#include <Parsers/ASTSetQuery.h>
+#include <Storages/MergeTree/MergeTreeSettings.h>
 #include <base/hex.h>
 #include <Core/Defines.h>
@@ -463,6 +465,14 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
         column_declaration->children.push_back(column_declaration->ttl);
     }

+    if (!column.settings.empty())
+    {
+        auto settings = std::make_shared<ASTSetQuery>();
+        settings->is_standalone = false;
+        settings->changes = column.settings;
+        column_declaration->settings = std::move(settings);
+    }
+
     columns_list->children.push_back(column_declaration_ptr);
 }
@@ -670,6 +680,12 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
     if (col_decl.ttl)
         column.ttl = col_decl.ttl;

+    if (col_decl.settings)
+    {
+        column.settings = col_decl.settings->as<ASTSetQuery &>().changes;
+        MergeTreeColumnSettings::validate(column.settings);
+    }
+
     res.add(std::move(column));
 }

View File

@@ -561,6 +561,14 @@ BlockIO InterpreterSystemQuery::execute()
             getContext()->checkAccess(AccessType::SYSTEM_RELOAD_USERS);
             system_context->getAccessControl().reload(AccessControl::ReloadMode::ALL);
             break;
+        case Type::RELOAD_ASYNCHRONOUS_METRICS:
+        {
+            getContext()->checkAccess(AccessType::SYSTEM_RELOAD_ASYNCHRONOUS_METRICS);
+            auto * asynchronous_metrics = system_context->getAsynchronousMetrics();
+            if (asynchronous_metrics)
+                asynchronous_metrics->update(std::chrono::system_clock::now(), /*force_update*/ true);
+            break;
+        }
         case Type::STOP_MERGES:
             startStopAction(ActionLocks::PartsMerge, false);
             break;
@@ -1225,6 +1233,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
             required_access.emplace_back(AccessType::SYSTEM_RELOAD_USERS);
             break;
         }
+        case Type::RELOAD_ASYNCHRONOUS_METRICS:
+        {
+            required_access.emplace_back(AccessType::SYSTEM_RELOAD_ASYNCHRONOUS_METRICS);
+            break;
+        }
         case Type::STOP_MERGES:
         case Type::START_MERGES:
         {

View File

@@ -1,17 +1,13 @@
 #pragma once

 #include <AggregateFunctions/AggregateFunctionFactory.h>
-#include <Core/Range.h>
 #include <DataTypes/DataTypeFactory.h>
-#include <DataTypes/FieldToDataType.h>
 #include <Functions/FunctionFactory.h>
 #include <IO/WriteHelpers.h>
-#include <Interpreters/IdentifierSemantic.h>
 #include <Interpreters/InDepthNodeVisitor.h>
-#include <Interpreters/applyFunction.h>
+#include <Interpreters/IdentifierSemantic.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTOrderByElement.h>
 #include <Parsers/ASTTablesInSelectQuery.h>
 #include <Parsers/IAST.h>
@@ -37,8 +33,6 @@ public:
     ASTIdentifier * identifier = nullptr;
     DataTypePtr arg_data_type = {};

-    Range range = Range::createWholeUniverse();
-
     void reject() { monotonicity.is_monotonic = false; }
     bool isRejected() const { return !monotonicity.is_monotonic; }
@@ -103,30 +97,13 @@ public:
         if (data.isRejected())
             return;

-        /// Monotonicity check only works for functions that contain at most two arguments and one of them must be a constant.
-        if (!ast_function.arguments)
+        /// TODO: monotonicity for functions of several arguments
+        if (!ast_function.arguments || ast_function.arguments->children.size() != 1)
         {
             data.reject();
             return;
         }

-        auto arguments_size = ast_function.arguments->children.size();
-        if (arguments_size == 0 || arguments_size > 2)
-        {
-            data.reject();
-            return;
-        }
-        else if (arguments_size == 2)
-        {
-            /// If the function has two arguments, then one of them must be a constant.
-            if (!ast_function.arguments->children[0]->as<ASTLiteral>() && !ast_function.arguments->children[1]->as<ASTLiteral>())
-            {
-                data.reject();
-                return;
-            }
-        }
-
         if (!data.canOptimize(ast_function))
         {
             data.reject();
@@ -147,33 +124,14 @@ public:
             return;
         }

-        auto function_arguments = getFunctionArguments(ast_function, data);
+        ColumnsWithTypeAndName args;
+        args.emplace_back(data.arg_data_type, "tmp");

-        auto function_base = function->build(function_arguments);
+        auto function_base = function->build(args);

         if (function_base && function_base->hasInformationAboutMonotonicity())
         {
             bool is_positive = data.monotonicity.is_positive;
-            data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, data.range.left, data.range.right);
-
-            auto & key_range = data.range;
-
-            /// If we apply function to open interval, we can get empty intervals in result.
-            /// E.g. for ('2020-01-03', '2020-01-20') after applying 'toYYYYMM' we will get ('202001', '202001').
-            /// To avoid this we make range left and right included.
-            /// Any function that treats NULL specially is not monotonic.
-            /// Thus we can safely use isNull() as an -Inf/+Inf indicator here.
-            if (!key_range.left.isNull())
-            {
-                key_range.left = applyFunction(function_base, data.arg_data_type, key_range.left);
-                key_range.left_included = true;
-            }
-
-            if (!key_range.right.isNull())
-            {
-                key_range.right = applyFunction(function_base, data.arg_data_type, key_range.right);
-                key_range.right_included = true;
-            }
+            data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field());

             if (!is_positive)
                 data.monotonicity.is_positive = !data.monotonicity.is_positive;
@@ -185,53 +143,13 @@ public:
     static bool needChildVisit(const ASTPtr & parent, const ASTPtr &)
     {
-        /// Multi-argument functions with all but one constant arguments can be monotonic.
+        /// Currently we check monotonicity only for single-argument functions.
+        /// Although, multi-argument functions with all but one constant arguments can also be monotonic.
         if (const auto * func = typeid_cast<const ASTFunction *>(parent.get()))
-            return func->arguments->children.size() <= 2;
+            return func->arguments->children.size() < 2;

         return true;
     }
-
-    static ColumnWithTypeAndName extractLiteralColumnAndTypeFromAstLiteral(const ASTLiteral * literal)
-    {
-        ColumnWithTypeAndName result;
-
-        result.type = applyVisitor(FieldToDataType(), literal->value);
-        result.column = result.type->createColumnConst(0, literal->value);
-
-        return result;
-    }
-
-    static ColumnsWithTypeAndName getFunctionArguments(const ASTFunction & ast_function, const Data & data)
-    {
-        ColumnsWithTypeAndName args;
-
-        auto arguments_size = ast_function.arguments->children.size();
-
-        chassert(arguments_size == 1 || arguments_size == 2);
-
-        if (arguments_size == 2)
-        {
-            if (ast_function.arguments->children[0]->as<ASTLiteral>())
-            {
-                const auto * literal = ast_function.arguments->children[0]->as<ASTLiteral>();
-                args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal));
-                args.emplace_back(data.arg_data_type, "tmp");
-            }
-            else
-            {
-                const auto * literal = ast_function.arguments->children[1]->as<ASTLiteral>();
-                args.emplace_back(data.arg_data_type, "tmp");
-                args.push_back(extractLiteralColumnAndTypeFromAstLiteral(literal));
-            }
-        }
-        else
-        {
-            args.emplace_back(data.arg_data_type, "tmp");
-        }
-
-        return args;
-    }
 };

 using MonotonicityCheckVisitor = ConstInDepthNodeVisitor<MonotonicityCheckMatcher, false>;

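The visitor composes the monotonicity of a chain of single-argument functions, flipping the expected direction whenever a decreasing function is applied on top. A small sketch of why that sign-flipping composition is sound, assuming each function is monotonic over the whole domain (which is what the unbounded Field(), Field() range asks about):

#include <cassert>

/// Direction of a monotonic function: is_positive means increasing.
struct Monotonicity { bool is_monotonic = false; bool is_positive = true; };

/// Composing monotonic functions multiplies their directions:
/// f increasing and g decreasing => f(g(x)) decreasing, and so on.
Monotonicity compose(Monotonicity outer, Monotonicity inner)
{
    Monotonicity result;
    result.is_monotonic = outer.is_monotonic && inner.is_monotonic;
    result.is_positive = (outer.is_positive == inner.is_positive);
    return result;
}

int main()
{
    Monotonicity inc{true, true};    /// e.g. a hypothetical increasing function
    Monotonicity dec{true, false};   /// e.g. a hypothetical decreasing function
    assert(compose(inc, dec).is_positive == false);      /// inc(dec(x)) decreases
    assert(compose(dec, dec).is_positive == true);       /// dec(dec(x)) increases
    assert(!compose(inc, Monotonicity{}).is_monotonic);  /// one broken link breaks the chain
}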
View File

@@ -69,7 +69,7 @@ ServerAsynchronousMetrics::~ServerAsynchronousMetrics()
     stop();
 }

-void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time)
+void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint current_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values)
 {
     if (auto mark_cache = getContext()->getMarkCache())
     {
@@ -377,7 +377,7 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
     }
 #endif

-    updateHeavyMetricsIfNeeded(current_time, update_time, new_values);
+    updateHeavyMetricsIfNeeded(current_time, update_time, force_update, first_run, new_values);
 }

 void ServerAsynchronousMetrics::logImpl(AsynchronousMetricValues & new_values)
@@ -421,19 +421,19 @@ void ServerAsynchronousMetrics::updateDetachedPartsStats()
     detached_parts_stats = current_values;
 }

-void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values)
+void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values)
 {
-    const auto time_after_previous_update = current_time - heavy_metric_previous_update_time;
-    const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run;
+    const auto time_since_previous_update = current_time - heavy_metric_previous_update_time;
+    const bool update_heavy_metrics = (time_since_previous_update >= heavy_metric_update_period) || force_update || first_run;

     Stopwatch watch;
-    if (update_heavy_metric)
+    if (update_heavy_metrics)
     {
         heavy_metric_previous_update_time = update_time;
         if (first_run)
             heavy_update_interval = heavy_metric_update_period.count();
         else
-            heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
+            heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_since_previous_update).count() / 1e6;

         /// Test shows that listing 100000 entries consumes around 0.15 sec.
         updateDetachedPartsStats();

View File

@@ -18,7 +18,7 @@ public:
     ~ServerAsynchronousMetrics() override;

 private:
-    void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) override;
+    void updateImpl(TimePoint update_time, TimePoint current_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values) override;
     void logImpl(AsynchronousMetricValues & new_values) override;

     const Duration heavy_metric_update_period;
@@ -34,7 +34,7 @@ private:
     DetachedPartsStats detached_parts_stats{};
     void updateDetachedPartsStats();
-    void updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values);
+    void updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, bool force_update, bool first_run, AsynchronousMetricValues & new_values);
 };

 }

View File

@@ -752,7 +752,8 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
     }

     /// Rewrite sum(column +/- literal) function with sum(column) +/- literal * count(column).
-    rewriteSumFunctionWithSumAndCount(query, tables_with_columns);
+    if (settings.optimize_arithmetic_operations_in_aggregate_functions)
+        rewriteSumFunctionWithSumAndCount(query, tables_with_columns);

     /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc.
     optimizeDateFilters(select_query, tables_with_columns, context);

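The rewrite rests on the identity sum(x + c) = sum(x) + c * count(x) over the aggregated rows; a quick numeric check of that identity in plain C++ (illustrative only):

#include <cassert>
#include <vector>

int main()
{
    std::vector<long> column{1, 2, 3, 4};
    const long c = 10;

    long sum_shifted = 0, sum_plain = 0;
    for (long x : column)
    {
        sum_shifted += x + c;   /// sum(column + literal)
        sum_plain += x;         /// sum(column)
    }

    /// sum(column + c) == sum(column) + c * count(column)
    assert(sum_shifted == sum_plain + c * static_cast<long>(column.size()));
}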
View File

@@ -1,43 +0,0 @@
-#include <Interpreters/applyFunction.h>
-
-#include <Core/Range.h>
-#include <Functions/IFunction.h>
-
-namespace DB
-{
-
-static Field applyFunctionForField(const FunctionBasePtr & func, const DataTypePtr & arg_type, const Field & arg_value)
-{
-    ColumnsWithTypeAndName columns{
-        {arg_type->createColumnConst(1, arg_value), arg_type, "x"},
-    };
-
-    auto col = func->execute(columns, func->getResultType(), 1);
-    return (*col)[0];
-}
-
-FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
-{
-    /// Fallback for fields without block reference.
-    if (field.isExplicit())
-        return applyFunctionForField(func, current_type, field);
-
-    String result_name = "_" + func->getName() + "_" + toString(field.column_idx);
-    const auto & columns = field.columns;
-    size_t result_idx = columns->size();
-
-    for (size_t i = 0; i < result_idx; ++i)
-        if ((*columns)[i].name == result_name)
-            result_idx = i;
-
-    if (result_idx == columns->size())
-    {
-        ColumnsWithTypeAndName args{(*columns)[field.column_idx]};
-        field.columns->emplace_back(ColumnWithTypeAndName{nullptr, func->getResultType(), result_name});
-        (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size());
-    }
-
-    return {field.columns, field.row_idx, result_idx};
-}
-
-}

View File

@@ -1,16 +0,0 @@
-#pragma once
-
-#include <memory>
-
-namespace DB
-{
-
-struct FieldRef;
-
-class IFunctionBase;
-class IDataType;
-
-using DataTypePtr = std::shared_ptr<const IDataType>;
-using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
-
-FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field);
-
-}

View File

@@ -257,7 +257,7 @@ addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo
     element.query_projections.insert(access_info.projections.begin(), access_info.projections.end());
     element.query_views.insert(access_info.views.begin(), access_info.views.end());

-    const auto & factories_info = context_ptr->getQueryFactoriesInfo();
+    const auto factories_info = context_ptr->getQueryFactoriesInfo();
     element.used_aggregate_functions = factories_info.aggregate_functions;
     element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators;
     element.used_database_engines = factories_info.database_engines;

View File

@@ -57,70 +57,83 @@ ASTPtr ASTColumnDeclaration::clone() const
         res->children.push_back(res->collation);
     }

+    if (settings)
+    {
+        res->settings = settings->clone();
+        res->children.push_back(res->settings);
+    }
+
     return res;
 }

-void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, FormatState & state, FormatStateStacked frame) const
 {
     frame.need_parens = false;

     /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query.
-    settings.ostr << backQuote(name);
+    format_settings.ostr << backQuote(name);

     if (type)
     {
-        settings.ostr << ' ';
+        format_settings.ostr << ' ';

         FormatStateStacked type_frame = frame;
         type_frame.indent = 0;

-        type->formatImpl(settings, state, type_frame);
+        type->formatImpl(format_settings, state, type_frame);
     }

     if (null_modifier)
     {
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "")
-                      << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : "");
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "")
+                             << (*null_modifier ? "" : "NOT ") << "NULL" << (format_settings.hilite ? hilite_none : "");
     }

     if (default_expression)
     {
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "");
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << default_specifier << (format_settings.hilite ? hilite_none : "");
         if (!ephemeral_default)
        {
-            settings.ostr << ' ';
-            default_expression->formatImpl(settings, state, frame);
+            format_settings.ostr << ' ';
+            default_expression->formatImpl(format_settings, state, frame);
         }
     }

     if (comment)
     {
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COMMENT" << (settings.hilite ? hilite_none : "") << ' ';
-        comment->formatImpl(settings, state, frame);
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "COMMENT" << (format_settings.hilite ? hilite_none : "") << ' ';
+        comment->formatImpl(format_settings, state, frame);
     }

     if (codec)
     {
-        settings.ostr << ' ';
-        codec->formatImpl(settings, state, frame);
+        format_settings.ostr << ' ';
+        codec->formatImpl(format_settings, state, frame);
     }

     if (stat_type)
     {
-        settings.ostr << ' ';
-        stat_type->formatImpl(settings, state, frame);
+        format_settings.ostr << ' ';
+        stat_type->formatImpl(format_settings, state, frame);
     }

     if (ttl)
     {
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';
-        ttl->formatImpl(settings, state, frame);
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "TTL" << (format_settings.hilite ? hilite_none : "") << ' ';
+        ttl->formatImpl(format_settings, state, frame);
     }

     if (collation)
     {
-        settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "COLLATE" << (settings.hilite ? hilite_none : "") << ' ';
-        collation->formatImpl(settings, state, frame);
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "COLLATE" << (format_settings.hilite ? hilite_none : "") << ' ';
+        collation->formatImpl(format_settings, state, frame);
     }
+
+    if (settings)
+    {
+        format_settings.ostr << ' ' << (format_settings.hilite ? hilite_keyword : "") << "SETTINGS" << (format_settings.hilite ? hilite_none : "") << ' ' << '(';
+        settings->formatImpl(format_settings, state, frame);
+        format_settings.ostr << ')';
+    }
 }

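Renaming the parameter from settings to format_settings is what makes the new settings member reachable inside formatImpl at all; otherwise the parameter would shadow it. A minimal sketch of the shadowing problem the rename avoids (hypothetical types):

#include <iostream>
#include <string>

struct Node
{
    std::string settings = "(max_compress_block_size = 1048576)";

    /// If this parameter were also named `settings`, it would shadow the
    /// member above and `settings` in the body would name the wrong thing.
    void format(const std::string & format_settings) const
    {
        std::cout << format_settings << " SETTINGS " << settings << '\n';
    }
};

int main()
{
    Node{}.format("`x` UInt64");
}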
View File

@@ -22,12 +22,13 @@ public:
     ASTPtr stat_type;
     ASTPtr ttl;
     ASTPtr collation;
+    ASTPtr settings;
     bool primary_key_specifier = false;

     String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); }

     ASTPtr clone() const override;
-    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+    void formatImpl(const FormatSettings & format_settings, FormatState & state, FormatStateStacked frame) const override;
 };

 }

View File

@@ -67,6 +67,7 @@ public:
         RELOAD_EMBEDDED_DICTIONARIES,
         RELOAD_CONFIG,
         RELOAD_USERS,
+        RELOAD_ASYNCHRONOUS_METRICS,
         RESTART_DISK,
         STOP_MERGES,
         START_MERGES,

View File

@@ -111,6 +111,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
     ParserKeyword s_comment("COMMENT");
     ParserKeyword s_codec("CODEC");
     ParserKeyword s_ttl("TTL");
+    ParserKeyword s_settings("SETTINGS");

     ParserKeyword s_remove_ttl("REMOVE TTL");
     ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY");
@@ -725,9 +726,21 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
                 command->remove_property = "CODEC";
             else if (s_ttl.ignore(pos, expected))
                 command->remove_property = "TTL";
+            else if (s_settings.ignore(pos, expected))
+                command->remove_property = "SETTINGS";
             else
                 return false;
         }
+        else if (s_modify_setting.ignore(pos, expected))
+        {
+            if (!parser_settings.parse(pos, command_settings_changes, expected))
+                return false;
+        }
+        else if (s_reset_setting.ignore(pos, expected))
+        {
+            if (!parser_reset_setting.parse(pos, command_settings_resets, expected))
+                return false;
+        }
         else
         {
             if (s_first.ignore(pos, expected))

View File

@@ -10,6 +10,7 @@
 #include <Parsers/ExpressionListParsers.h>
 #include <Parsers/IParserBase.h>
 #include <Parsers/ParserDataType.h>
+#include <Parsers/ParserSetQuery.h>
 #include <Poco/String.h>

 namespace DB
@@ -120,8 +121,6 @@ using ParserCompoundColumnDeclaration = IParserColumnDeclaration<ParserCompoundIdentifier>;
 template <typename NameParser>
 bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
-    NameParser name_parser;
-    ParserDataType type_parser;
     ParserKeyword s_default{"DEFAULT"};
     ParserKeyword s_null{"NULL"};
     ParserKeyword s_not{"NOT"};
@@ -134,9 +133,15 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserKeyword s_stat{"STATISTIC"};
     ParserKeyword s_ttl{"TTL"};
     ParserKeyword s_remove{"REMOVE"};
+    ParserKeyword s_modify_setting("MODIFY SETTING");
+    ParserKeyword s_reset_setting("RESET SETTING");
+    ParserKeyword s_settings("SETTINGS");
     ParserKeyword s_type{"TYPE"};
     ParserKeyword s_collate{"COLLATE"};
     ParserKeyword s_primary_key{"PRIMARY KEY"};
+
+    NameParser name_parser;
+    ParserDataType type_parser;
     ParserExpression expr_parser;
     ParserStringLiteral string_literal_parser;
     ParserLiteral literal_parser;
@@ -144,6 +149,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserCollation collation_parser;
     ParserStatisticType stat_type_parser;
     ParserExpression expression_parser;
+    ParserSetQuery settings_parser(true);

     /// mandatory column name
     ASTPtr name;
@@ -155,11 +161,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)

     /// This keyword may occur only in MODIFY COLUMN query. We check it here
     /// because ParserDataType parses types as an arbitrary identifiers and
-    /// doesn't check that parsed string is existing data type. In this way
-    /// REMOVE keyword can be parsed as data type and further parsing will fail.
-    /// So we just check this keyword and in case of success return column
-    /// declaration with name only.
-    if (!require_type && s_remove.checkWithoutMoving(pos, expected))
+    /// doesn't check that parsed string is existing data type. In this way,
+    /// REMOVE, MODIFY SETTING, or RESET SETTING can be parsed as data type
+    /// and further parsing will fail. So we just check these keywords and in
+    /// case of success return column declaration with name only.
+    if (!require_type
+        && (s_remove.checkWithoutMoving(pos, expected) || s_modify_setting.checkWithoutMoving(pos, expected) || s_reset_setting.checkWithoutMoving(pos, expected)))
     {
         if (!check_keywords_after_name)
             return false;
@@ -181,6 +188,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ASTPtr stat_type_expression;
     ASTPtr ttl_expression;
     ASTPtr collation_expression;
+    ASTPtr settings;
     bool primary_key_specifier = false;

     auto null_check_without_moving = [&]() -> bool
@@ -321,6 +329,28 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         primary_key_specifier = true;
     }

+    auto old_pos = pos;
+    if (s_settings.ignore(pos, expected))
+    {
+        /// When the keyword `SETTINGS` appears here, it can introduce either a column
+        /// settings declaration or query settings. For example:
+        /// - Column settings: `ALTER TABLE xx MODIFY COLUMN yy <new_type> SETTINGS (name = value)`
+        /// - Query settings:  `ALTER TABLE xx MODIFY COLUMN yy <new_type> SETTINGS mutation_sync = 2`
+        /// So after parsing the keyword `SETTINGS`, we check whether it is followed by `(`:
+        /// if so, it is the column settings, otherwise it is the query settings and we move `pos` back.
+        ParserToken parser_opening_bracket(TokenType::OpeningRoundBracket);
+        if (parser_opening_bracket.ignore(pos, expected))
+        {
+            if (!settings_parser.parse(pos, settings, expected))
+                return false;
+            ParserToken parser_closing_bracket(TokenType::ClosingRoundBracket);
+            if (!parser_closing_bracket.ignore(pos, expected))
+                return false;
+        }
+        else
+            pos = old_pos;
+    }
+
     node = column_declaration;

     if (type)
@@ -351,6 +381,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         column_declaration->children.push_back(std::move(codec_expression));
     }

+    if (settings)
+    {
+        column_declaration->settings = settings;
+        column_declaration->children.push_back(std::move(settings));
+    }
+
     if (stat_type_expression)
     {
         column_declaration->stat_type = stat_type_expression;
@@ -362,6 +398,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         column_declaration->ttl = ttl_expression;
         column_declaration->children.push_back(std::move(ttl_expression));
     }
+
     if (collation_expression)
     {
         column_declaration->collation = collation_expression;

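The SETTINGS disambiguation above is one-token lookahead with backtracking: consume the keyword, peek for an opening parenthesis, and restore the saved position when it is absent. A standalone sketch of that save-and-restore idiom over a toy token stream (not the ClickHouse parser classes):

#include <cassert>
#include <string>
#include <vector>

using Tokens = std::vector<std::string>;

/// Returns true if a parenthesized column-settings list follows; otherwise
/// leaves `pos` untouched so the caller can parse query-level settings.
bool tryParseColumnSettings(const Tokens & tokens, size_t & pos)
{
    size_t old_pos = pos;                       /// save the position first
    if (pos < tokens.size() && tokens[pos] == "SETTINGS")
    {
        ++pos;
        if (pos < tokens.size() && tokens[pos] == "(")
            return true;                        /// column settings: continue from '('
        pos = old_pos;                          /// query settings: backtrack
    }
    return false;
}

int main()
{
    Tokens column_form{"SETTINGS", "(", "name", "=", "value", ")"};
    Tokens query_form{"SETTINGS", "mutation_sync", "=", "2"};

    size_t pos = 0;
    assert(tryParseColumnSettings(column_form, pos) && pos == 1);

    pos = 0;
    assert(!tryParseColumnSettings(query_form, pos) && pos == 0);
}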
View File

@@ -3,11 +3,6 @@
 namespace DB
 {

-String queryToStringNullable(const ASTPtr & query)
-{
-    return query ? queryToString(query) : "";
-}
-
 String queryToString(const ASTPtr & query)
 {
     return queryToString(*query);

View File

@@ -6,5 +6,4 @@ namespace DB
 {
 String queryToString(const ASTPtr & query);
 String queryToString(const IAST & query);
-String queryToStringNullable(const ASTPtr & query);
 }

View File

@@ -1391,7 +1391,7 @@ void Planner::buildPlanForQueryNode()
         }
     }

-    if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty())
+    if (!settings.parallel_replicas_custom_key.value.empty())
     {
         /// Check support for JOIN for parallel replicas with custom key
         if (planner_context->getTableExpressionNodeToData().size() > 1)

View File

@@ -357,7 +357,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
     ParallelReplicasReadingCoordinatorPtr coordinator_,
     Block header_,
     QueryProcessingStage::Enum stage_,
-    StorageID main_table_,
     ContextMutablePtr context_,
     ThrottlerPtr throttler_,
     Scalars scalars_,
@@ -369,7 +368,6 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
     , query_ast(query_ast_)
     , coordinator(std::move(coordinator_))
     , stage(std::move(stage_))
-    , main_table(std::move(main_table_))
     , context(context_)
     , throttler(throttler_)
     , scalars(scalars_)

View File

@@ -76,7 +76,6 @@ public:
         ParallelReplicasReadingCoordinatorPtr coordinator_,
         Block header_,
         QueryProcessingStage::Enum stage_,
-        StorageID main_table_,
         ContextMutablePtr context_,
         ThrottlerPtr throttler_,
         Scalars scalars_,
@@ -99,7 +98,6 @@ private:
     ASTPtr query_ast;
     ParallelReplicasReadingCoordinatorPtr coordinator;
     QueryProcessingStage::Enum stage;
-    StorageID main_table;
     ContextMutablePtr context;
     ThrottlerPtr throttler;
     Scalars scalars;

View File

@@ -36,6 +36,7 @@
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Common/typeid_cast.h>
 #include <Common/randomSeed.h>
+#include <Storages/MergeTree/MergeTreeSettings.h>

 #include <ranges>
@@ -74,6 +75,8 @@ AlterCommand::RemoveProperty removePropertyFromString(const String & property)
         return AlterCommand::RemoveProperty::CODEC;
     else if (property == "TTL")
         return AlterCommand::RemoveProperty::TTL;
+    else if (property == "SETTINGS")
+        return AlterCommand::RemoveProperty::SETTINGS;

     throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot remove unknown property '{}'", property);
 }
@@ -173,6 +176,25 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_ast)
         if (ast_col_decl.codec)
             command.codec = ast_col_decl.codec;

+        if (ast_col_decl.settings)
+            command.settings_changes = ast_col_decl.settings->as<ASTSetQuery &>().changes;
+
+        /// At most one of ast_col_decl.settings or command_ast->settings_changes is non-null.
+        if (command_ast->settings_changes)
+        {
+            command.settings_changes = command_ast->settings_changes->as<ASTSetQuery &>().changes;
+            command.append_column_setting = true;
+        }
+
+        if (command_ast->settings_resets)
+        {
+            for (const ASTPtr & identifier_ast : command_ast->settings_resets->children)
+            {
+                const auto & identifier = identifier_ast->as<ASTIdentifier &>();
+                command.settings_resets.emplace(identifier.name());
+            }
+        }
+
         if (command_ast->column)
             command.after_column = getIdentifierName(command_ast->column);
@@ -501,6 +523,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const
         {
             column.ttl.reset();
         }
+        else if (to_remove == RemoveProperty::SETTINGS)
+        {
+            column.settings.clear();
+        }
         else
         {
             if (codec)
@@ -515,6 +541,22 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const
             if (data_type)
                 column.type = data_type;

+            if (!settings_changes.empty())
+            {
+                MergeTreeColumnSettings::validate(settings_changes);
+                if (append_column_setting)
+                    for (const auto & change : settings_changes)
+                        column.settings.setSetting(change.name, change.value);
+                else
+                    column.settings = settings_changes;
+            }
+
+            if (!settings_resets.empty())
+            {
+                for (const auto & setting : settings_resets)
+                    column.settings.removeSetting(setting);
+            }
+
             /// User specified default expression or changed
             /// datatype. We have to replace default.
             if (default_expression || data_type)
@@ -1357,7 +1399,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
                     ErrorCodes::BAD_ARGUMENTS,
                     "Column {} doesn't have COMMENT, cannot remove it",
                     backQuote(column_name));
-            }

             modified_columns.emplace(column_name);

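The append_column_setting flag selects between two application modes: MODIFY COLUMN ... MODIFY SETTING merges the changes into whatever the column already has, while a full settings declaration replaces the list. A sketch of the two modes over a plain map (illustrative, not the SettingsChanges API):

#include <cassert>
#include <map>
#include <string>

using Settings = std::map<std::string, int>;

void applyChanges(Settings & column, const Settings & changes, bool append)
{
    if (append)
        for (const auto & [name, value] : changes)
            column[name] = value;      /// merge: untouched settings survive
    else
        column = changes;              /// replace: the declaration wins wholesale
}

int main()
{
    Settings column{{"a", 1}, {"b", 2}};

    Settings merged = column;
    applyChanges(merged, {{"b", 20}}, /*append=*/true);
    assert(merged.size() == 2 && merged["a"] == 1 && merged["b"] == 20);

    Settings replaced = column;
    applyChanges(replaced, {{"b", 20}}, /*append=*/false);
    assert(replaced.size() == 1 && replaced["b"] == 20);
}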
View File

@@ -64,7 +64,8 @@ struct AlterCommand
         /// Other properties
         COMMENT,
         CODEC,
-        TTL
+        TTL,
+        SETTINGS
     };

     Type type = UNKNOWN;
@@ -137,10 +138,10 @@ struct AlterCommand
     /// For ADD and MODIFY
     ASTPtr codec = nullptr;

-    /// For MODIFY SETTING
+    /// For MODIFY SETTING or MODIFY COLUMN MODIFY SETTING
     SettingsChanges settings_changes;

-    /// For RESET SETTING
+    /// For RESET SETTING or MODIFY COLUMN RESET SETTING
     std::set<String> settings_resets;

     /// For MODIFY_QUERY
@@ -155,6 +156,9 @@ struct AlterCommand
     /// What to remove from column (or TTL)
     RemoveProperty to_remove = RemoveProperty::NO_PROPERTY;

+    /// Whether this is MODIFY COLUMN MODIFY SETTING or a MODIFY COLUMN with a settings declaration.
+    bool append_column_setting = false;
+
     static std::optional<AlterCommand> parse(const ASTAlterCommand * command);

     void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const;

View File

@@ -24,6 +24,7 @@
 #include <Interpreters/Context.h>
 #include <Storages/IStorage.h>
 #include <Common/typeid_cast.h>
+#include "Parsers/ASTSetQuery.h"
 #include <Core/Defines.h>
 #include <Compression/CompressionFactory.h>
 #include <Interpreters/ExpressionAnalyzer.h>
@@ -72,6 +73,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
         && default_desc == other.default_desc
         && stat == other.stat
         && ast_to_str(codec) == ast_to_str(other.codec)
+        && settings == other.settings
         && ast_to_str(ttl) == ast_to_str(other.ttl);
 }
@@ -104,6 +106,18 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
         writeEscapedString(queryToString(codec), buf);
     }

+    if (!settings.empty())
+    {
+        writeChar('\t', buf);
+        DB::writeText("SETTINGS ", buf);
+        DB::writeText("(", buf);
+        ASTSetQuery ast;
+        ast.is_standalone = false;
+        ast.changes = settings;
+        writeEscapedString(queryToString(ast), buf);
+        DB::writeText(")", buf);
+    }
+
     if (stat)
     {
         writeChar('\t', buf);
@@ -154,6 +168,9 @@ void ColumnDescription::readText(ReadBuffer & buf)
             if (col_ast->ttl)
                 ttl = col_ast->ttl;

+            if (col_ast->settings)
+                settings = col_ast->settings->as<ASTSetQuery &>().changes;
+
         }
         else
             throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");

View File

@@ -7,6 +7,7 @@
 #include <Core/NamesAndAliases.h>
 #include <Interpreters/Context_fwd.h>
 #include <Storages/ColumnDefault.h>
+#include <Common/SettingsChanges.h>
 #include <Storages/StatisticsDescription.h>
 #include <Common/Exception.h>
@@ -83,6 +84,7 @@ struct ColumnDescription
     ColumnDefault default_desc;
     String comment;
     ASTPtr codec;
+    SettingsChanges settings;
     ASTPtr ttl;
     std::optional<StatisticDescription> stat;

View File

@@ -37,7 +37,7 @@ namespace ErrorCodes
 AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS(
     IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr<ReadBufferFromHDFS> impl_)
-    : BufferWithOwnMemory<SeekableReadBuffer>(settings_.remote_fs_buffer_size)
+    : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0)
     , reader(reader_)
     , base_priority(settings_.priority)
     , impl(std::move(impl_))

View File

@@ -21,7 +21,7 @@ namespace DB
 class IAsynchronousReader;

-class AsynchronousReadBufferFromHDFS : public BufferWithOwnMemory<SeekableReadBuffer>, public WithFileName, public WithFileSize
+class AsynchronousReadBufferFromHDFS : public ReadBufferFromFileBase
 {
 public:
     AsynchronousReadBufferFromHDFS(

View File

@@ -81,7 +81,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const PartMetadataManagerPtr & manager)
     auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key);
     size_t minmax_idx_size = minmax_column_types.size();

-    hyperrectangle.clear();
     hyperrectangle.reserve(minmax_idx_size);
     for (size_t i = 0; i < minmax_idx_size; ++i)
     {
@@ -105,39 +104,6 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const PartMetadataManagerPtr & manager)
     initialized = true;
 }

-Block IMergeTreeDataPart::MinMaxIndex::getBlock(const MergeTreeData & data) const
-{
-    if (!initialized)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to get block from uninitialized MinMax index.");
-
-    Block block;
-
-    const auto metadata_snapshot = data.getInMemoryMetadataPtr();
-    const auto & partition_key = metadata_snapshot->getPartitionKey();
-
-    const auto minmax_column_names = data.getMinMaxColumnsNames(partition_key);
-    const auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key);
-    const auto minmax_idx_size = minmax_column_types.size();
-
-    for (size_t i = 0; i < minmax_idx_size; ++i)
-    {
-        const auto & data_type = minmax_column_types[i];
-        const auto & column_name = minmax_column_names[i];
-
-        const auto column = data_type->createColumn();
-        const auto min_val = hyperrectangle.at(i).left;
-        const auto max_val = hyperrectangle.at(i).right;
-
-        column->insert(min_val);
-        column->insert(max_val);
-
-        block.insert(ColumnWithTypeAndName(column->getPtr(), data_type, column_name));
-    }
-
-    return block;
-}
-
 IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store(
     const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const
 {
@@ -219,7 +185,8 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other)
     if (!initialized)
     {
-        *this = other;
+        hyperrectangle = other.hyperrectangle;
+        initialized = true;
     }
     else
     {

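Replacing *this = other with an explicit copy of hyperrectangle plus initialized is deliberate: whole-object assignment would also clobber any state of the target index that merge() has no business touching. A toy illustration with a hypothetical extra member:

#include <cassert>
#include <vector>

struct MinMaxIndex
{
    std::vector<int> hyperrectangle;
    bool initialized = false;
    int unrelated_state = 0;   /// stands in for members merge() must not touch

    void merge(const MinMaxIndex & other)
    {
        if (!initialized)
        {
            /// Copy only what the merge is about; `*this = other` would
            /// also overwrite unrelated_state with other's value.
            hyperrectangle = other.hyperrectangle;
            initialized = true;
        }
        /// else: extend hyperrectangle element-wise (omitted)
    }
};

int main()
{
    MinMaxIndex a;
    a.unrelated_state = 42;

    MinMaxIndex b;
    b.hyperrectangle = {1, 2, 3};
    b.initialized = true;

    a.merge(b);
    assert(a.initialized && a.hyperrectangle.size() == 3);
    assert(a.unrelated_state == 42);   /// preserved by the field-wise copy
}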
View File

@@ -336,7 +336,6 @@ public:
     }

     void load(const MergeTreeData & data, const PartMetadataManagerPtr & manager);
-    Block getBlock(const MergeTreeData & data) const;

     using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;

View File

@@ -1,37 +1,36 @@
-#include <Columns/ColumnConst.h>
-#include <Columns/ColumnSet.h>
-#include <DataTypes/DataTypesNumber.h>
+#include <Storages/MergeTree/KeyCondition.h>
+#include <Storages/MergeTree/BoolMask.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeNothing.h>
 #include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/FieldToDataType.h>
-#include <DataTypes/Utils.h>
 #include <DataTypes/getLeastSupertype.h>
-#include <Functions/CastOverloadResolver.h>
-#include <Functions/FunctionFactory.h>
-#include <Functions/IFunction.h>
-#include <Functions/indexHint.h>
-#include <IO/Operators.h>
-#include <IO/WriteBufferFromString.h>
-#include <Interpreters/ExpressionActions.h>
-#include <Interpreters/ExpressionAnalyzer.h>
-#include <Interpreters/Set.h>
+#include <DataTypes/Utils.h>
 #include <Interpreters/TreeRewriter.h>
-#include <Interpreters/applyFunction.h>
+#include <Interpreters/ExpressionAnalyzer.h>
+#include <Interpreters/ExpressionActions.h>
 #include <Interpreters/castColumn.h>
-#include <Interpreters/convertFieldToType.h>
 #include <Interpreters/misc.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTSelectQuery.h>
-#include <Parsers/queryToString.h>
-#include <Storages/MergeTree/BoolMask.h>
-#include <Storages/MergeTree/KeyCondition.h>
-#include <Storages/MergeTree/MergeTreeIndexUtils.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/indexHint.h>
+#include <Functions/CastOverloadResolver.h>
+#include <Functions/IFunction.h>
 #include <Common/FieldVisitorToString.h>
 #include <Common/MortonUtils.h>
 #include <Common/typeid_cast.h>
+#include <Columns/ColumnSet.h>
+#include <Columns/ColumnConst.h>
+#include <Interpreters/convertFieldToType.h>
+#include <Interpreters/Set.h>
+#include <Parsers/queryToString.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+#include <Storages/MergeTree/MergeTreeIndexUtils.h>

 #include <algorithm>
 #include <cassert>
@@ -837,6 +836,21 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants
     return node.tryGetConstant(out_value, out_type);
 }

+static Field applyFunctionForField(
+    const FunctionBasePtr & func,
+    const DataTypePtr & arg_type,
+    const Field & arg_value)
+{
+    ColumnsWithTypeAndName columns
+    {
+        { arg_type->createColumnConst(1, arg_value), arg_type, "x" },
+    };
+
+    auto col = func->execute(columns, func->getResultType(), 1);
+    return (*col)[0];
+}
+
 /// The case when arguments may have types different than in the primary key.
 static std::pair<Field, DataTypePtr> applyFunctionForFieldOfUnknownType(
     const FunctionBasePtr & func,
@@ -876,6 +890,33 @@ static std::pair<Field, DataTypePtr> applyBinaryFunctionForFieldOfUnknownType(
     return {std::move(result), std::move(return_type)};
 }

+static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
+{
+    /// Fallback for fields without block reference.
+    if (field.isExplicit())
+        return applyFunctionForField(func, current_type, field);
+
+    String result_name = "_" + func->getName() + "_" + toString(field.column_idx);
+    const auto & columns = field.columns;
+    size_t result_idx = columns->size();
+
+    for (size_t i = 0; i < result_idx; ++i)
+    {
+        if ((*columns)[i].name == result_name)
+            result_idx = i;
+    }
+
+    if (result_idx == columns->size())
+    {
+        ColumnsWithTypeAndName args{(*columns)[field.column_idx]};
+        field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name});
+        (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size());
+    }
+
+    return {field.columns, field.row_idx, result_idx};
+}
+
 /** When table's key has expression with these functions from a column,
  * and when a column in a query is compared with a constant, such as:
  * CREATE TABLE (x String) ORDER BY toDate(x)

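applyFunction memoizes its result inside the shared column list: it derives a name from the function and the source column index, reuses an existing entry under that name, and otherwise computes and appends one. A standalone sketch of that memoize-by-name idea with toy types (not FieldRef or IFunctionBase):

#include <cassert>
#include <functional>
#include <string>
#include <vector>

struct NamedColumn { std::string name; std::vector<int> values; };
using Columns = std::vector<NamedColumn>;

/// Applies `func` to column `idx`, caching the result under a derived name
/// so repeated calls against the same column list do the work only once.
size_t applyMemoized(Columns & columns, size_t idx,
                     const std::string & func_name, const std::function<int(int)> & func)
{
    std::string result_name = "_" + func_name + "_" + std::to_string(idx);

    for (size_t i = 0; i < columns.size(); ++i)
        if (columns[i].name == result_name)
            return i;                               /// cache hit

    NamedColumn result{result_name, {}};
    for (int v : columns[idx].values)
        result.values.push_back(func(v));           /// compute once
    columns.push_back(std::move(result));
    return columns.size() - 1;
}

int main()
{
    Columns columns{{"x", {1, 2, 3}}};
    auto twice = [](int v) { return 2 * v; };

    size_t first = applyMemoized(columns, 0, "twice", twice);
    size_t second = applyMemoized(columns, 0, "twice", twice);
    assert(first == second && columns.size() == 2); /// second call hit the cache
    assert(columns[first].values[2] == 6);
}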
View File

@@ -8,6 +8,21 @@
 #include <Backups/BackupEntryWrappedWith.h>
 #include <Backups/IBackup.h>
 #include <Backups/RestorerFromBackup.h>
+#include <Common/Config/ConfigHelper.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/Increment.h>
+#include <Common/ProfileEventsScope.h>
+#include <Common/SimpleIncrement.h>
+#include <Common/Stopwatch.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Common/ThreadFuzzer.h>
+#include <Common/escapeForFileName.h>
+#include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/noexcept_scope.h>
+#include <Common/quoteString.h>
+#include <Common/scope_guard_safe.h>
+#include <Common/typeid_cast.h>
+#include <Storages/MergeTree/RangesInDataPart.h>
 #include <Compression/CompressedReadBuffer.h>
 #include <Core/QueryProcessingStage.h>
 #include <DataTypes/DataTypeEnum.h>
@@ -28,20 +43,19 @@
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Aggregator.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/convertFieldToType.h>
+#include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/InterpreterSelectQuery.h>
 #include <Interpreters/MergeTreeTransaction.h>
 #include <Interpreters/PartLog.h>
 #include <Interpreters/TransactionLog.h>
 #include <Interpreters/TreeRewriter.h>
-#include <Interpreters/convertFieldToType.h>
-#include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/inplaceBlockConversions.h>
-#include <Parsers/ASTAlterQuery.h>
 #include <Parsers/ASTExpressionList.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTHelpers.h>
 #include <Parsers/ASTIndexDeclaration.h>
+#include <Parsers/ASTHelpers.h>
+#include <Parsers/ASTFunction.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTNameTypePair.h>
 #include <Parsers/ASTPartition.h>
@@ -50,41 +64,26 @@
 #include <Parsers/ExpressionListParsers.h>
 #include <Parsers/parseQuery.h>
 #include <Parsers/queryToString.h>
+#include <Parsers/ASTAlterQuery.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/QueryPlan/QueryIdHolder.h>
 #include <Processors/QueryPlan/ReadFromMergeTree.h>
 #include <Storages/AlterCommands.h>
 #include <Storages/BlockNumberColumn.h>
 #include <Storages/Freeze.h>
-#include <Storages/MergeTree/ActiveDataPartSet.h>
 #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
 #include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
-#include <Storages/MergeTree/MergeTreeDataPartCloner.h>
 #include <Storages/MergeTree/MergeTreeDataPartCompact.h>
 #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/MergeTreeDataPartWide.h>
 #include <Storages/Statistics/Estimator.h>
 #include <Storages/MergeTree/MergeTreeSelectProcessor.h>
-#include <Storages/MergeTree/RangesInDataPart.h>
 #include <Storages/MergeTree/checkDataPart.h>
 #include <Storages/MutationCommands.h>
+#include <Storages/MergeTree/ActiveDataPartSet.h>
 #include <Storages/StorageMergeTree.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/VirtualColumnUtils.h>
-#include <Common/Config/ConfigHelper.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/Increment.h>
-#include <Common/ProfileEventsScope.h>
#include <Common/SimpleIncrement.h>
#include <Common/Stopwatch.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/ThreadFuzzer.h>
#include <Common/escapeForFileName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/noexcept_scope.h>
#include <Common/quoteString.h>
#include <Common/scope_guard_safe.h>
#include <Common/typeid_cast.h>
#include <boost/range/algorithm_ext/erase.hpp> #include <boost/range/algorithm_ext/erase.hpp>
#include <boost/algorithm/string/join.hpp> #include <boost/algorithm/string/join.hpp>
@ -198,50 +197,6 @@ namespace ErrorCodes
extern const int LIMIT_EXCEEDED; extern const int LIMIT_EXCEEDED;
} }
static size_t getPartitionAstFieldsCount(const ASTPartition & partition_ast, ASTPtr partition_value_ast)
{
if (partition_ast.fields_count.has_value())
return *partition_ast.fields_count;
if (partition_value_ast->as<ASTLiteral>())
return 1;
const auto * tuple_ast = partition_value_ast->as<ASTFunction>();
if (!tuple_ast)
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID());
}
if (tuple_ast->name != "tuple")
{
if (!isFunctionCast(tuple_ast))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
if (tuple_ast->arguments->as<ASTExpressionList>()->children.empty())
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
auto first_arg = tuple_ast->arguments->as<ASTExpressionList>()->children.at(0);
if (const auto * inner_tuple = first_arg->as<ASTFunction>(); inner_tuple && inner_tuple->name == "tuple")
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
return arguments_ast ? arguments_ast->children.size() : 0;
}
else if (const auto * inner_literal_tuple = first_arg->as<ASTLiteral>(); inner_literal_tuple)
{
return inner_literal_tuple->value.getType() == Field::Types::Tuple ? inner_literal_tuple->value.safeGet<Tuple>().size() : 1;
}
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
else
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
return arguments_ast ? arguments_ast->children.size() : 0;
}
}
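
The helper derives the field count from the shape of the PARTITION expression: a bare literal is one field, a tuple contributes one field per argument (the CAST-wrapper special case above is omitted here). A toy sketch, assuming a simplified recursive AST in place of ASTLiteral/ASTFunction:

#include <cstddef>
#include <string>
#include <variant>
#include <vector>

// Toy stand-ins for ASTLiteral and the ASTFunction "tuple".
struct Literal { std::string value; };
struct Tuple;
using PartitionAst = std::variant<Literal, Tuple>;
struct Tuple { std::vector<PartitionAst> args; };

// Mirrors the core of getPartitionAstFieldsCount:
// PARTITION 201805 -> 1 field, PARTITION (1, 'a') -> 2, PARTITION tuple() -> 0.
size_t partitionFieldsCount(const PartitionAst & ast)
{
    if (std::holds_alternative<Literal>(ast))
        return 1;
    return std::get<Tuple>(ast).args.size();
}
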
static void checkSuspiciousIndices(const ASTFunction * index_function) static void checkSuspiciousIndices(const ASTFunction * index_function)
{ {
std::unordered_set<UInt64> unique_index_expression_hashes; std::unordered_set<UInt64> unique_index_expression_hashes;
@ -4899,7 +4854,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D
} }
void MergeTreeData::checkAlterPartitionIsPossible( void MergeTreeData::checkAlterPartitionIsPossible(
const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr) const
const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const
{ {
for (const auto & command : commands) for (const auto & command : commands)
{ {
@ -4927,15 +4882,7 @@ void MergeTreeData::checkAlterPartitionIsPossible(
throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently"); throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently");
} }
else else
{
// The below `getPartitionIDFromQuery` call will not work for attach / replace because it assumes the partition expressions
// are the same and deliberately uses this storage. Later on, `MergeTreeData::replaceFrom` is called, and it makes the right
// call to `getPartitionIDFromQuery` using source storage.
// Note: `PartitionCommand::REPLACE_PARTITION` is used both for `REPLACE PARTITION` and `ATTACH PARTITION FROM` queries.
// But not for `ATTACH PARTITION` queries.
if (command.type != PartitionCommand::REPLACE_PARTITION)
getPartitionIDFromQuery(command.partition, getContext());
}
getPartitionIDFromQuery(command.partition, local_context);
} }
} }
} }
@ -5669,8 +5616,69 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version); MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version);
return partition_ast.id->as<ASTLiteral>()->value.safeGet<String>(); return partition_ast.id->as<ASTLiteral>()->value.safeGet<String>();
} }
size_t partition_ast_fields_count = 0;
ASTPtr partition_value_ast = partition_ast.value->clone(); ASTPtr partition_value_ast = partition_ast.value->clone();
auto partition_ast_fields_count = getPartitionAstFieldsCount(partition_ast, partition_value_ast);
if (!partition_ast.fields_count.has_value())
{
if (partition_value_ast->as<ASTLiteral>())
{
partition_ast_fields_count = 1;
}
else if (const auto * tuple_ast = partition_value_ast->as<ASTFunction>())
{
if (tuple_ast->name != "tuple")
{
if (isFunctionCast(tuple_ast))
{
if (tuple_ast->arguments->as<ASTExpressionList>()->children.empty())
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
auto first_arg = tuple_ast->arguments->as<ASTExpressionList>()->children.at(0);
if (const auto * inner_tuple = first_arg->as<ASTFunction>(); inner_tuple && inner_tuple->name == "tuple")
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
partition_ast_fields_count = arguments_ast->children.size();
else
partition_ast_fields_count = 0;
}
else if (const auto * inner_literal_tuple = first_arg->as<ASTLiteral>(); inner_literal_tuple)
{
if (inner_literal_tuple->value.getType() == Field::Types::Tuple)
partition_ast_fields_count = inner_literal_tuple->value.safeGet<Tuple>().size();
else
partition_ast_fields_count = 1;
}
else
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
}
else
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name);
}
else
{
const auto * arguments_ast = tuple_ast->arguments->as<ASTExpressionList>();
if (arguments_ast)
partition_ast_fields_count = arguments_ast->children.size();
else
partition_ast_fields_count = 0;
}
}
else
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID());
}
}
else
{
partition_ast_fields_count = *partition_ast.fields_count;
}
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{ {
@ -7006,35 +7014,23 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour
if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical()))
throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure");
if (queryToStringNullable(my_snapshot->getSortingKeyAST()) != queryToStringNullable(src_snapshot->getSortingKeyAST()))
auto query_to_string = [] (const ASTPtr & ast)
{
return ast ? queryToString(ast) : "";
};
if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering");
if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key");
if (format_version != src_data->format_version) if (format_version != src_data->format_version)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version");
if (queryToStringNullable(my_snapshot->getPrimaryKeyAST()) != queryToStringNullable(src_snapshot->getPrimaryKeyAST()))
if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST()))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key");
const auto is_a_subset_of = [](const auto & lhs, const auto & rhs)
{
if (lhs.size() > rhs.size())
return false;
const auto rhs_set = NameSet(rhs.begin(), rhs.end());
for (const auto & lhs_element : lhs)
if (!rhs_set.contains(lhs_element))
return false;
return true;
};
if (!is_a_subset_of(my_snapshot->getColumnsRequiredForPartitionKey(), src_snapshot->getColumnsRequiredForPartitionKey()))
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Destination table partition expression columns must be a subset of source table partition expression columns");
}
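
The removed `is_a_subset_of` is a plain size-then-membership test over column-name sets. A standalone equivalent (illustrative name, std-only types):

#include <string>
#include <unordered_set>
#include <vector>

// True when every element of `lhs` occurs in `rhs`; the size check is a
// cheap early exit, as in the removed lambda above.
bool isSubsetOf(const std::vector<std::string> & lhs, const std::vector<std::string> & rhs)
{
    if (lhs.size() > rhs.size())
        return false;
    const std::unordered_set<std::string> rhs_set(rhs.begin(), rhs.end());
    for (const auto & element : lhs)
        if (!rhs_set.contains(element))
            return false;
    return true;
}

// e.g. destination partition key columns {"a"} vs source {"a", "b"} -> true,
// so the REPLACE/ATTACH proceeded without further checks.
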
const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions)
{ {
if (my_descriptions.size() != src_descriptions.size()) if (my_descriptions.size() != src_descriptions.size())
@ -7075,56 +7071,128 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
const ReadSettings & read_settings, const ReadSettings & read_settings,
const WriteSettings & write_settings) const WriteSettings & write_settings)
{ {
return MergeTreeDataPartCloner::clone(
this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings);
}
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
const MergeTreeData::DataPartPtr & src_part,
const MergeTreePartition & new_partition,
const String & partition_id,
const IMergeTreeDataPart::MinMaxIndex & min_max_index,
const String & tmp_part_prefix,
const StorageMetadataPtr & my_metadata_snapshot,
const IDataPartStorage::ClonePartParams & clone_params,
ContextPtr local_context,
Int64 min_block,
Int64 max_block
)
{
MergeTreePartInfo dst_part_info(partition_id, min_block, max_block, src_part->info.level);
return MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression(
this,
src_part,
my_metadata_snapshot,
dst_part_info,
tmp_part_prefix,
local_context->getReadSettings(),
local_context->getWriteSettings(),
new_partition,
min_max_index,
false,
clone_params);
}
std::pair<MergeTreePartition, IMergeTreeDataPart::MinMaxIndex> MergeTreeData::createPartitionAndMinMaxIndexFromSourcePart(
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context)
{
const auto & src_data = src_part->storage;
auto metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(src_part.get());
IMergeTreeDataPart::MinMaxIndex min_max_index;
min_max_index.load(src_data, metadata_manager);
MergeTreePartition new_partition;
new_partition.create(metadata_snapshot, min_max_index.getBlock(src_data), 0u, local_context);
return {new_partition, min_max_index};
}
/// Check that the storage policy contains the disk where the src_part is located.
bool does_storage_policy_allow_same_disk = false;
for (const DiskPtr & disk : getStoragePolicy()->getDisks())
{
if (disk->getName() == src_part->getDataPartStorage().getDiskName())
{
does_storage_policy_allow_same_disk = true;
break;
}
}
if (!does_storage_policy_allow_same_disk)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Could not clone and load part {} because disk does not belong to storage policy",
quoteString(src_part->getDataPartStorage().getFullPath()));
String dst_part_name = src_part->getNewName(dst_part_info);
String tmp_dst_part_name = tmp_part_prefix + dst_part_name;
auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name);
/// Why it is needed if we only hardlink files?
auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk());
auto src_part_storage = src_part->getDataPartStoragePtr();
scope_guard src_flushed_tmp_dir_lock;
MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;
/// If source part is in memory, flush it to disk and clone it already in on-disk format
/// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock
/// Construct src_flushed_tmp_part in order to delete part with its directory at destructor
if (auto src_part_in_memory = asInMemoryPart(src_part))
{
auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix);
auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename();
src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name);
auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot);
src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage)
.withPartInfo(src_part->info)
.withPartFormatFromDisk()
.build();
src_flushed_tmp_part->is_temp = true;
src_part_storage = flushed_part_storage;
}
String with_copy;
if (params.copy_instead_of_hardlink)
with_copy = " (copying data)";
auto dst_part_storage = src_part_storage->freeze(
relative_data_path,
tmp_dst_part_name,
read_settings,
write_settings,
/* save_metadata_callback= */ {},
params);
if (params.metadata_version_to_write.has_value())
{
chassert(!params.keep_metadata_version);
auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings());
writeText(metadata_snapshot->getMetadataVersion(), *out_metadata);
out_metadata->finalize();
if (getSettings()->fsync_after_insert)
out_metadata->sync();
}
LOG_DEBUG(log, "Clone{} part {} to {}{}",
src_flushed_tmp_part ? " flushed" : "",
src_part_storage->getFullPath(),
std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name),
with_copy);
auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage)
.withPartFormatFromDisk()
.build();
if (!params.copy_instead_of_hardlink && params.hardlinked_files)
{
params.hardlinked_files->source_part_name = src_part->name;
params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID();
for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
{
if (!params.files_to_copy_instead_of_hardlinks.contains(it->name())
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(it->name());
}
}
auto projections = src_part->getProjectionParts();
for (const auto & [name, projection_part] : projections)
{
const auto & projection_storage = projection_part->getDataPartStorage();
for (auto it = projection_storage.iterate(); it->isValid(); it->next())
{
auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name();
if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix)
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix);
}
}
}
}
/// We should write version metadata on part creation to distinguish it from parts that were created without transaction.
TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID;
dst_data_part->version.setCreationTID(tid, nullptr);
dst_data_part->storeVersionMetadata();
dst_data_part->is_temp = true;
dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime();
return std::make_pair(dst_data_part, std::move(temporary_directory_lock));
}
String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const


@ -231,7 +231,6 @@ public:
} }
}; };
using DataParts = std::set<DataPartPtr, LessDataPart>; using DataParts = std::set<DataPartPtr, LessDataPart>;
using MutableDataParts = std::set<MutableDataPartPtr, LessDataPart>; using MutableDataParts = std::set<MutableDataPartPtr, LessDataPart>;
using DataPartsVector = std::vector<DataPartPtr>; using DataPartsVector = std::vector<DataPartPtr>;
@ -849,23 +848,6 @@ public:
const ReadSettings & read_settings, const ReadSettings & read_settings,
const WriteSettings & write_settings); const WriteSettings & write_settings);
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
const MergeTreeData::DataPartPtr & src_part,
const MergeTreePartition & new_partition,
const String & partition_id,
const IMergeTreeDataPart::MinMaxIndex & min_max_index,
const String & tmp_part_prefix,
const StorageMetadataPtr & my_metadata_snapshot,
const IDataPartStorage::ClonePartParams & clone_params,
ContextPtr local_context,
Int64 min_block,
Int64 max_block);
static std::pair<MergeTreePartition, IMergeTreeDataPart::MinMaxIndex> createPartitionAndMinMaxIndexFromSourcePart(
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context);
virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0; virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
/// Returns true if table can create new parts with adaptive granularity /// Returns true if table can create new parts with adaptive granularity


@ -1,320 +0,0 @@
#include <Interpreters/MergeTreeTransaction.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
#include <Storages/MergeTree/MergeTreeDataPartCloner.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
static Poco::Logger * log = &Poco::Logger::get("MergeTreeDataPartCloner");
namespace DistinctPartitionExpression
{
std::unique_ptr<WriteBufferFromFileBase> updatePartitionFile(
const MergeTreeData & merge_tree_data,
const MergeTreePartition & partition,
const MergeTreeData::MutableDataPartPtr & dst_part,
IDataPartStorage & storage)
{
storage.removeFile("partition.dat");
// Leverage already implemented MergeTreePartition::store to create & store partition.dat.
// Checksum is re-calculated later.
return partition.store(merge_tree_data, storage, dst_part->checksums);
}
IMergeTreeDataPart::MinMaxIndex::WrittenFiles updateMinMaxFiles(
const MergeTreeData & merge_tree_data,
const MergeTreeData::MutableDataPartPtr & dst_part,
IDataPartStorage & storage,
const StorageMetadataPtr & metadata_snapshot)
{
for (const auto & column_name : MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->partition_key))
{
auto file = "minmax_" + escapeForFileName(column_name) + ".idx";
storage.removeFile(file);
}
return dst_part->minmax_idx->store(merge_tree_data, storage, dst_part->checksums);
}
void finalizeNewFiles(const std::vector<std::unique_ptr<WriteBufferFromFileBase>> & files, bool sync_new_files)
{
for (const auto & file : files)
{
file->finalize();
if (sync_new_files)
file->sync();
}
}
void updateNewPartFiles(
const MergeTreeData & merge_tree_data,
const MergeTreeData::MutableDataPartPtr & dst_part,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
const StorageMetadataPtr & src_metadata_snapshot,
bool sync_new_files)
{
auto & storage = dst_part->getDataPartStorage();
*dst_part->minmax_idx = new_min_max_index;
auto partition_file = updatePartitionFile(merge_tree_data, new_partition, dst_part, storage);
auto min_max_files = updateMinMaxFiles(merge_tree_data, dst_part, storage, src_metadata_snapshot);
IMergeTreeDataPart::MinMaxIndex::WrittenFiles written_files;
if (partition_file)
written_files.emplace_back(std::move(partition_file));
written_files.insert(written_files.end(), std::make_move_iterator(min_max_files.begin()), std::make_move_iterator(min_max_files.end()));
finalizeNewFiles(written_files, sync_new_files);
// MergeTreeDataPartCloner::finalize_part calls IMergeTreeDataPart::loadColumnsChecksumsIndexes, which will re-create
// the checksum file if it doesn't exist. Relying on that is cumbersome, but this refactoring is simply a code extraction
// with small improvements. It can be further improved in the future.
storage.removeFile("checksums.txt");
}
}
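
Taken together, `updateNewPartFiles` regenerates only the per-part partition metadata of the hardlinked clone; the data files themselves remain hardlinks to the source part. A hedged summary sketch of which files are touched (column-name escaping via escapeForFileName and the checksum regeneration on load are simplified away):

#include <string>
#include <vector>

// The per-part files rewritten for the new partition key.
std::vector<std::string> filesRewrittenForNewPartitionKey(const std::vector<std::string> & partition_minmax_columns)
{
    std::vector<std::string> files{"partition.dat"};          // the new partition value
    for (const auto & column : partition_minmax_columns)
        files.push_back("minmax_" + column + ".idx");         // fresh min/max index per key column
    files.push_back("checksums.txt");                         // removed; re-created when the part is loaded
    return files;
}
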
namespace
{
bool doesStoragePolicyAllowSameDisk(MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part)
{
for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks())
if (disk->getName() == src_part->getDataPartStorage().getDiskName())
return true;
return false;
}
DataPartStoragePtr flushPartStorageToDiskIfInMemory(
MergeTreeData * merge_tree_data,
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const String & tmp_part_prefix,
const String & tmp_dst_part_name,
scope_guard & src_flushed_tmp_dir_lock,
MergeTreeData::MutableDataPartPtr src_flushed_tmp_part)
{
if (auto src_part_in_memory = asInMemoryPart(src_part))
{
auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix);
auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename();
src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name);
auto flushed_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot);
src_flushed_tmp_part = MergeTreeDataPartBuilder(*merge_tree_data, src_part->name, flushed_part_storage)
.withPartInfo(src_part->info)
.withPartFormatFromDisk()
.build();
src_flushed_tmp_part->is_temp = true;
return flushed_part_storage;
}
return src_part->getDataPartStoragePtr();
}
std::shared_ptr<IDataPartStorage> hardlinkAllFiles(
MergeTreeData * merge_tree_data,
const DB::ReadSettings & read_settings,
const DB::WriteSettings & write_settings,
const DataPartStoragePtr & storage,
const String & path,
const DB::IDataPartStorage::ClonePartParams & params)
{
return storage->freeze(
merge_tree_data->getRelativeDataPath(),
path,
read_settings,
write_settings,
/*save_metadata_callback=*/{},
params);
}
std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> cloneSourcePart(
MergeTreeData * merge_tree_data,
const MergeTreeData::DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const DB::IDataPartStorage::ClonePartParams & params)
{
const auto dst_part_name = src_part->getNewName(dst_part_info);
const auto tmp_dst_part_name = tmp_part_prefix + dst_part_name;
auto temporary_directory_lock = merge_tree_data->getTemporaryPartDirectoryHolder(tmp_dst_part_name);
src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk());
scope_guard src_flushed_tmp_dir_lock;
MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;
auto src_part_storage = flushPartStorageToDiskIfInMemory(
merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part);
auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params);
if (params.metadata_version_to_write.has_value())
{
chassert(!params.keep_metadata_version);
auto out_metadata = dst_part_storage->writeFile(
IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, merge_tree_data->getContext()->getWriteSettings());
writeText(metadata_snapshot->getMetadataVersion(), *out_metadata);
out_metadata->finalize();
if (merge_tree_data->getSettings()->fsync_after_insert)
out_metadata->sync();
}
LOG_DEBUG(
log,
"Clone {} part {} to {}{}",
src_flushed_tmp_part ? "flushed" : "",
src_part_storage->getFullPath(),
std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name),
false);
auto part = MergeTreeDataPartBuilder(*merge_tree_data, dst_part_name, dst_part_storage).withPartFormatFromDisk().build();
return std::make_pair(part, std::move(temporary_directory_lock));
}
void handleHardLinkedParameterFiles(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params)
{
const auto & hardlinked_files = params.hardlinked_files;
hardlinked_files->source_part_name = src_part->name;
hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID();
for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next())
{
if (!params.files_to_copy_instead_of_hardlinks.contains(it->name())
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
hardlinked_files->hardlinks_from_source_part.insert(it->name());
}
}
}
void handleProjections(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params)
{
auto projections = src_part->getProjectionParts();
for (const auto & [name, projection_part] : projections)
{
const auto & projection_storage = projection_part->getDataPartStorage();
for (auto it = projection_storage.iterate(); it->isValid(); it->next())
{
auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name();
if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix)
&& it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED
&& it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)
{
params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix);
}
}
}
}
MergeTreeData::MutableDataPartPtr finalizePart(
const MergeTreeData::MutableDataPartPtr & dst_part, const DB::IDataPartStorage::ClonePartParams & params, bool require_part_metadata)
{
/// We should write version metadata on part creation to distinguish it from parts that were created without transaction.
TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID;
dst_part->version.setCreationTID(tid, nullptr);
dst_part->storeVersionMetadata();
dst_part->is_temp = true;
dst_part->loadColumnsChecksumsIndexes(require_part_metadata, true);
dst_part->modification_time = dst_part->getDataPartStorage().getLastModified().epochTime();
return dst_part;
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> cloneAndHandleHardlinksAndProjections(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const IDataPartStorage::ClonePartParams & params)
{
if (!doesStoragePolicyAllowSameDisk(merge_tree_data, src_part))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Could not clone and load part {} because disk does not belong to storage policy",
quoteString(src_part->getDataPartStorage().getFullPath()));
auto [destination_part, temporary_directory_lock] = cloneSourcePart(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
if (!params.copy_instead_of_hardlink && params.hardlinked_files)
{
handleHardLinkedParameterFiles(src_part, params);
handleProjections(src_part, params);
}
return std::make_pair(destination_part, std::move(temporary_directory_lock));
}
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> MergeTreeDataPartCloner::clone(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
bool require_part_metadata,
const IDataPartStorage::ClonePartParams & params,
const ReadSettings & read_settings,
const WriteSettings & write_settings)
{
auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock));
}
std::pair<MergeTreeDataPartCloner::MutableDataPartPtr, scope_guard> MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
bool sync_new_files,
const IDataPartStorage::ClonePartParams & params)
{
auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections(
merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params);
DistinctPartitionExpression::updateNewPartFiles(
*merge_tree_data, destination_part, new_partition, new_min_max_index, src_part->storage.getInMemoryMetadataPtr(), sync_new_files);
return std::make_pair(finalizePart(destination_part, params, false), std::move(temporary_directory_lock));
}
}


@ -1,43 +0,0 @@
#pragma once
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
struct MergeTreePartition;
class IMergeTreeDataPart;
class MergeTreeDataPartCloner
{
public:
using DataPart = IMergeTreeDataPart;
using MutableDataPartPtr = std::shared_ptr<DataPart>;
using DataPartPtr = std::shared_ptr<const DataPart>;
static std::pair<MutableDataPartPtr, scope_guard> clone(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
bool require_part_metadata,
const IDataPartStorage::ClonePartParams & params,
const ReadSettings & read_settings,
const WriteSettings & write_settings);
static std::pair<MutableDataPartPtr, scope_guard> cloneWithDistinctPartitionExpression(
MergeTreeData * merge_tree_data,
const DataPartPtr & src_part,
const StorageMetadataPtr & metadata_snapshot,
const MergeTreePartInfo & dst_part_info,
const String & tmp_part_prefix,
const ReadSettings & read_settings,
const WriteSettings & write_settings,
const MergeTreePartition & new_partition,
const IMergeTreeDataPart::MinMaxIndex & new_min_max_index,
bool sync_new_files,
const IDataPartStorage::ClonePartParams & params);
};
}


@ -7,6 +7,7 @@
#include <Columns/ColumnSparse.h> #include <Columns/ColumnSparse.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Storages/BlockNumberColumn.h> #include <Storages/BlockNumberColumn.h>
#include <Storages/ColumnsDescription.h>
namespace DB namespace DB
{ {
@ -143,13 +144,22 @@ void MergeTreeDataPartWriterWide::addStreams(
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
UInt64 max_compress_block_size = 0;
if (column_desc)
if (const auto * value = column_desc->settings.tryGet("max_compress_block_size"))
max_compress_block_size = value->safeGet<UInt64>();
if (!max_compress_block_size)
max_compress_block_size = settings.max_compress_block_size;
column_streams[stream_name] = std::make_unique<Stream<false>>( column_streams[stream_name] = std::make_unique<Stream<false>>(
stream_name, stream_name,
data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(),
stream_name, DATA_FILE_EXTENSION, stream_name, DATA_FILE_EXTENSION,
stream_name, marks_file_extension, stream_name, marks_file_extension,
compression_codec, compression_codec,
settings.max_compress_block_size, max_compress_block_size,
marks_compression_codec, marks_compression_codec,
settings.marks_compress_block_size, settings.marks_compress_block_size,
settings.query_write_settings); settings.query_write_settings);
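
The lines above resolve a column-level `max_compress_block_size` and fall back to the table-level setting when no override is present (the next hunk applies the same pattern to `min_compress_block_size`). A generic std-only sketch of the lookup-with-fallback (names hypothetical):

#include <cstdint>
#include <map>
#include <string>

// Column-level override with table-level fallback; a zero/absent per-column
// value means "use the table setting", as in the hunk above.
uint64_t resolveCompressBlockSize(const std::map<std::string, uint64_t> & column_settings,
                                  const std::string & setting_name, uint64_t table_level_value)
{
    if (const auto it = column_settings.find(setting_name); it != column_settings.end() && it->second != 0)
        return it->second;
    return table_level_value;
}
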
@ -323,6 +333,13 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
WrittenOffsetColumns & offset_columns) WrittenOffsetColumns & offset_columns)
{ {
StreamsWithMarks result; StreamsWithMarks result;
const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage());
UInt64 min_compress_block_size = 0;
if (column_desc)
if (const auto * value = column_desc->settings.tryGet("min_compress_block_size"))
min_compress_block_size = value->safeGet<UInt64>();
if (!min_compress_block_size)
min_compress_block_size = settings.min_compress_block_size;
data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path)
{ {
bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes;
@ -335,7 +352,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn(
auto & stream = *column_streams[stream_name]; auto & stream = *column_streams[stream_name];
/// There could already be enough data to compress into the new block. /// There could already be enough data to compress into the new block.
if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) if (stream.compressed_hashing.offset() >= min_compress_block_size)
stream.compressed_hashing.next(); stream.compressed_hashing.next();
StreamNameAndMark stream_with_mark; StreamNameAndMark stream_with_mark;


@ -2,6 +2,7 @@
#include <Storages/MergeTree/MergedBlockOutputStream.h> #include <Storages/MergeTree/MergedBlockOutputStream.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h> #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Disks/createVolume.h> #include <Disks/createVolume.h>
@ -314,8 +315,13 @@ Block MergeTreeDataWriter::mergeBlock(
IColumn::Permutation *& permutation, IColumn::Permutation *& permutation,
const MergeTreeData::MergingParams & merging_params) const MergeTreeData::MergingParams & merging_params)
{ {
OpenTelemetry::SpanHolder span("MergeTreeDataWriter::mergeBlock");
size_t block_size = block.rows(); size_t block_size = block.rows();
span.addAttribute("clickhouse.rows", block_size);
span.addAttribute("clickhouse.columns", block.columns());
auto get_merging_algorithm = [&]() -> std::shared_ptr<IMergingAlgorithm> auto get_merging_algorithm = [&]() -> std::shared_ptr<IMergingAlgorithm>
{ {
switch (merging_params.mode) switch (merging_params.mode)
@ -351,6 +357,8 @@ Block MergeTreeDataWriter::mergeBlock(
if (!merging_algorithm) if (!merging_algorithm)
return block; return block;
span.addAttribute("clickhouse.merging_algorithm", merging_algorithm->getName());
Chunk chunk(block.getColumns(), block_size); Chunk chunk(block.getColumns(), block_size);
IMergingAlgorithm::Input input; IMergingAlgorithm::Input input;
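
The added instrumentation follows the usual RAII span pattern: open the span at scope entry, attach attributes (row count, column count, chosen merging algorithm) as they become known, and emit on scope exit. A simplified std-only sketch of such a holder (illustrative; not ClickHouse's OpenTelemetry::SpanHolder):

#include <chrono>
#include <cstdio>
#include <string>
#include <vector>

class ScopedSpan
{
public:
    using Clock = std::chrono::steady_clock;

    explicit ScopedSpan(std::string name) : name_(std::move(name)), start_(Clock::now()) {}

    void addAttribute(const std::string & key, const std::string & value)
    {
        attributes_.push_back(key + "=" + value);
    }

    ~ScopedSpan()
    {
        // Emit the span with its duration and attributes when the scope ends.
        const auto us = std::chrono::duration_cast<std::chrono::microseconds>(Clock::now() - start_).count();
        std::printf("span %s (%lld us)", name_.c_str(), static_cast<long long>(us));
        for (const auto & attribute : attributes_)
            std::printf(" %s", attribute.c_str());
        std::printf("\n");
    }

private:
    std::string name_;
    Clock::time_point start_;
    std::vector<std::string> attributes_;
};
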


@ -467,45 +467,6 @@ void MergeTreePartition::create(const StorageMetadataPtr & metadata_snapshot, Bl
} }
} }
void MergeTreePartition::createAndValidateMinMaxPartitionIds(
const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context)
{
if (!metadata_snapshot->hasPartitionKey())
return;
auto partition_key_names_and_types = executePartitionByExpression(metadata_snapshot, block_with_min_max_partition_ids, context);
value.resize(partition_key_names_and_types.size());
/// Executing partition_by expression adds new columns to passed block according to partition functions.
/// The block is passed by reference and is used afterwards. `moduloLegacy` needs to be substituted back
/// with just `modulo`, because it was a temporary substitution.
static constexpr std::string_view modulo_legacy_function_name = "moduloLegacy";
size_t i = 0;
for (const auto & element : partition_key_names_and_types)
{
auto & partition_column = block_with_min_max_partition_ids.getByName(element.name);
if (element.name.starts_with(modulo_legacy_function_name))
partition_column.name.replace(0, modulo_legacy_function_name.size(), "modulo");
Field extracted_min_partition_id_field;
Field extracted_max_partition_id_field;
partition_column.column->get(0, extracted_min_partition_id_field);
partition_column.column->get(1, extracted_max_partition_id_field);
if (extracted_min_partition_id_field != extracted_max_partition_id_field)
{
throw Exception(
ErrorCodes::INVALID_PARTITION_VALUE,
"Can not create the partition. A partition can not contain values that have different partition ids");
}
partition_column.column->get(0u, value[i++]);
}
}
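
The invariant enforced above, reduced to one column: a part may be moved as a whole only if the destination partition expression maps the part's minimum and maximum values to the same partition id. A tiny sketch, with a caller-supplied `partition_of` standing in for the destination partition expression:

#include <stdexcept>

// One-column reduction of createAndValidateMinMaxPartitionIds.
void validateSinglePartition(long min_value, long max_value, long (*partition_of)(long))
{
    if (partition_of(min_value) != partition_of(max_value))
        throw std::runtime_error("A partition can not contain values that have different partition ids");
}

// With partition_of = [](long d) { return d / 100; } (a toYYYYMM-style map
// over YYYYMMDD-encoded dates), min 20240101 and max 20240131 both give
// 202401 and pass; a max of 20240201 would give 202402 and be rejected.
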
NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context) NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context)
{ {
auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context); auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context);


@ -1,12 +1,11 @@
#pragma once #pragma once
#include <Core/Field.h> #include <base/types.h>
#include <Disks/IDisk.h> #include <Disks/IDisk.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <Storages/KeyDescription.h> #include <Storages/KeyDescription.h>
#include <Storages/MergeTree/IPartMetadataManager.h> #include <Storages/MergeTree/IPartMetadataManager.h>
#include <Storages/MergeTree/PartMetadataManagerOrdinary.h> #include <Core/Field.h>
#include <base/types.h>
namespace DB namespace DB
{ {
@ -52,11 +51,6 @@ public:
void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context); void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context);
/// Copy of MergeTreePartition::create, but also validates if min max partition keys are equal. If they are different,
/// it means the partition can't be created because the data doesn't belong to the same partition.
void createAndValidateMinMaxPartitionIds(
const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context);
static void appendFiles(const MergeTreeData & storage, Strings & files); static void appendFiles(const MergeTreeData & storage, Strings & files);
/// Adjust partition key and execute its expression on block. Return sample block according to used expression. /// Adjust partition key and execute its expression on block. Return sample block according to used expression.


@ -1,91 +0,0 @@
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
#include <Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
bool isDestinationPartitionExpressionMonotonicallyIncreasing(
const std::vector<Range> & hyperrectangle, const MergeTreeData & destination_storage)
{
auto destination_table_metadata = destination_storage.getInMemoryMetadataPtr();
auto key_description = destination_table_metadata->getPartitionKey();
auto definition_ast = key_description.definition_ast->clone();
auto table_identifier = std::make_shared<ASTIdentifier>(destination_storage.getStorageID().getTableName());
auto table_with_columns
= TableWithColumnNamesAndTypes{DatabaseAndTableWithAlias(table_identifier), destination_table_metadata->getColumns().getOrdinary()};
auto expression_list = extractKeyExpressionList(definition_ast);
MonotonicityCheckVisitor::Data data{{table_with_columns}, destination_storage.getContext(), /*group_by_function_hashes*/ {}};
for (auto i = 0u; i < expression_list->children.size(); i++)
{
data.range = hyperrectangle[i];
MonotonicityCheckVisitor(data).visit(expression_list->children[i]);
if (!data.monotonicity.is_monotonic || !data.monotonicity.is_positive)
return false;
}
return true;
}
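
In miniature, the check asks a monotonicity oracle about each destination key expression over the source min/max range and requires "monotonic and increasing" everywhere. A std-only sketch with a hypothetical `MonotonicityCheck` callback standing in for MonotonicityCheckVisitor:

#include <cstddef>
#include <utility>
#include <vector>

struct Monotonicity
{
    bool is_monotonic = false;
    bool is_positive = false;  // increasing on the inspected range
};

// Hypothetical oracle: how does the i-th destination key expression behave
// over the given source value range?
using MonotonicityCheck = Monotonicity (*)(std::size_t expression_index, double range_min, double range_max);

// Mirrors isDestinationPartitionExpressionMonotonicallyIncreasing: every
// destination key expression must be monotonically increasing over the
// corresponding source min/max range, or the operation is rejected.
bool allMonotonicallyIncreasing(const std::vector<std::pair<double, double>> & ranges, MonotonicityCheck check)
{
    for (std::size_t i = 0; i < ranges.size(); ++i)
    {
        const Monotonicity m = check(i, ranges[i].first, ranges[i].second);
        if (!m.is_monotonic || !m.is_positive)
            return false;
    }
    return true;
}
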
bool isExpressionDirectSubsetOf(const ASTPtr source, const ASTPtr destination)
{
auto source_expression_list = extractKeyExpressionList(source);
auto destination_expression_list = extractKeyExpressionList(destination);
std::unordered_set<std::string> source_columns;
for (auto i = 0u; i < source_expression_list->children.size(); ++i)
source_columns.insert(source_expression_list->children[i]->getColumnName());
for (auto i = 0u; i < destination_expression_list->children.size(); ++i)
if (!source_columns.contains(destination_expression_list->children[i]->getColumnName()))
return false;
return true;
}
}
void MergeTreePartitionCompatibilityVerifier::verify(
const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts)
{
const auto source_metadata = source_storage.getInMemoryMetadataPtr();
const auto destination_metadata = destination_storage.getInMemoryMetadataPtr();
const auto source_partition_key_ast = source_metadata->getPartitionKeyAST();
const auto destination_partition_key_ast = destination_metadata->getPartitionKeyAST();
// If destination partition expression columns are a subset of source partition expression columns,
// there is no need to check for monotonicity.
if (isExpressionDirectSubsetOf(source_partition_key_ast, destination_partition_key_ast))
return;
const auto src_global_min_max_indexes = MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(source_parts, destination_storage);
assert(!src_global_min_max_indexes.hyperrectangle.empty());
if (!isDestinationPartitionExpressionMonotonicallyIncreasing(src_global_min_max_indexes.hyperrectangle, destination_storage))
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Destination table partition expression is not monotonically increasing");
MergeTreePartition().createAndValidateMinMaxPartitionIds(
destination_storage.getInMemoryMetadataPtr(),
src_global_min_max_indexes.getBlock(destination_storage),
destination_storage.getContext());
}
}


@ -1,30 +0,0 @@
#pragma once
#include <Core/Field.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
{
/*
* Verifies that source and destination partitions are compatible.
* To be compatible, one of the following criteria must be met:
* 1. Destination partition expression columns are a subset of source partition columns; or
* 2. Destination partition expression is monotonic on the source global min_max idx Range AND the computed partition id for
* the source global min_max idx range is the same.
*
* If not, an exception is thrown.
* */
class MergeTreePartitionCompatibilityVerifier
{
public:
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
static void
verify(const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts);
};
}
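
For reference, this is how the verifier was invoked from StorageMergeTree::replacePartitionFrom in the code being removed (the same lines appear in the StorageMergeTree.cpp hunk below):

const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
if (is_partition_exp_different && !src_parts.empty())
    MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts);
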


@ -1,25 +0,0 @@
#include <Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h>
namespace DB
{
IMergeTreeDataPart::MinMaxIndex
MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(const DataPartsVector & parts, const MergeTreeData & storage)
{
IMergeTreeDataPart::MinMaxIndex global_min_max_indexes;
for (const auto & part : parts)
{
auto metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(part.get());
auto local_min_max_index = MergeTreeData::DataPart::MinMaxIndex();
local_min_max_index.load(storage, metadata_manager);
global_min_max_indexes.merge(local_min_max_index);
}
return global_min_max_indexes;
}
}
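
The calculator is a fold over per-part min/max indexes. A self-contained sketch reduced to one numeric column (assumes a non-empty part list, matching the assert at the call site):

#include <algorithm>
#include <utility>
#include <vector>

// Fold per-part [min, max] ranges into a single global range, the way
// calculate() merges each part's MinMaxIndex.
std::pair<long, long> mergeMinMax(const std::vector<std::pair<long, long>> & per_part_ranges)
{
    std::pair<long, long> global = per_part_ranges.front();
    for (const auto & [lo, hi] : per_part_ranges)
    {
        global.first = std::min(global.first, lo);
        global.second = std::max(global.second, hi);
    }
    return global;
}
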


@ -1,24 +0,0 @@
#pragma once
#include <utility>
#include <Core/Field.h>
#include <Storages/MergeTree/MergeTreeData.h>
namespace DB
{
/*
* Calculates global min max indexes for a given set of parts on given storage.
* */
class MergeTreePartitionGlobalMinMaxIdxCalculator
{
using DataPart = IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const DataPart>;
using DataPartsVector = std::vector<DataPartPtr>;
public:
static IMergeTreeDataPart::MinMaxIndex calculate(const DataPartsVector & parts, const MergeTreeData & storage);
};
}


@ -213,6 +213,27 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const
} }
} }
void MergeTreeColumnSettings::validate(const SettingsChanges & changes)
{
static const MergeTreeSettings merge_tree_settings;
static const std::set<String> allowed_column_level_settings =
{
"min_compress_block_size",
"max_compress_block_size"
};
for (const auto & change : changes)
{
if (!allowed_column_level_settings.contains(change.name))
throw Exception(
ErrorCodes::UNKNOWN_SETTING,
"Setting {} is unknown or not supported at column level, supported settings: {}",
change.name,
fmt::join(allowed_column_level_settings, ", "));
merge_tree_settings.checkCanSet(change.name, change.value);
}
}
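
The validator whitelists the two compression block-size settings for per-column overrides; DDL along the lines of `CREATE TABLE t (doc String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)) ENGINE = MergeTree ORDER BY tuple()` is the intended use (syntax per the column-level settings feature this hunk belongs to; hedged). The whitelist pattern itself, in a std-only sketch:

#include <set>
#include <stdexcept>
#include <string>
#include <vector>

// Only a fixed set of MergeTree settings may be overridden per column;
// anything else is rejected up front, as in validate() above.
void validateColumnSettingNames(const std::vector<std::string> & changed_names)
{
    static const std::set<std::string> allowed{"min_compress_block_size", "max_compress_block_size"};
    for (const auto & name : changed_names)
        if (!allowed.contains(name))
            throw std::invalid_argument("Setting " + name + " is not supported at column level");
}
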
std::vector<String> MergeTreeSettings::getAllRegisteredNames() const std::vector<String> MergeTreeSettings::getAllRegisteredNames() const
{ {


@ -277,4 +277,11 @@ struct MergeTreeSettings : public BaseSettings<MergeTreeSettingsTraits>, public
using MergeTreeSettingsPtr = std::shared_ptr<const MergeTreeSettings>; using MergeTreeSettingsPtr = std::shared_ptr<const MergeTreeSettings>;
/// Column-level Merge-Tree settings which overwrite MergeTree settings
namespace MergeTreeColumnSettings
{
void validate(const SettingsChanges & changes);
}
} }


@ -5,9 +5,9 @@
#include <optional> #include <optional>
#include <ranges> #include <ranges>
#include <base/sort.h>
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Databases/IDatabase.h> #include <Databases/IDatabase.h>
#include <IO/copyData.h>
#include "Common/Exception.h" #include "Common/Exception.h"
#include <Common/MemoryTracker.h> #include <Common/MemoryTracker.h>
#include <Common/escapeForFileName.h> #include <Common/escapeForFileName.h>
@ -20,30 +20,27 @@
#include <Interpreters/TransactionLog.h> #include <Interpreters/TransactionLog.h>
#include <Interpreters/ClusterProxy/executeQuery.h> #include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/ClusterProxy/SelectStreamFactory.h> #include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/InterpreterAlterQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h> #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <IO/copyData.h>
#include <Parsers/ASTCheckQuery.h> #include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTPartition.h> #include <Parsers/ASTPartition.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/formatAST.h>
#include <Planner/Utils.h> #include <Planner/Utils.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Storages/MergeTree/MergeTreeData.h> #include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/ActiveDataPartSet.h> #include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/AlterCommands.h> #include <Storages/AlterCommands.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/MergeTree/PartMetadataManagerOrdinary.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/PartitionCommands.h> #include <Storages/PartitionCommands.h>
#include <base/sort.h> #include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/buildQueryTreeForShard.h> #include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <QueryPipeline/Pipe.h> #include <QueryPipeline/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h> #include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h> #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
@ -218,16 +215,25 @@ void StorageMergeTree::read(
{ {
if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
{ {
const auto table_id = getStorageID();
const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery(
local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
ASTPtr modified_query_ast;
Block header; Block header;
if (local_context->getSettingsRef().allow_experimental_analyzer) if (local_context->getSettingsRef().allow_experimental_analyzer)
header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze());
{
QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone();
rewriteJoinToGlobalJoin(modified_query_tree);
modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree);
header = InterpreterSelectQueryAnalyzer::getSampleBlock(
modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
}
else else
header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
{
const auto table_id = getStorageID();
modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
header
= InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory select_stream_factory =
ClusterProxy::SelectStreamFactory( ClusterProxy::SelectStreamFactory(
@ -238,7 +244,6 @@ void StorageMergeTree::read(
ClusterProxy::executeQueryWithParallelReplicas( ClusterProxy::executeQueryWithParallelReplicas(
query_plan, query_plan,
getStorageID(),
select_stream_factory, select_stream_factory,
modified_query_ast, modified_query_ast,
local_context, local_context,
@ -2044,74 +2049,42 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
ProfileEventsScope profile_events_scope; ProfileEventsScope profile_events_scope;
MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot);
String partition_id = src_data.getPartitionIDFromQuery(partition, local_context);
String partition_id = getPartitionIDFromQuery(partition, local_context);
DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id);
bool attach_empty_partition = !replace && src_parts.empty();
if (attach_empty_partition)
return;
MutableDataPartsVector dst_parts; MutableDataPartsVector dst_parts;
std::vector<scope_guard> dst_parts_locks; std::vector<scope_guard> dst_parts_locks;
static const String TMP_PREFIX = "tmp_replace_from_"; static const String TMP_PREFIX = "tmp_replace_from_";
const auto my_partition_expression = my_metadata_snapshot->getPartitionKeyAST();
const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST();
const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
if (is_partition_exp_different && !src_parts.empty())
MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts);
for (DataPartPtr & src_part : src_parts)
for (const DataPartPtr & src_part : src_parts)
{ {
if (!canReplacePartition(src_part)) if (!canReplacePartition(src_part))
throw Exception(ErrorCodes::BAD_ARGUMENTS, throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table",
partition_id, src_part->name); partition_id, src_part->name);
IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
/// This will generate unique name in scope of current server process. /// This will generate unique name in scope of current server process.
auto index = insert_increment.get(); Int64 temp_index = insert_increment.get();
MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
if (is_partition_exp_different) IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
{ auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( src_part,
src_part, my_metadata_snapshot, local_context); TMP_PREFIX,
dst_part_info,
auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( my_metadata_snapshot,
src_part, clone_params,
new_partition, local_context->getReadSettings(),
new_partition.getID(*this), local_context->getWriteSettings());
new_min_max_index, dst_parts.emplace_back(std::move(dst_part));
TMP_PREFIX, dst_parts_locks.emplace_back(std::move(part_lock));
my_metadata_snapshot,
clone_params,
local_context,
index,
index);
dst_parts.emplace_back(std::move(dst_part));
dst_parts_locks.emplace_back(std::move(part_lock));
}
else
{
MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
src_part,
TMP_PREFIX,
dst_part_info,
my_metadata_snapshot,
clone_params,
local_context->getReadSettings(),
local_context->getWriteSettings());
dst_parts.emplace_back(std::move(dst_part));
dst_parts_locks.emplace_back(std::move(part_lock));
}
} }
/// ATTACH empty part set
if (!replace && dst_parts.empty())
return;
MergeTreePartInfo drop_range; MergeTreePartInfo drop_range;
if (replace) if (replace)
{ {
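The hunk above folds the two clone paths back into a single cloneAndLoadDataPartOnSameDisk call, which returns the cloned part together with a scope guard that removes the temporary directory if the operation never commits. Below is a minimal standalone sketch of that idiom, assuming toy Part, ScopeGuard and clonePart stand-ins rather than the real ClickHouse types:

// Standalone sketch, not part of the diff: the RAII shape behind
// cloneAndLoadDataPartOnSameDisk. Part, ScopeGuard and clonePart are
// illustrative stand-ins, not ClickHouse APIs.
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

/// Minimal scope_guard stand-in: runs a callback on destruction unless released.
class ScopeGuard
{
public:
    explicit ScopeGuard(std::function<void()> on_exit_) : on_exit(std::move(on_exit_)) {}
    ScopeGuard(ScopeGuard && other) noexcept : on_exit(std::move(other.on_exit)) { other.on_exit = nullptr; }
    ~ScopeGuard() { if (on_exit) on_exit(); }
    void release() { on_exit = nullptr; }
private:
    std::function<void()> on_exit;
};

struct Part { std::string name; };

/// Clone a part into a tmp_ directory; the returned guard removes the
/// temporary copy if the caller never commits (e.g. an exception is thrown).
std::pair<Part, ScopeGuard> clonePart(const Part & src, const std::string & tmp_prefix)
{
    Part dst{tmp_prefix + src.name};
    std::cout << "created " << dst.name << '\n';
    return {dst, ScopeGuard{[name = dst.name] { std::cout << "rolled back " << name << '\n'; }}};
}

int main()
{
    std::vector<Part> dst_parts;
    std::vector<ScopeGuard> dst_parts_locks;

    for (const Part & src : {Part{"all_1_1_0"}, Part{"all_2_2_0"}})
    {
        auto [dst_part, part_lock] = clonePart(src, "tmp_replace_from_");
        dst_parts.emplace_back(std::move(dst_part));
        dst_parts_locks.emplace_back(std::move(part_lock));
    }

    /// On the success path the guards are released before destruction.
    for (auto & lock : dst_parts_locks)
        lock.release();
    std::cout << "committed " << dst_parts.size() << " parts\n";
}

Holding the guards in a vector parallel to dst_parts means an exception anywhere in the loop rolls back every clone made so far, while the success path releases all guards at once.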

View File

@@ -26,21 +26,22 @@
 #include <base/sort.h>

+#include <Storages/buildQueryTreeForShard.h>
 #include <Storages/AlterCommands.h>
 #include <Storages/ColumnsDescription.h>
 #include <Storages/Freeze.h>
 #include <Storages/MergeTree/AsyncBlockIDsCache.h>
 #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
+#include <Storages/MergeTree/extractZkPathFromCreateQuery.h>
 #include <Storages/MergeTree/IMergeTreeDataPart.h>
 #include <Storages/MergeTree/LeaderElection.h>
-#include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/MergeFromLogEntryTask.h>
 #include <Storages/MergeTree/MergeList.h>
 #include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
 #include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
 #include <Storages/MergeTree/MergeTreePartInfo.h>
-#include <Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h>
 #include <Storages/MergeTree/MergeTreeReaderCompact.h>
+#include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/MutateFromLogEntryTask.h>
 #include <Storages/MergeTree/PinnedPartUUIDs.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeAddress.h>
@@ -52,11 +53,9 @@
 #include <Storages/MergeTree/ReplicatedMergeTreeSink.h>
 #include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h>
 #include <Storages/MergeTree/ZeroCopyLock.h>
-#include <Storages/MergeTree/extractZkPathFromCreateQuery.h>
 #include <Storages/PartitionCommands.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/VirtualColumnUtils.h>
-#include <Storages/buildQueryTreeForShard.h>

 #include <Databases/DatabaseOnDisk.h>
 #include <Databases/DatabaseReplicated.h>
@@ -2714,48 +2713,16 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry)
                 .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()),
                 .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
             };
-
-            const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST();
-            const auto src_partition_expression = source_table->getInMemoryMetadataPtr()->getPartitionKeyAST();
-
-            const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
-
-            if (is_partition_exp_different)
-            {
-                auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(
-                    part_desc->src_table_part, metadata_snapshot, getContext());
-
-                auto partition_id = new_partition.getID(*this);
-
-                auto [res_part, temporary_part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
-                    part_desc->src_table_part,
-                    new_partition,
-                    partition_id,
-                    new_min_max_index,
-                    TMP_PREFIX + "clone_",
-                    metadata_snapshot,
-                    clone_params,
-                    getContext(),
-                    part_desc->new_part_info.min_block,
-                    part_desc->new_part_info.max_block);
-
-                part_desc->res_part = std::move(res_part);
-                part_desc->temporary_part_lock = std::move(temporary_part_lock);
-            }
-            else
-            {
-                auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
-                    part_desc->src_table_part,
-                    TMP_PREFIX + "clone_",
-                    part_desc->new_part_info,
-                    metadata_snapshot,
-                    clone_params,
-                    getContext()->getReadSettings(),
-                    getContext()->getWriteSettings());
-
-                part_desc->res_part = std::move(res_part);
-                part_desc->temporary_part_lock = std::move(temporary_part_lock);
-            }
+            auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
+                part_desc->src_table_part,
+                TMP_PREFIX + "clone_",
+                part_desc->new_part_info,
+                metadata_snapshot,
+                clone_params,
+                getContext()->getReadSettings(),
+                getContext()->getWriteSettings());
+            part_desc->res_part = std::move(res_part);
+            part_desc->temporary_part_lock = std::move(temporary_part_lock);
         }
         else if (!part_desc->replica.empty())
         {
@@ -5418,7 +5385,9 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
     if (local_context->getSettingsRef().allow_experimental_analyzer)
     {
-        auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
+        QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone();
+        rewriteJoinToGlobalJoin(modified_query_tree);
+        modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree);

         header = InterpreterSelectQueryAnalyzer::getSampleBlock(
             modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
@@ -5441,7 +5410,6 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl(
     ClusterProxy::executeQueryWithParallelReplicas(
         query_plan,
-        getStorageID(),
         select_stream_factory,
         modified_query_ast,
         local_context,
@@ -7885,22 +7853,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
     ProfileEventsScope profile_events_scope;

     MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot);
-    String partition_id = src_data.getPartitionIDFromQuery(partition, query_context);
+    String partition_id = getPartitionIDFromQuery(partition, query_context);

     /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet.
     DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id);

-    bool attach_empty_partition = !replace && src_all_parts.empty();
-    if (attach_empty_partition)
-        return;
-
-    const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST();
-    const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST();
-    const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression);
-
-    if (is_partition_exp_different && !src_all_parts.empty())
-        MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_all_parts);
-
     LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size());

     static const String TMP_PREFIX = "tmp_replace_from_";
@@ -7955,18 +7912,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
                     "Cannot replace partition '{}' because part '{}"
                     "' has inconsistent granularity with table", partition_id, src_part->name);

-            IMergeTreeDataPart::MinMaxIndex min_max_index = *src_part->minmax_idx;
-            MergeTreePartition merge_tree_partition = src_part->partition;
-
-            if (is_partition_exp_different)
-            {
-                auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(src_part, metadata_snapshot, query_context);
-
-                merge_tree_partition = new_partition;
-                min_max_index = new_min_max_index;
-                partition_id = merge_tree_partition.getID(*this);
-            }
-
             String hash_hex = src_part->checksums.getTotalChecksumHex();
             const bool is_duplicated_part = replaced_parts.contains(hash_hex);
             replaced_parts.insert(hash_hex);
@@ -7985,52 +7930,27 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
                 continue;
             }

+            UInt64 index = lock->getNumber();
+            MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
+
             bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication
                 || dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
-
-            UInt64 index = lock->getNumber();
             IDataPartStorage::ClonePartParams clone_params
             {
                 .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()),
                 .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
             };
-
-            if (is_partition_exp_different)
-            {
-                auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey(
-                    src_part,
-                    merge_tree_partition,
-                    partition_id,
-                    min_max_index,
-                    TMP_PREFIX,
-                    metadata_snapshot,
-                    clone_params,
-                    query_context,
-                    index,
-                    index);
-
-                dst_parts.emplace_back(dst_part);
-                dst_parts_locks.emplace_back(std::move(part_lock));
-            }
-            else
-            {
-                MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);
-
-                auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
-                    src_part,
-                    TMP_PREFIX,
-                    dst_part_info,
-                    metadata_snapshot,
-                    clone_params,
-                    query_context->getReadSettings(),
-                    query_context->getWriteSettings());
-
-                dst_parts.emplace_back(dst_part);
-                dst_parts_locks.emplace_back(std::move(part_lock));
-            }
+            auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
+                src_part,
+                TMP_PREFIX,
+                dst_part_info,
+                metadata_snapshot,
+                clone_params,
+                query_context->getReadSettings(),
+                query_context->getWriteSettings());

             src_parts.emplace_back(src_part);
+            dst_parts.emplace_back(dst_part);
+            dst_parts_locks.emplace_back(std::move(part_lock));
             ephemeral_locks.emplace_back(std::move(*lock));
             block_id_paths.emplace_back(block_id_path);
             part_checksums.emplace_back(hash_hex);

View File

@@ -373,11 +373,37 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify)
     removeGroupingFunctionSpecializations(query_tree_to_modify);

-    // std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl;
-
     createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext());

-    // std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl;
-
     return query_tree_to_modify;
 }

+class RewriteJoinToGlobalJoinVisitor : public InDepthQueryTreeVisitor<RewriteJoinToGlobalJoinVisitor>
+{
+public:
+    using Base = InDepthQueryTreeVisitor<RewriteJoinToGlobalJoinVisitor>;
+    using Base::Base;
+
+    void visitImpl(QueryTreeNodePtr & node)
+    {
+        if (auto * join_node = node->as<JoinNode>())
+            join_node->setLocality(JoinLocality::Global);
+    }
+
+    static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child)
+    {
+        auto * join_node = parent->as<JoinNode>();
+        if (join_node && join_node->getRightTableExpression() == child)
+            return false;
+        return true;
+    }
+};
+
+void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify)
+{
+    RewriteJoinToGlobalJoinVisitor visitor;
+    visitor.visit(query_tree_to_modify);
+}
+
 }
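To see the traversal contract of the new visitor in isolation, here is a minimal self-contained C++ sketch of the same CRTP in-depth visitor over a toy tree. Node, Locality, InDepthVisitor and the main() wiring are illustrative assumptions, not ClickHouse types; only the visitImpl/needChildVisit shape mirrors the code above:

// Standalone sketch, not part of the diff: a toy re-creation of the
// in-depth visitor pattern behind RewriteJoinToGlobalJoinVisitor.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

enum class Locality { Unspecified, Global };

struct Node
{
    std::string name;
    bool is_join = false;
    Locality locality = Locality::Unspecified;
    std::vector<std::shared_ptr<Node>> children;

    /// By convention here, children[1] of a join node is the right table expression.
    std::shared_ptr<Node> rightTableExpression() const
    {
        return (is_join && children.size() > 1) ? children[1] : nullptr;
    }
};

using NodePtr = std::shared_ptr<Node>;

/// CRTP depth-first traversal: Derived supplies visitImpl() and can veto
/// descent into a child via needChildVisit(), the same contract as above.
template <typename Derived>
struct InDepthVisitor
{
    void visit(NodePtr & node)
    {
        static_cast<Derived *>(this)->visitImpl(node);
        for (auto & child : node->children)
            if (Derived::needChildVisit(node, child))
                visit(child);
    }
};

struct RewriteJoinToGlobalJoinVisitor : InDepthVisitor<RewriteJoinToGlobalJoinVisitor>
{
    void visitImpl(NodePtr & node)
    {
        if (node->is_join)
            node->locality = Locality::Global;
    }

    /// Skip the right side of a join, mirroring the hunk above.
    static bool needChildVisit(NodePtr & parent, NodePtr & child)
    {
        return !(parent->is_join && parent->rightTableExpression() == child);
    }
};

int main()
{
    auto t1 = std::make_shared<Node>(Node{"t1"});
    auto inner_join = std::make_shared<Node>(Node{"inner_join", true});
    inner_join->children = {std::make_shared<Node>(Node{"t2"}), std::make_shared<Node>(Node{"t3"})};

    auto top_join = std::make_shared<Node>(Node{"top_join", true});
    top_join->children = {t1, inner_join};

    RewriteJoinToGlobalJoinVisitor visitor;
    visitor.visit(top_join);

    std::cout << "top_join global: " << (top_join->locality == Locality::Global) << '\n';     // 1
    std::cout << "inner_join global: " << (inner_join->locality == Locality::Global) << '\n'; // 0
}

The needChildVisit veto carries the semantics: once a join becomes GLOBAL, its right table expression is shipped to the remote replicas as-is, so joins nested inside it are presumably left untouched on purpose.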

View File

@@ -12,4 +12,6 @@ using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;

 QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify);

+void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify);
+
 }

View File

@@ -27,8 +27,9 @@
 00917_multiple_joins_denny_crane
 02725_agg_projection_resprect_PK
 02763_row_policy_storage_merge_alias
-02784_parallel_replicas_automatic_decision_join
 02818_parameterized_view_with_cte_multiple_usage
+# Check after constants refactoring
+02901_parallel_replicas_rollup
 # Flaky. Please don't delete them without fixing them:
 01287_max_execution_speed
 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET

View File

@@ -1,17 +0,0 @@
<clickhouse>
<remote_servers>
<test_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>replica1</host>
<port>9000</port>
</replica>
<replica>
<host>replica2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
</clickhouse>

View File

@@ -1,214 +0,0 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
replica1 = cluster.add_instance(
"replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
)
replica2 = cluster.add_instance(
"replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
except Exception as ex:
print(ex)
finally:
cluster.shutdown()
def cleanup(nodes):
for node in nodes:
node.query("DROP TABLE IF EXISTS source SYNC")
node.query("DROP TABLE IF EXISTS destination SYNC")
def create_table(node, table_name, replicated):
replica = node.name
engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')"
if replicated
else "MergeTree()"
)
partition_expression = (
"toYYYYMMDD(timestamp)" if table_name == "source" else "toYYYYMM(timestamp)"
)
node.query_with_retry(
"""
CREATE TABLE {table_name}(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY {partition_expression}
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
table_name=table_name,
engine=engine,
partition_expression=partition_expression,
)
)
def test_both_replicated(start_cluster):
for node in [replica1, replica2]:
create_table(node, "source", True)
create_table(node, "destination", True)
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
assert_eq_with_retry(
replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination",
replica2.query(f"SELECT * FROM destination"),
)
cleanup([replica1, replica2])
def test_only_destination_replicated(start_cluster):
create_table(replica1, "source", False)
create_table(replica1, "destination", True)
create_table(replica2, "destination", True)
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
assert_eq_with_retry(
replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination",
replica2.query(f"SELECT * FROM destination"),
)
cleanup([replica1, replica2])
def test_both_replicated_partitioned_to_unpartitioned(start_cluster):
def create_tables(nodes):
for node in nodes:
source_engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')"
)
node.query(
"""
CREATE TABLE source(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp)
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=source_engine,
)
)
destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')"
node.query(
"""
CREATE TABLE destination(timestamp DateTime)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY tuple()
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=destination_engine,
)
)
create_tables([replica1, replica2])
replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')")
replica1.query("INSERT INTO source VALUES ('2010-03-03 02:01:01')")
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source"
)
replica1.query(
f"ALTER TABLE destination ATTACH PARTITION ID '20100303' FROM source"
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY timestamp",
"2010-03-02 02:01:01\n2010-03-03 02:01:01\n",
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY timestamp",
replica2.query(f"SELECT * FROM destination ORDER BY timestamp"),
)
cleanup([replica1, replica2])
def test_both_replicated_different_exp_same_id(start_cluster):
def create_tables(nodes):
for node in nodes:
source_engine = (
f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')"
)
node.query(
"""
CREATE TABLE source(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY a % 3
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=source_engine,
)
)
destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')"
node.query(
"""
CREATE TABLE destination(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8)
ENGINE = {engine}
ORDER BY tuple() PARTITION BY a
SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
""".format(
engine=destination_engine,
)
)
create_tables([replica1, replica2])
replica1.query(
"INSERT INTO source (a, b, c, extra, sign) VALUES (1, 5, 9, 1000, 1)"
)
replica1.query(
"INSERT INTO source (a, b, c, extra, sign) VALUES (2, 6, 10, 1000, 1)"
)
replica1.query("SYSTEM SYNC REPLICA source")
replica1.query("SYSTEM SYNC REPLICA destination")
replica1.query(f"ALTER TABLE destination ATTACH PARTITION 1 FROM source")
replica1.query(f"ALTER TABLE destination ATTACH PARTITION 2 FROM source")
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY a",
"1\t5\t9\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n2\t6\t10\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n",
)
assert_eq_with_retry(
replica1,
f"SELECT * FROM destination ORDER BY a",
replica2.query(f"SELECT * FROM destination ORDER BY a"),
)
cleanup([replica1, replica2])

View File

@@ -0,0 +1,6 @@
<clickhouse>
<asynchronous_metrics_update_period_s>60000</asynchronous_metrics_update_period_s>
<asynchronous_heavy_metrics_update_period_s>60000</asynchronous_heavy_metrics_update_period_s>
</clickhouse>

View File

@@ -0,0 +1,47 @@
import os
import pytest
import shutil
import time
from helpers.cluster import ClickHouseCluster
# Tests that SYSTEM RELOAD ASYNCHRONOUS METRICS works.
# Config default.xml sets a large refresh interval of asynchronous metrics, so that the periodic updates don't interfere with the manual
# update below.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=["configs/default.xml"],
stay_alive=True,
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs")
def test_system_reload_async_metrics(start_cluster):
node.query("SYSTEM DROP QUERY CACHE")
res1 = node.query(
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'NumberOfTables'"
)
# create table and test that the table creation is reflected in the asynchronous metrics
node.query("CREATE TABLE tab (col UInt64) ENGINE MergeTree ORDER BY tuple()")
node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS")
res2 = node.query(
"SELECT value FROM system.asynchronous_metrics WHERE metric = 'NumberOfTables'"
)
assert int(res1.rstrip()) + 1 == int(res2.rstrip())

View File

@@ -120,6 +120,7 @@ SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD
 SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD
 SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD
 SYSTEM RELOAD EMBEDDED DICTIONARIES ['RELOAD EMBEDDED DICTIONARIES'] GLOBAL SYSTEM RELOAD
+SYSTEM RELOAD ASYNCHRONOUS METRICS ['RELOAD ASYNCHRONOUS METRICS'] GLOBAL SYSTEM RELOAD
 SYSTEM RELOAD [] \N SYSTEM
 SYSTEM RESTART DISK ['SYSTEM RESTART DISK'] GLOBAL SYSTEM
 SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START MERGES'] TABLE SYSTEM

Some files were not shown because too many files have changed in this diff