Merge branch 'master' into tests/improve-hung-check
commit 2cd0ba7fff

1  .github/workflows/backport_branches.yml (vendored)
@@ -683,3 +683,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py
1  .github/workflows/docs_check.yml (vendored)
@@ -169,3 +169,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py --check-approved
1  .github/workflows/pull_request.yml (vendored)
@@ -4388,3 +4388,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py --check-approved
2  contrib/poco (vendored)
@@ -1 +1 @@
Subproject commit 799234226187c0ae0b8c90f23465b25ed7956e56
Subproject commit 0ab9bba7ccad3c8dacce04a35cb3b78218547ab4
@@ -5,6 +5,7 @@ set -x

# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
dmesg --clear ||:

set -e
set -u
@@ -368,6 +369,7 @@ if [ -f core.zst ]; then
fi

rg --text -F '<Fatal>' server.log > fatal.log ||:
dmesg -T > dmesg.log ||:

zstd --threads=0 server.log
@@ -396,6 +398,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
<a href="fuzzer.log">fuzzer.log</a>
<a href="server.log.zst">server.log.zst</a>
<a href="main.log">main.log</a>
<a href="dmesg.log">dmesg.log</a>
${CORE_LINK}
</p>
<table>
@@ -128,6 +128,7 @@ function run_tests()

if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then
ADDITIONAL_OPTIONS+=('--report-coverage')
ADDITIONAL_OPTIONS+=('--report-logs-stats')
fi

set +e
@@ -289,6 +289,7 @@ if __name__ == "__main__":
"--database=system",
"--hung-check",
"--stress",
"--report-logs-stats",
"00001_select_1",
]
)
@@ -136,3 +136,7 @@ DESCRIBE TABLE test_database.test_table;
│ data │ Nullable(String) │
└────────┴───────────────────┘
```

## Related content

- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
@@ -175,3 +175,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)

- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)

## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
@@ -1203,12 +1203,14 @@ SELECT * FROM json_each_row_nested
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`.
- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`.
- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`.
- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`.
- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`.
- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`.
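To make the named-tuple behaviour above concrete, here is a minimal sketch; the `events` table and its values are hypothetical and only illustrate the setting:

```sql
-- output_format_json_named_tuples_as_objects is now enabled by default.
CREATE TABLE events (id UInt32, payload Tuple(code UInt16, message String)) ENGINE = Memory;
INSERT INTO events VALUES (1, (200, 'ok'));

SELECT * FROM events FORMAT JSONEachRow;
-- Expected shape: {"id":1,"payload":{"code":200,"message":"ok"}}
-- With SET output_format_json_named_tuples_as_objects = 0 the tuple is written as an array instead: [200,"ok"]
```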
@@ -266,7 +266,7 @@ Default value: 0.

Limits the size in bytes of the hash table used when joining tables.

This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md).
This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md).

If the query contains joins, ClickHouse checks this setting for every intermediate result.
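A short, hypothetical illustration of the limit described above (the threshold and table names are arbitrary):

```sql
-- Cap the in-memory hash table used for joining at roughly 1 GB for this session.
SET max_bytes_in_join = 1000000000;

SELECT l.id
FROM table_a AS l
INNER JOIN table_b AS r ON l.id = r.id; -- the limit is checked for every intermediate result
```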
@@ -402,40 +402,62 @@ Default value: `ALL`.

## join_algorithm {#settings-join_algorithm}

Specifies [JOIN](../../sql-reference/statements/select/join.md) algorithm.
Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used.

Several algorithms can be specified, and an available one would be chosen for a particular query based on kind/strictness and table engine.

Possible values:

- `default` — `hash` or `direct`, if possible (same as `direct,hash`)
### `default`

- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`)

- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.
### `grace_hash`

[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option for performant complex joins while limiting memory use.

The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket is recomputed for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.

### `hash`

[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.

### `parallel_hash`

A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.

When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.

- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
### `partial_merge`

A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.

The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).

When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.

- `direct` - can be applied when the right storage supports key-value requests.
### `direct`

This algorithm can be applied when the storage for the right table supports key-value requests.

The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.

- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated.
### `auto`

- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining.
When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated.

- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
### `full_sorting_merge`

[Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining.

### `prefer_partial_merge`

ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.

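As a sketch of how these values are picked in practice (table names are hypothetical; `grace_hash_join_initial_buckets` is the knob mentioned in the `grace_hash` description above):

```sql
-- Use the memory-bounded grace hash join for a large join.
SET join_algorithm = 'grace_hash';
SET grace_hash_join_initial_buckets = 16;

SELECT l.id, r.value
FROM big_left AS l
INNER JOIN big_right AS r ON l.id = r.id;

-- Several algorithms can also be listed; an applicable one is chosen per query.
SET join_algorithm = 'direct,parallel_hash,hash';
```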
## join_any_take_last_row {#settings-join_any_take_last_row}

Changes behaviour of join operations with `ANY` strictness.
Changes the behaviour of join operations with `ANY` strictness.

:::warning
This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables.
@@ -498,7 +520,7 @@ Default value: 65536.

Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.

The bigger the value of the setting, the more RAM used and the less disk I/O needed.
The bigger the value of the setting, the more RAM is used and the less disk I/O is needed.

Possible values:
@@ -514,12 +536,12 @@ Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations.
Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour.
:::

When the legacy behaviour enabled:
When the legacy behaviour is enabled:

- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.

When the legacy behaviour disabled:
When the legacy behaviour is disabled:

- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.
@@ -572,7 +594,7 @@ Default value: `163840`.

## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem}

The minimum number of lines to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.
The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.

Possible values:
@@ -706,7 +728,7 @@ log_queries=1

## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms}

If enabled (non-zero), queries faster then the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:
If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:

- `system.query_log`
- `system.query_thread_log`
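For example, with an arbitrary one-second threshold for the setting described above:

```sql
SET log_queries = 1;
SET log_queries_min_query_duration_ms = 1000; -- queries faster than 1 s are not logged
```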
@@ -741,7 +763,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'

Setting up query threads logging.

Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.

Possible values:
@@ -760,7 +782,7 @@ log_query_threads=1

Setting up query views logging.

When a query run by ClickHouse with this setup on has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.
When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.

Example:
@@ -787,7 +809,7 @@ It can be used to improve the readability of server logs. Additionally, it helps

Possible values:

- Any string no longer than [max_query_size](#settings-max_query_size). If length is exceeded, the server throws an exception.
- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception.

Default value: empty string.
@@ -821,11 +843,11 @@ The setting also does not have a purpose when using INSERT SELECT, since data is

Default value: 1,048,576.

The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM.
The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM.

## min_insert_block_size_rows {#min-insert-block-size-rows}

Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.

Possible values:
@@ -891,7 +913,7 @@ Higher values will lead to higher memory usage.

## max_compress_block_size {#max-compress-block-size}

The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.

:::warning
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
@@ -935,7 +957,7 @@ Default value: 1000.

## interactive_delay {#interactive-delay}

The interval in microseconds for checking whether request execution has been cancelled and sending the progress.
The interval in microseconds for checking whether request execution has been canceled and sending the progress.

Default value: 100,000 (checks for cancelling and sends the progress ten times per second).
@@ -4122,7 +4144,20 @@ Enabled by default.

Serialize named tuple columns as JSON objects.

Disabled by default.
Enabled by default.

### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects}

Parse named tuple columns as JSON objects.

Enabled by default.

### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}

Insert default values for missing elements in JSON object while parsing named tuple.
This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled.

Enabled by default.

### output_format_json_array_of_rows {#output_format_json_array_of_rows}
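Stepping back to the two `input_format_json_*` settings documented above, a small sketch of them working together; the `events` table is hypothetical:

```sql
CREATE TABLE events (id UInt32, payload Tuple(code UInt16, message String)) ENGINE = Memory;

SET input_format_json_named_tuples_as_objects = 1;
SET input_format_json_defaults_for_missing_elements_in_named_tuple = 1;

-- 'message' is absent from the object, so it is filled with the type default ('').
INSERT INTO events FORMAT JSONEachRow {"id": 2, "payload": {"code": 404}};
```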
@@ -120,5 +120,6 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.

## Related Content

- [Extracting, converting, and querying data in local files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local)
- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1)
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)
@@ -57,6 +57,7 @@ ClickHouse-specific aggregate functions:
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md)
@@ -77,4 +78,6 @@ ClickHouse-specific aggregate functions:
- [contingency](./contingency.md)
- [cramersV](./cramersv.md)
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
- [theilsU](./theilsu.md)
- [theilsU](./theilsu.md)
- [maxIntersections](./maxintersections.md)
- [maxIntersectionsPosition](./maxintersectionsposition.md)
@@ -0,0 +1,64 @@
---
slug: /en/sql-reference/aggregate-functions/reference/maxintersections
sidebar_position: 360
title: maxIntersections
---

# maxIntersections

Aggregate function that calculates the maximum number of times that a group of intervals intersects each other (if all the intervals intersect at least once).

The syntax is:

```sql
maxIntersections(start_column, end_column)
```

**Arguments**

- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped.

- `end_column` - the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped.

**Returned value**

Returns the maximum number of intersected intervals.

**Example**

```sql
CREATE TABLE my_events (
start UInt32,
end UInt32
)
Engine = MergeTree
ORDER BY tuple();

INSERT INTO my_events VALUES
(1, 3),
(1, 6),
(2, 5),
(3, 7);
```

The intervals look like the following:

```response
1 - 3
1 - - - - 6
  2 - - 5
    3 - - - 7
```

Three of these intervals have a common value (the value is `4`, but the value that is common is not important, we are measuring the count of the intersections). The intervals `(1,3)` and `(3,7)` share an endpoint but are not considered intersecting by the `maxIntersections` function.

```sql
SELECT maxIntersections(start, end) FROM my_events;
```

Response:
```response
3
```

If you have multiple occurrences of the maximum interval, you can use the [`maxIntersectionsPosition` function](./maxintersectionsposition.md) to locate the number and location of those occurrences.
@@ -0,0 +1,64 @@
---
slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition
sidebar_position: 361
title: maxIntersectionsPosition
---

# maxIntersectionsPosition

Aggregate function that calculates the positions of the occurrences of the [`maxIntersections` function](./maxintersections.md).

The syntax is:

```sql
maxIntersectionsPosition(start_column, end_column)
```

**Arguments**

- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped.

- `end_column` - the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped.

**Returned value**

Returns the start positions of the maximum number of intersected intervals.

**Example**

```sql
CREATE TABLE my_events (
start UInt32,
end UInt32
)
Engine = MergeTree
ORDER BY tuple();

INSERT INTO my_events VALUES
(1, 3),
(1, 6),
(2, 5),
(3, 7);
```

The intervals look like the following:

```response
1 - 3
1 - - - - 6
  2 - - 5
    3 - - - 7
```

Notice that three of these intervals have the value 4 in common, and that starts with the 2nd interval:

```sql
SELECT maxIntersectionsPosition(start, end) FROM my_events;
```

Response:
```response
2
```

In other words, the `(1,6)` row is the start of the 3 intervals that intersect, and 3 is the maximum number of intervals that intersect.
@@ -0,0 +1,68 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted
sidebar_position: 203
---

# quantileInterpolatedWeighted

Computes [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using linear interpolation, taking into account the weight of each element.

To get the interpolated value, all the passed values are combined into an array, which are then sorted by their corresponding weights. Quantile interpolation is then performed using the [weighted percentile method](https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method) by building a cumulative distribution based on weights and then a linear interpolation is performed using the weights and the values to compute the quantiles.

When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.

**Syntax**

``` sql
quantileInterpolatedWeighted(level)(expr, weight)
```

Alias: `medianInterpolatedWeighted`.

**Arguments**

- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
- `weight` — Column with weights of sequence members. Weight is a number of value occurrences.

**Returned value**

- Quantile of the specified level.

Type:

- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.

**Example**

Input table:

``` text
┌─n─┬─val─┐
│ 0 │ 3 │
│ 1 │ 2 │
│ 2 │ 1 │
│ 5 │ 4 │
└───┴─────┘
```

Query:

``` sql
SELECT quantileInterpolatedWeighted(n, val) FROM t
```

Result:

``` text
┌─quantileInterpolatedWeighted(n, val)─┐
│ 1 │
└──────────────────────────────────────┘
```

**See Also**

- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
@@ -9,7 +9,7 @@ sidebar_position: 201

Syntax: `quantiles(level1, level2, …)(x)`

All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.

## quantilesExactExclusive
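A tiny illustration of the one-pass `quantiles` behaviour described above:

```sql
-- One pass over the data, one internal state, an array of results:
SELECT quantiles(0.25, 0.5, 0.9)(number) FROM numbers(1000);
-- versus three separate quantile() calls, each keeping its own state.
```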
@@ -6,6 +6,10 @@ sidebar_label: JSON

# JSON

:::warning
This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead.
:::

Stores JavaScript Object Notation (JSON) documents in a single column.

`JSON` is an alias for `Object('json')`.
@@ -121,7 +121,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa

## range(end), range(\[start, \] end \[, step\])

Returns an array of `UInt` numbers from `start` to `end - 1` by `step`.
Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md).

**Syntax**
``` sql
@@ -130,31 +130,30 @@ range([start, ] end [, step])

**Arguments**

- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. [UInt](../data-types/int-uint.md)
- `end` — The number before which the array is constructed. Required. [UInt](../data-types/int-uint.md)
- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. [UInt](../data-types/int-uint.md)
- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0.
- `end` — The number before which the array is constructed. Required.
- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1.

**Returned value**

- Array of `UInt` numbers from `start` to `end - 1` by `step`.
- Array of numbers from `start` to `end - 1` by `step`.

**Implementation details**

- All arguments must be positive values: `start`, `end`, `step` are `UInt` data types, as well as elements of the returned array.
- All arguments `start`, `end`, `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array have the supertype of all the argument types.
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.

**Examples**

Query:
``` sql
SELECT range(5), range(1, 5), range(1, 5, 2);
SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2);
```
Result:
```txt
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
└─────────────┴─────────────┴────────────────┘
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │
└─────────────┴─────────────┴────────────────┴─────────────────┘
```

## array(x1, …), operator \[x1, …\]
@@ -39,3 +39,16 @@ SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(
│ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │
└──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```

```sql
CREATE TABLE random (a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) engine=Memory;
INSERT INTO random SELECT * FROM generateRandom() LIMIT 2;
SELECT * FROM random;
```

```text
┌─a────────────────────────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐
│ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │
│ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │
└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```
@@ -117,7 +117,7 @@ SELECT notEmpty([1,2]);

## range(end), range(\[start, \] end \[, step\]) {#range}

Returns an array of `UInt` numbers from `start` to `end - 1`, with `step` as the increment.
Returns an array of integer numbers from `start` to `end - 1`, with `step` as the increment. Supported types are [`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md).

**Syntax**
``` sql
@@ -126,31 +126,30 @@ range([start, ] end [, step])

**Arguments**

- `start` — The first element of the array. Optional; required if `step` is used. Default value: 0. Type: [UInt](../data-types/int-uint.md).
- `end` — Counting stops before `end`, which is not included. Required. Type: [UInt](../data-types/int-uint.md).
- `step` — Determines the increment between consecutive elements of the array. Optional. Default value: 1. Type: [UInt](../data-types/int-uint.md).
- `start` — The first element of the array. Optional; required if `step` is used. Default value: 0.
- `end` — Counting stops before `end`, which is not included. Required.
- `step` — Determines the increment between consecutive elements of the array. Optional. Default value: 1.

**Returned value**

- An array of `UInt` numbers from `start` to `end - 1`, with `step` as the increment.
- An array of numbers from `start` to `end - 1`, with `step` as the increment.

**Notes**

- All arguments must be positive values: `start`, `end`, `step` are all of type `UInt`, and so are the elements of the resulting array.
- All arguments `start`, `end`, `step` must belong to one of the following types: [`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md). The element type of the resulting array is the smallest supertype of all argument types and must also be one of these types.
- An exception is thrown if the total length of the resulting arrays exceeds the number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.

**Examples**

Query:
``` sql
SELECT range(5), range(1, 5), range(1, 5, 2);
SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2);
```
Result:
```txt
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
└─────────────┴─────────────┴────────────────┘
┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐
│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │
└─────────────┴─────────────┴────────────────┴─────────────────┘
```

## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1}
@@ -140,6 +140,7 @@ namespace CurrentMetrics
namespace ProfileEvents
{
extern const Event MainConfigLoads;
extern const Event ServerStartupMilliseconds;
}

namespace fs = std::filesystem;
@@ -652,6 +653,8 @@ static void sanityChecks(Server & server)
int Server::main(const std::vector<std::string> & /*args*/)
try
{
Stopwatch startup_watch;

Poco::Logger * log = &logger();

UseSSL use_ssl;
@@ -1822,6 +1825,9 @@ try
LOG_INFO(log, "Ready for connections.");
}

startup_watch.stop();
ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());

try
{
global_context->startClusterDiscovery();
@@ -167,6 +167,7 @@ enum class AccessType
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \
M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \
M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \
M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \
M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
@@ -53,7 +53,7 @@ TEST(AccessRights, Union)
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}
@@ -207,7 +207,7 @@ private:
{
// Fuse points if their text representations differ only in last digit
auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits<Mean>::epsilon();
if (points[left].mean + min_diff >= points[right].mean)
if (points[left].mean + std::fabs(min_diff) >= points[right].mean)
{
points[left] = points[left] + points[right];
}
@@ -232,6 +232,9 @@ struct NameQuantilesExactInclusive { static constexpr auto name = "quantilesExac
struct NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; };
struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; };

struct NameQuantileInterpolatedWeighted { static constexpr auto name = "quantileInterpolatedWeighted"; };
struct NameQuantilesInterpolatedWeighted { static constexpr auto name = "quantilesInterpolatedWeighted"; };

struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; };
struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; };
struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; };
@@ -0,0 +1,70 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileInterpolatedWeighted.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>


namespace DB
{
struct Settings;

namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}

namespace
{

template <typename Value, bool _> using FuncQuantileInterpolatedWeighted = AggregateFunctionQuantile<Value, QuantileInterpolatedWeighted<Value>, NameQuantileInterpolatedWeighted, true, void, false>;
template <typename Value, bool _> using FuncQuantilesInterpolatedWeighted = AggregateFunctionQuantile<Value, QuantileInterpolatedWeighted<Value>, NameQuantilesInterpolatedWeighted, true, void, true>;

template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);

const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);

#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);

if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);

if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);

throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}

void registerAggregateFunctionsQuantileInterpolatedWeighted(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };

factory.registerFunction(NameQuantileInterpolatedWeighted::name, createAggregateFunctionQuantile<FuncQuantileInterpolatedWeighted>);
factory.registerFunction(NameQuantilesInterpolatedWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesInterpolatedWeighted>, properties });

/// 'median' is an alias for 'quantile'
factory.registerAlias("medianInterpolatedWeighted", NameQuantileInterpolatedWeighted::name);
}

}
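The registration above exposes the following SQL names; a quick, hypothetical check using the `values` table function:

```sql
SELECT
    quantileInterpolatedWeighted(0.9)(v, w) AS p90,
    quantilesInterpolatedWeighted(0.5, 0.9)(v, w) AS p50_p90,
    medianInterpolatedWeighted(v, w) AS median_alias
FROM values('v UInt32, w UInt64', (1, 3), (2, 1), (5, 2));
```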
308  src/AggregateFunctions/QuantileInterpolatedWeighted.h (Normal file)
@ -0,0 +1,308 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/sort.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/** Approximates Quantile by:
|
||||
* - sorting input values and weights
|
||||
* - building a cumulative distribution based on weights
|
||||
* - performing linear interpolation between the weights and values
|
||||
*
|
||||
*/
|
||||
template <typename Value>
|
||||
struct QuantileInterpolatedWeighted
|
||||
{
|
||||
struct Int128Hash
|
||||
{
|
||||
size_t operator()(Int128 x) const
|
||||
{
|
||||
return CityHash_v1_0_2::Hash128to64({x >> 64, x & 0xffffffffffffffffll});
|
||||
}
|
||||
};
|
||||
|
||||
using Weight = UInt64;
|
||||
using UnderlyingType = NativeType<Value>;
|
||||
using Hasher = std::conditional_t<std::is_same_v<Value, Decimal128>, Int128Hash, HashCRC32<UnderlyingType>>;
|
||||
|
||||
/// When creating, the hash table must be small.
|
||||
using Map = HashMapWithStackMemory<UnderlyingType, Weight, Hasher, 4>;
|
||||
|
||||
Map map;
|
||||
|
||||
void add(const Value & x)
|
||||
{
|
||||
/// We must skip NaNs as they are not compatible with comparison sorting.
|
||||
if (!isNaN(x))
|
||||
++map[x];
|
||||
}
|
||||
|
||||
void add(const Value & x, Weight weight)
|
||||
{
|
||||
if (!isNaN(x))
|
||||
map[x] += weight;
|
||||
}
|
||||
|
||||
void merge(const QuantileInterpolatedWeighted & rhs)
|
||||
{
|
||||
for (const auto & pair : rhs.map)
|
||||
map[pair.getKey()] += pair.getMapped();
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
map.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
typename Map::Reader reader(buf);
|
||||
while (reader.next())
|
||||
{
|
||||
const auto & pair = reader.get();
|
||||
map[pair.first] = pair.second;
|
||||
}
|
||||
}
|
||||
|
||||
Value get(Float64 level) const
|
||||
{
|
||||
return getImpl<Value>(level);
|
||||
}
|
||||
|
||||
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
|
||||
{
|
||||
getManyImpl<Value>(levels, indices, size, result);
|
||||
}
|
||||
|
||||
/// The same, but in the case of an empty state, NaN is returned.
|
||||
Float64 getFloat(Float64) const
|
||||
{
|
||||
throw Exception("Method getFloat is not implemented for QuantileInterpolatedWeighted", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
void getManyFloat(const Float64 *, const size_t *, size_t, Float64 *) const
|
||||
{
|
||||
throw Exception("Method getManyFloat is not implemented for QuantileInterpolatedWeighted", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
private:
|
||||
using Pair = typename std::pair<UnderlyingType, Float64>;
|
||||
|
||||
/// Get the value of the `level` quantile. The level must be between 0 and 1.
|
||||
template <typename T>
|
||||
T getImpl(Float64 level) const
|
||||
{
|
||||
size_t size = map.size();
|
||||
|
||||
if (0 == size)
|
||||
return std::numeric_limits<Value>::quiet_NaN();
|
||||
|
||||
/// Maintain a vector of pair of values and weights for easier sorting and for building
|
||||
/// a cumulative distribution using the provided weights.
|
||||
std::vector<Pair> value_weight_pairs;
|
||||
value_weight_pairs.reserve(size);
|
||||
|
||||
/// Note: weight provided must be a 64-bit integer
|
||||
/// Float64 is used as accumulator here to get approximate results.
|
||||
/// But weight used in the internal array is stored as Float64 as we
|
||||
/// do some quantile estimation operation which involves division and
|
||||
/// require Float64 level of precision.
|
||||
|
||||
Float64 sum_weight = 0;
|
||||
for (const auto & pair : map)
|
||||
{
|
||||
sum_weight += pair.getMapped();
|
||||
auto value = pair.getKey();
|
||||
auto weight = pair.getMapped();
|
||||
value_weight_pairs.push_back({value, weight});
|
||||
}
|
||||
|
||||
::sort(value_weight_pairs.begin(), value_weight_pairs.end(), [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
Float64 accumulated = 0;
|
||||
|
||||
/// vector for populating and storing the cumulative sum using the provided weights.
|
||||
/// example: [0,1,2,3,4,5] -> [0,1,3,6,10,15]
|
||||
std::vector<Float64> weights_cum_sum;
|
||||
weights_cum_sum.reserve(size);
|
||||
|
||||
for (size_t idx = 0; idx < size; ++idx)
|
||||
{
|
||||
accumulated += value_weight_pairs[idx].second;
|
||||
weights_cum_sum.push_back(accumulated);
|
||||
}
|
||||
|
||||
/// The following estimation of quantile is general and the idea is:
|
||||
/// https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method
|
||||
|
||||
/// calculates a simple cumulative distribution based on weights
|
||||
if (sum_weight != 0)
|
||||
{
|
||||
for (size_t idx = 0; idx < size; ++idx)
|
||||
value_weight_pairs[idx].second = (weights_cum_sum[idx] - 0.5 * value_weight_pairs[idx].second) / sum_weight;
|
||||
}
|
||||
|
||||
/// perform linear interpolation
|
||||
size_t idx = 0;
|
||||
if (size >= 2)
|
||||
{
|
||||
if (level >= value_weight_pairs[size - 2].second)
|
||||
{
|
||||
idx = size - 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t start = 0, end = size - 1;
|
||||
while (start <= end)
|
||||
{
|
||||
size_t mid = start + (end - start) / 2;
|
||||
if (mid > size)
|
||||
break;
|
||||
if (level > value_weight_pairs[mid + 1].second)
|
||||
start = mid + 1;
|
||||
else
|
||||
{
|
||||
idx = mid;
|
||||
end = mid - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t l = idx;
|
||||
size_t u = idx + 1 < size ? idx + 1 : idx;
|
||||
|
||||
Float64 xl = value_weight_pairs[l].second, xr = value_weight_pairs[u].second;
|
||||
UnderlyingType yl = value_weight_pairs[l].first, yr = value_weight_pairs[u].first;
|
||||
|
||||
if (level < xl)
|
||||
yr = yl;
|
||||
if (level > xr)
|
||||
yl = yr;
|
||||
|
||||
return static_cast<T>(interpolate(level, xl, xr, yl, yr));
|
||||
}
|
||||
|
||||
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
|
||||
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
|
||||
template <typename T>
|
||||
void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
|
||||
{
|
||||
size_t size = map.size();
|
||||
|
||||
if (0 == size)
|
||||
{
|
||||
for (size_t i = 0; i < num_levels; ++i)
|
||||
result[i] = Value();
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<Pair> value_weight_pairs;
|
||||
value_weight_pairs.reserve(size);
|
||||
|
||||
Float64 sum_weight = 0;
|
||||
for (const auto & pair : map)
|
||||
{
|
||||
sum_weight += pair.getMapped();
|
||||
auto value = pair.getKey();
|
||||
auto weight = pair.getMapped();
|
||||
value_weight_pairs.push_back({value, weight});
|
||||
}
|
||||
|
||||
::sort(value_weight_pairs.begin(), value_weight_pairs.end(), [](const Pair & a, const Pair & b) { return a.first < b.first; });
|
||||
|
||||
Float64 accumulated = 0;
|
||||
|
||||
/// vector for populating and storing the cumulative sum using the provided weights.
|
||||
/// example: [0,1,2,3,4,5] -> [0,1,3,6,10,15]
|
||||
std::vector<Float64> weights_cum_sum;
|
||||
weights_cum_sum.reserve(size);
|
||||
|
||||
for (size_t idx = 0; idx < size; ++idx)
|
||||
{
|
||||
accumulated += value_weight_pairs[idx].second;
|
||||
weights_cum_sum.emplace_back(accumulated);
|
||||
}
|
||||
|
||||
|
||||
/// The following estimation of quantile is general and the idea is:
|
||||
/// https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method
|
||||
|
||||
/// calculates a simple cumulative distribution based on weights
|
||||
if (sum_weight != 0)
|
||||
{
|
||||
for (size_t idx = 0; idx < size; ++idx)
|
||||
value_weight_pairs[idx].second = (weights_cum_sum[idx] - 0.5 * value_weight_pairs[idx].second) / sum_weight;
|
||||
}
|
||||
|
||||
for (size_t level_index = 0; level_index < num_levels; ++level_index)
|
||||
{
|
||||
/// perform linear interpolation for every level
|
||||
auto level = levels[indices[level_index]];
|
||||
|
||||
size_t idx = 0;
|
||||
if (size >= 2)
|
||||
{
|
||||
if (level >= value_weight_pairs[size - 2].second)
|
||||
{
|
||||
idx = size - 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t start = 0, end = size - 1;
|
||||
while (start <= end)
|
||||
{
|
||||
size_t mid = start + (end - start) / 2;
|
||||
if (mid > size)
|
||||
break;
|
||||
if (level > value_weight_pairs[mid + 1].second)
|
||||
start = mid + 1;
|
||||
else
|
||||
{
|
||||
idx = mid;
|
||||
end = mid - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t l = idx;
|
||||
size_t u = idx + 1 < size ? idx + 1 : idx;
|
||||
|
||||
Float64 xl = value_weight_pairs[l].second, xr = value_weight_pairs[u].second;
|
||||
UnderlyingType yl = value_weight_pairs[l].first, yr = value_weight_pairs[u].first;
|
||||
|
||||
if (level < xl)
|
||||
yr = yl;
|
||||
if (level > xr)
|
||||
yl = yr;
|
||||
|
||||
result[indices[level_index]] = static_cast<T>(interpolate(level, xl, xr, yl, yr));
|
||||
}
|
||||
}
|
||||
|
||||
/// This ignores overflows or NaN's that might arise during add, sub and mul operations and doesn't aim to provide exact
|
||||
/// results since `the quantileInterpolatedWeighted` function itself relies mainly on approximation.
|
||||
UnderlyingType NO_SANITIZE_UNDEFINED interpolate(Float64 level, Float64 xl, Float64 xr, UnderlyingType yl, UnderlyingType yr) const
|
||||
{
|
||||
UnderlyingType dy = yr - yl;
|
||||
Float64 dx = xr - xl;
|
||||
dx = dx == 0 ? 1 : dx; /// to handle NaN behavior that might arise during integer division below.
|
||||
|
||||
/// yl + (dy / dx) * (level - xl)
|
||||
return static_cast<UnderlyingType>(yl + (dy / dx) * (level - xl));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@@ -21,6 +21,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileInterpolatedWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactLow(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactHigh(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactInclusive(AggregateFunctionFactory &);
@@ -106,6 +107,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileExact(factory);
registerAggregateFunctionsQuantileExactWeighted(factory);
registerAggregateFunctionsQuantileInterpolatedWeighted(factory);
registerAggregateFunctionsQuantileExactLow(factory);
registerAggregateFunctionsQuantileExactHigh(factory);
registerAggregateFunctionsQuantileExactInclusive(factory);
@ -11,6 +11,7 @@
|
||||
#include <Parsers/ASTQualifiedAsterisk.h>
|
||||
#include <Parsers/ASTColumnsMatcher.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTColumnsTransformers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -206,19 +207,43 @@ QueryTreeNodePtr MatcherNode::cloneImpl() const
|
||||
ASTPtr MatcherNode::toASTImpl() const
|
||||
{
|
||||
ASTPtr result;
|
||||
ASTPtr transformers;
|
||||
|
||||
if (!children.empty())
|
||||
{
|
||||
transformers = std::make_shared<ASTColumnsTransformerList>();
|
||||
|
||||
for (const auto & child : children)
|
||||
transformers->children.push_back(child->toAST());
|
||||
}
|
||||
|
||||
if (matcher_type == MatcherNodeType::ASTERISK)
|
||||
{
|
||||
if (qualified_identifier.empty())
|
||||
{
|
||||
result = std::make_shared<ASTAsterisk>();
|
||||
auto asterisk = std::make_shared<ASTAsterisk>();
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
asterisk->transformers = std::move(transformers);
|
||||
asterisk->children.push_back(asterisk->transformers);
|
||||
}
|
||||
|
||||
result = asterisk;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto qualified_asterisk = std::make_shared<ASTQualifiedAsterisk>();
|
||||
|
||||
auto identifier_parts = qualified_identifier.getParts();
|
||||
qualified_asterisk->children.push_back(std::make_shared<ASTIdentifier>(std::move(identifier_parts)));
|
||||
qualified_asterisk->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
|
||||
qualified_asterisk->children.push_back(qualified_asterisk->qualifier);
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
qualified_asterisk->transformers = std::move(transformers);
|
||||
qualified_asterisk->children.push_back(qualified_asterisk->transformers);
|
||||
}
|
||||
|
||||
result = qualified_asterisk;
|
||||
}
|
||||
@ -229,6 +254,13 @@ ASTPtr MatcherNode::toASTImpl() const
|
||||
{
|
||||
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
|
||||
regexp_matcher->setPattern(columns_matcher->pattern());
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
regexp_matcher->transformers = std::move(transformers);
|
||||
regexp_matcher->children.push_back(regexp_matcher->transformers);
|
||||
}
|
||||
|
||||
result = regexp_matcher;
|
||||
}
|
||||
else
|
||||
@ -237,7 +269,14 @@ ASTPtr MatcherNode::toASTImpl() const
|
||||
regexp_matcher->setPattern(columns_matcher->pattern());
|
||||
|
||||
auto identifier_parts = qualified_identifier.getParts();
|
||||
regexp_matcher->children.push_back(std::make_shared<ASTIdentifier>(std::move(identifier_parts)));
|
||||
regexp_matcher->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
|
||||
regexp_matcher->children.push_back(regexp_matcher->qualifier);
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
regexp_matcher->transformers = std::move(transformers);
|
||||
regexp_matcher->children.push_back(regexp_matcher->transformers);
|
||||
}
|
||||
|
||||
result = regexp_matcher;
|
||||
}
|
||||
@ -257,23 +296,36 @@ ASTPtr MatcherNode::toASTImpl() const
|
||||
{
|
||||
auto columns_list_matcher = std::make_shared<ASTColumnsListMatcher>();
|
||||
columns_list_matcher->column_list = std::move(column_list);
|
||||
columns_list_matcher->children.push_back(columns_list_matcher->column_list);
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
columns_list_matcher->transformers = std::move(transformers);
|
||||
columns_list_matcher->children.push_back(columns_list_matcher->transformers);
|
||||
}
|
||||
|
||||
result = columns_list_matcher;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto columns_list_matcher = std::make_shared<ASTQualifiedColumnsListMatcher>();
|
||||
columns_list_matcher->column_list = std::move(column_list);
|
||||
|
||||
auto identifier_parts = qualified_identifier.getParts();
|
||||
columns_list_matcher->children.push_back(std::make_shared<ASTIdentifier>(std::move(identifier_parts)));
|
||||
columns_list_matcher->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
|
||||
columns_list_matcher->column_list = std::move(column_list);
|
||||
columns_list_matcher->children.push_back(columns_list_matcher->qualifier);
|
||||
columns_list_matcher->children.push_back(columns_list_matcher->column_list);
|
||||
|
||||
if (transformers)
|
||||
{
|
||||
columns_list_matcher->transformers = std::move(transformers);
|
||||
columns_list_matcher->children.push_back(columns_list_matcher->transformers);
|
||||
}
|
||||
|
||||
result = columns_list_matcher;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto & child : children)
|
||||
result->children.push_back(child->toAST());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
if (!inner_function_node)
|
||||
return;
|
||||
|
||||
auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
|
||||
const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
|
||||
if (inner_function_arguments_nodes.size() != 2)
|
||||
return;
|
||||
|
||||
@ -119,13 +119,15 @@ public:
|
||||
{
|
||||
lower_function_name = function_name_if_constant_is_negative;
|
||||
}
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name);
|
||||
|
||||
auto inner_function = aggregate_function_arguments_nodes[0];
|
||||
auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]);
|
||||
aggregate_function_arguments_nodes = {inner_function_right_argument};
|
||||
inner_function_arguments_nodes[1] = node;
|
||||
node = std::move(inner_function);
|
||||
auto inner_function_clone = inner_function_node->clone();
|
||||
auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
|
||||
auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
|
||||
auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1];
|
||||
aggregate_function_arguments_nodes = {inner_function_clone_right_argument};
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name);
|
||||
inner_function_clone_arguments_nodes[1] = node;
|
||||
node = std::move(inner_function_clone);
|
||||
}
|
||||
else if (right_argument_constant_node)
|
||||
{
|
||||
@ -136,18 +138,20 @@ public:
|
||||
{
|
||||
lower_function_name = function_name_if_constant_is_negative;
|
||||
}
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative);
|
||||
|
||||
auto inner_function = aggregate_function_arguments_nodes[0];
|
||||
auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]);
|
||||
aggregate_function_arguments_nodes = {inner_function_left_argument};
|
||||
inner_function_arguments_nodes[0] = node;
|
||||
node = std::move(inner_function);
|
||||
auto inner_function_clone = inner_function_node->clone();
|
||||
auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
|
||||
auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
|
||||
auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0];
|
||||
aggregate_function_arguments_nodes = {inner_function_clone_left_argument};
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, lower_function_name);
|
||||
inner_function_clone_arguments_nodes[0] = node;
|
||||
node = std::move(inner_function_clone);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name)
|
||||
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
|
||||
{
|
||||
auto function_aggregate_function = function_node.getAggregateFunction();
|
||||
|
||||
|
124
src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/SortNode.h>
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor<OptimizeRedundantFunctionsInOrderByVisitor>
|
||||
{
|
||||
public:
|
||||
static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/)
|
||||
{
|
||||
if (node->as<FunctionNode>())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
auto * query = node->as<QueryNode>();
|
||||
if (!query)
|
||||
return;
|
||||
|
||||
if (!query->hasOrderBy())
|
||||
return;
|
||||
|
||||
auto & order_by = query->getOrderBy();
|
||||
for (auto & elem : order_by.getNodes())
|
||||
{
|
||||
auto * order_by_elem = elem->as<SortNode>();
|
||||
if (order_by_elem->withFill())
|
||||
return;
|
||||
}
|
||||
|
||||
QueryTreeNodes new_order_by_nodes;
|
||||
new_order_by_nodes.reserve(order_by.getNodes().size());
|
||||
|
||||
for (auto & elem : order_by.getNodes())
|
||||
{
|
||||
auto & order_by_expr = elem->as<SortNode>()->getExpression();
|
||||
switch (order_by_expr->getNodeType())
|
||||
{
|
||||
case QueryTreeNodeType::FUNCTION:
|
||||
{
|
||||
if (isRedundantExpression(order_by_expr))
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
case QueryTreeNodeType::COLUMN:
|
||||
{
|
||||
existing_keys.insert(order_by_expr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
new_order_by_nodes.push_back(elem);
|
||||
}
|
||||
existing_keys.clear();
|
||||
|
||||
if (new_order_by_nodes.size() < order_by.getNodes().size())
|
||||
order_by.getNodes() = std::move(new_order_by_nodes);
|
||||
}
|
||||
|
||||
private:
|
||||
QueryTreeNodePtrWithHashSet existing_keys;
|
||||
|
||||
bool isRedundantExpression(QueryTreeNodePtr function)
|
||||
{
|
||||
QueryTreeNodes nodes_to_process{ function };
|
||||
while (!nodes_to_process.empty())
|
||||
{
|
||||
auto node = nodes_to_process.back();
|
||||
nodes_to_process.pop_back();
|
||||
|
||||
// TODO: handle constants here
|
||||
switch (node->getNodeType())
|
||||
{
|
||||
case QueryTreeNodeType::FUNCTION:
|
||||
{
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
const auto & function_arguments = function_node->getArguments().getNodes();
|
||||
if (function_arguments.empty())
|
||||
return false;
|
||||
const auto & function_base = function_node->getFunction();
|
||||
if (!function_base || !function_base->isDeterministicInScopeOfQuery())
|
||||
return false;
|
||||
|
||||
// Process arguments in order
|
||||
for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it)
|
||||
nodes_to_process.push_back(*it);
|
||||
break;
|
||||
}
|
||||
case QueryTreeNodeType::COLUMN:
|
||||
{
|
||||
if (!existing_keys.contains(node))
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/)
|
||||
{
|
||||
OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,23 @@
#pragma once

#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/** If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x.
  * Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
  * in case if f(), g(), h(), t() are deterministic (in scope of query).
  * Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x).
  */
class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass
{
public:
    String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; }

    String getDescription() override { return "If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x."; }

    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};

}
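
To make the doc comment concrete, here is a hedged toy model of the rule it describes: a deterministic function key is dropped once every column it transitively depends on has already appeared as an earlier plain sort key, while `ORDER BY f(x), x` stays untouched because `x` is not yet a key when `f(x)` is examined. The `Expr`/`col`/`fn` helpers are invented for this sketch and are unrelated to the real query-tree classes.

```cpp
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

/// Toy expression node: either a column reference or a deterministic function of sub-expressions.
struct Expr
{
    std::string column;                       /// non-empty for a column leaf
    std::string function;                     /// non-empty for a function node
    std::vector<std::shared_ptr<Expr>> args;
};
using ExprPtr = std::shared_ptr<Expr>;

ExprPtr col(std::string name) { return std::make_shared<Expr>(Expr{std::move(name), "", {}}); }
ExprPtr fn(std::string name, std::vector<ExprPtr> args) { return std::make_shared<Expr>(Expr{"", std::move(name), std::move(args)}); }

/// A function sort key is redundant if every column it transitively depends on is already a key.
bool isRedundant(const ExprPtr & e, const std::set<std::string> & keys)
{
    if (!e->column.empty())
        return keys.count(e->column) > 0;
    if (e->args.empty())
        return false; /// nullary function: nothing to piggyback on
    for (const auto & arg : e->args)
        if (!isRedundant(arg, keys))
            return false;
    return true;
}

int main()
{
    /// ORDER BY x, y, f(x), g(x, y)  ->  ORDER BY x, y
    const std::vector<ExprPtr> order_by{col("x"), col("y"), fn("f", {col("x")}), fn("g", {col("x"), col("y")})};

    std::set<std::string> keys;
    std::vector<ExprPtr> kept;
    for (const auto & e : order_by)
    {
        if (!e->column.empty())
            keys.insert(e->column);
        else if (isRedundant(e, keys))
            continue; /// drop f(x) and g(x, y)
        kept.push_back(e);
    }
    std::cout << "kept " << kept.size() << " of " << order_by.size() << " sort keys\n";
}
```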
|
@ -77,11 +77,11 @@ public:
|
||||
if (!nested_function || nested_function->getFunctionName() != "if")
|
||||
return;
|
||||
|
||||
auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
|
||||
const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
|
||||
if (nested_if_function_arguments_nodes.size() != 3)
|
||||
return;
|
||||
|
||||
auto & cond_argument = nested_if_function_arguments_nodes[0];
|
||||
const auto & cond_argument = nested_if_function_arguments_nodes[0];
|
||||
const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as<ConstantNode>();
|
||||
const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as<ConstantNode>();
|
||||
|
||||
@ -101,7 +101,7 @@ public:
|
||||
/// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`.
|
||||
if (if_true_condition_value == 1 && if_false_condition_value == 0)
|
||||
{
|
||||
function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]);
|
||||
function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
|
||||
function_node_arguments_nodes.resize(1);
|
||||
|
||||
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
|
||||
@ -120,7 +120,7 @@ public:
|
||||
auto not_function = std::make_shared<FunctionNode>("not");
|
||||
|
||||
auto & not_function_arguments = not_function->getArguments().getNodes();
|
||||
not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0]));
|
||||
not_function_arguments.push_back(nested_if_function_arguments_nodes[0]);
|
||||
|
||||
not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns()));
|
||||
|
||||
|
@ -111,7 +111,7 @@ private:
|
||||
|
||||
QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query, const ContextPtr & context) const;
|
||||
|
||||
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const;
|
||||
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, const ContextPtr & context) const;
|
||||
|
||||
ASTPtr query;
|
||||
QueryTreeNodePtr query_tree_node;
|
||||
@ -439,13 +439,13 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
}
|
||||
else if (const auto * asterisk = expression->as<ASTAsterisk>())
|
||||
{
|
||||
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
|
||||
auto column_transformers = buildColumnTransformers(asterisk->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * qualified_asterisk = expression->as<ASTQualifiedAsterisk>())
|
||||
{
|
||||
auto & qualified_identifier = qualified_asterisk->children.at(0)->as<ASTTableIdentifier &>();
|
||||
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
|
||||
auto & qualified_identifier = qualified_asterisk->qualifier->as<ASTIdentifier &>();
|
||||
auto column_transformers = buildColumnTransformers(qualified_asterisk->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * ast_literal = expression->as<ASTLiteral>())
|
||||
@ -543,7 +543,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
}
|
||||
else if (const auto * columns_regexp_matcher = expression->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
|
||||
auto column_transformers = buildColumnTransformers(columns_regexp_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(columns_regexp_matcher->getMatcher(), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * columns_list_matcher = expression->as<ASTColumnsListMatcher>())
|
||||
@ -557,18 +557,18 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
|
||||
}
|
||||
|
||||
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
|
||||
auto column_transformers = buildColumnTransformers(columns_list_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(std::move(column_list_identifiers), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * qualified_columns_regexp_matcher = expression->as<ASTQualifiedColumnsRegexpMatcher>())
|
||||
{
|
||||
auto & qualified_identifier = qualified_columns_regexp_matcher->children.at(0)->as<ASTTableIdentifier &>();
|
||||
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
|
||||
auto & qualified_identifier = qualified_columns_regexp_matcher->qualifier->as<ASTIdentifier &>();
|
||||
auto column_transformers = buildColumnTransformers(qualified_columns_regexp_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers));
|
||||
}
|
||||
else if (const auto * qualified_columns_list_matcher = expression->as<ASTQualifiedColumnsListMatcher>())
|
||||
{
|
||||
auto & qualified_identifier = qualified_columns_list_matcher->children.at(0)->as<ASTTableIdentifier &>();
|
||||
auto & qualified_identifier = qualified_columns_list_matcher->qualifier->as<ASTIdentifier &>();
|
||||
|
||||
Identifiers column_list_identifiers;
|
||||
column_list_identifiers.reserve(qualified_columns_list_matcher->column_list->children.size());
|
||||
@ -579,7 +579,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
|
||||
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
|
||||
}
|
||||
|
||||
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
|
||||
auto column_transformers = buildColumnTransformers(qualified_columns_list_matcher->transformers, context);
|
||||
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), std::move(column_list_identifiers), std::move(column_transformers));
|
||||
}
|
||||
else
|
||||
@ -833,15 +833,15 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
|
||||
}
|
||||
|
||||
|
||||
ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const
|
||||
ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, const ContextPtr & context) const
|
||||
{
|
||||
ColumnTransformersNodes column_transformers;
|
||||
size_t children_size = matcher_expression->children.size();
|
||||
|
||||
for (; start_child_index < children_size; ++start_child_index)
|
||||
if (!matcher_expression)
|
||||
return column_transformers;
|
||||
|
||||
for (const auto & child : matcher_expression->children)
|
||||
{
|
||||
const auto & child = matcher_expression->children[start_child_index];
|
||||
|
||||
if (auto * apply_transformer = child->as<ASTColumnsApplyTransformer>())
|
||||
{
|
||||
if (apply_transformer->lambda)
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h>
|
||||
#include <Analyzer/Passes/FuseFunctionsPass.h>
|
||||
#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
|
||||
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -91,7 +92,6 @@ public:
|
||||
* TODO: Support setting optimize_move_functions_out_of_any.
|
||||
* TODO: Support setting optimize_aggregators_of_group_by_keys.
|
||||
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
|
||||
* TODO: Support setting optimize_redundant_functions_in_order_by.
|
||||
* TODO: Support setting optimize_monotonous_functions_in_order_by.
|
||||
* TODO: Support settings.optimize_or_like_chain.
|
||||
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
|
||||
@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
|
||||
if (settings.optimize_if_chain_to_multiif)
|
||||
manager.addPass(std::make_unique<IfChainToMultiIfPass>());
|
||||
|
||||
if (settings.optimize_redundant_functions_in_order_by)
|
||||
manager.addPass(std::make_unique<OptimizeRedundantFunctionsInOrderByPass>());
|
||||
|
||||
manager.addPass(std::make_unique<OrderByTupleEliminationPass>());
|
||||
manager.addPass(std::make_unique<OrderByLimitByDuplicateEliminationPass>());
|
||||
|
||||
|
@ -156,10 +156,9 @@ void BackupWriterS3::copyObjectImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
const Aws::S3::Model::HeadObjectResult & head,
|
||||
size_t size,
|
||||
const std::optional<ObjectAttributes> & metadata) const
|
||||
{
|
||||
size_t size = head.GetContentLength();
|
||||
LOG_TRACE(log, "Copying {} bytes using single-operation copy", size);
|
||||
|
||||
Aws::S3::Model::CopyObjectRequest request;
|
||||
@ -177,7 +176,7 @@ void BackupWriterS3::copyObjectImpl(
|
||||
if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge"
|
||||
|| outcome.GetError().GetExceptionName() == "InvalidRequest"))
|
||||
{ // Can't come here with MinIO, MinIO allows single part upload for large objects.
|
||||
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
|
||||
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, size, metadata);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -191,10 +190,9 @@ void BackupWriterS3::copyObjectMultipartImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
const Aws::S3::Model::HeadObjectResult & head,
|
||||
size_t size,
|
||||
const std::optional<ObjectAttributes> & metadata) const
|
||||
{
|
||||
size_t size = head.GetContentLength();
|
||||
LOG_TRACE(log, "Copying {} bytes using multipart upload copy", size);
|
||||
|
||||
String multipart_upload_id;
|
||||
@ -309,16 +307,16 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
|
||||
std::string source_bucket = object_storage->getObjectsNamespace();
|
||||
auto file_path = fs::path(s3_uri.key) / file_name_to;
|
||||
|
||||
auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult();
|
||||
if (static_cast<size_t>(head.GetContentLength()) < request_settings.getUploadSettings().max_single_operation_copy_size)
|
||||
auto size = S3::getObjectSize(*client, source_bucket, objects[0].absolute_path);
|
||||
if (size < request_settings.getUploadSettings().max_single_operation_copy_size)
|
||||
{
|
||||
copyObjectImpl(
|
||||
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
|
||||
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
copyObjectMultipartImpl(
|
||||
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
|
||||
source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -67,7 +67,7 @@ private:
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
const Aws::S3::Model::HeadObjectResult & head,
|
||||
size_t size,
|
||||
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
|
||||
|
||||
void copyObjectMultipartImpl(
|
||||
@ -75,7 +75,7 @@ private:
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
const Aws::S3::Model::HeadObjectResult & head,
|
||||
size_t size,
|
||||
const std::optional<ObjectAttributes> & metadata = std::nullopt) const;
|
||||
|
||||
void removeFilesBatch(const Strings & file_names);
|
||||
|
@ -309,6 +309,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(S3CopyObject, "Number of S3 API CopyObject calls.") \
|
||||
M(S3ListObjects, "Number of S3 API ListObjects calls.") \
|
||||
M(S3HeadObject, "Number of S3 API HeadObject calls.") \
|
||||
M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.") \
|
||||
M(S3GetObjectMetadata, "Number of S3 API GetObject calls for getting metadata.") \
|
||||
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.") \
|
||||
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.") \
|
||||
M(S3UploadPart, "Number of S3 API UploadPart calls.") \
|
||||
@ -321,6 +323,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \
|
||||
M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \
|
||||
M(DiskS3HeadObject, "Number of DiskS3 API HeadObject calls.") \
|
||||
M(DiskS3GetObjectAttributes, "Number of DiskS3 API GetObjectAttributes calls.") \
|
||||
M(DiskS3GetObjectMetadata, "Number of DiskS3 API GetObject calls for getting metadata.") \
|
||||
M(DiskS3CreateMultipartUpload, "Number of DiskS3 API CreateMultipartUpload calls.") \
|
||||
M(DiskS3UploadPartCopy, "Number of DiskS3 API UploadPartCopy calls.") \
|
||||
M(DiskS3UploadPart, "Number of DiskS3 API UploadPart calls.") \
|
||||
@ -449,7 +453,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(OverflowBreak, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'break' and the result is incomplete.") \
|
||||
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
|
||||
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
|
||||
|
||||
\
|
||||
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
|
@ -7,6 +7,29 @@
|
||||
#include <Poco/Message.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
/// This wrapper is useful to save formatted message into a String before sending it to a logger
class LogToStrImpl
{
    String & out_str;
    Poco::Logger * logger;
    bool propagate_to_actual_log = true;
public:
    LogToStrImpl(String & out_str_, Poco::Logger * logger_) : out_str(out_str_) , logger(logger_) {}
    LogToStrImpl & operator -> () { return *this; }
    bool is(Poco::Message::Priority priority) { propagate_to_actual_log &= logger->is(priority); return true; }
    LogToStrImpl * getChannel() {return this; }
    const String & name() const { return logger->name(); }
    void log(const Poco::Message & message)
    {
        out_str = message.getText();
        if (!propagate_to_actual_log)
            return;
        if (auto * channel = logger->getChannel())
            channel->log(message);
    }
};

#define LogToStr(x, y) std::make_unique<LogToStrImpl>(x, y)
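
The wrapper above works because the `LOG_*` macros only ever call `is()`, `getChannel()` and `log()` on whatever `getLogger()` returns, so any object that mimics that surface can intercept the formatted message before (optionally) forwarding it. Below is a self-contained sketch of the same capture-then-forward idea, using a plain callback in place of Poco channels; all names here are illustrative and not part of ClickHouse or Poco.

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <utility>

/// Sketch of the capture-then-forward idea: the sink stores the formatted text in a
/// caller-provided string and only then hands it to the real logging backend (if any).
class CaptureSink
{
    std::string & out;
    std::function<void(const std::string &)> next;

public:
    explicit CaptureSink(std::string & out_, std::function<void(const std::string &)> next_ = {})
        : out(out_), next(std::move(next_)) {}

    void log(const std::string & message)
    {
        out = message; /// keep a copy for the caller to inspect
        if (next)
            next(message); /// propagate to the actual backend
    }
};

int main()
{
    std::string captured;
    CaptureSink sink(captured, [](const std::string & m) { std::cout << "log: " << m << '\n'; });
    sink.log("Loaded 42 parts");
    std::cout << "captured: " << captured << '\n';
}
```

The real `LogToStrImpl` achieves the same without changing the macro body: it simply quacks like a `Poco::Logger` and decides inside `log()` whether to pass the `Poco::Message` on.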
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -17,8 +40,37 @@ namespace
|
||||
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; };
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); };
|
||||
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; };
|
||||
|
||||
template<typename T> struct is_fmt_runtime : std::false_type {};
|
||||
template<typename T> struct is_fmt_runtime<fmt::basic_runtime<T>> : std::true_type {};
|
||||
|
||||
/// Usually we use LOG_*(...) macros with either string literals or fmt::runtime(whatever) as a format string.
|
||||
/// This function is useful to get a string_view to a static format string passed to LOG_* macro.
|
||||
template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x)
|
||||
{
|
||||
if constexpr (is_fmt_runtime<T>::value)
|
||||
{
|
||||
/// It definitely was fmt::runtime(something).
|
||||
/// We are not sure about a lifetime of the string, so return empty view.
|
||||
/// Also it can be arbitrary string, not a formatting pattern.
|
||||
/// So returning empty pattern will not pollute the set of patterns.
|
||||
return std::string_view();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Most likely it was a string literal.
|
||||
/// Unfortunately, there's no good way to check if something is a string literal.
|
||||
/// But fmtlib requires a format string to be compile-time constant unless fmt::runtime is used.
|
||||
static_assert(std::is_nothrow_convertible<T, const char * const>::value);
|
||||
static_assert(!std::is_pointer<T>::value);
|
||||
return std::string_view(x);
|
||||
}
|
||||
}
|
||||
}
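
Below is a standalone illustration of the detection trick `tryGetStaticFormatString` relies on: anything wrapped in a runtime-format marker yields an empty view, while a genuine string literal passes the convertibility and `!is_pointer` checks and its view can be kept indefinitely. `RuntimeFormat` is a hypothetical stand-in for `fmt::basic_runtime`, and the snippet assumes C++20 for `std::is_nothrow_convertible`.

```cpp
#include <cstdio>
#include <string_view>
#include <type_traits>

/// Stand-in for fmt::basic_runtime<Char>; only the shape matters for the sketch.
template <typename Char> struct RuntimeFormat { std::basic_string_view<Char> str; };

template <typename T> struct is_runtime_format : std::false_type {};
template <typename Char> struct is_runtime_format<RuntimeFormat<Char>> : std::true_type {};

template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x)
{
    if constexpr (is_runtime_format<std::decay_t<T>>::value)
        return {}; /// unknown lifetime and arbitrary contents - don't keep a view
    else
    {
        /// Same reasoning as above: the format string must be a compile-time constant here,
        /// so this is almost certainly a string literal with static storage duration.
        static_assert(std::is_nothrow_convertible<T, const char * const>::value);
        static_assert(!std::is_pointer<T>::value);
        return std::string_view(x);
    }
}

int main()
{
    constexpr auto static_pattern = tryGetStaticFormatString("rows: {}");
    auto runtime_pattern = tryGetStaticFormatString(RuntimeFormat<char>{"rows: {}"});
    std::printf("%zu %zu\n", static_pattern.size(), runtime_pattern.size()); /// prints: 8 0
}
```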
|
||||
|
||||
#define LOG_IMPL_FIRST_ARG(X, ...) X
|
||||
|
||||
/// Logs a message to a specified logger with that level.
|
||||
/// If more than one argument is provided,
|
||||
/// the first argument is interpreted as template with {}-substitutions
|
||||
@ -30,7 +82,7 @@ namespace
|
||||
auto _logger = ::getLogger(logger); \
|
||||
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
|
||||
(DB::CurrentThread::getGroup()->client_logs_level >= (priority)); \
|
||||
if (_logger->is((PRIORITY)) || _is_clients_log) \
|
||||
if (_is_clients_log || _logger->is((PRIORITY))) \
|
||||
{ \
|
||||
std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \
|
||||
if (auto _channel = _logger->getChannel()) \
|
||||
@ -40,7 +92,7 @@ namespace
|
||||
file_function += "; "; \
|
||||
file_function += __PRETTY_FUNCTION__; \
|
||||
Poco::Message poco_message(_logger->name(), formatted_message, \
|
||||
(PRIORITY), file_function.c_str(), __LINE__); \
|
||||
(PRIORITY), file_function.c_str(), __LINE__, tryGetStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__))); \
|
||||
_channel->log(poco_message); \
|
||||
} \
|
||||
} \
|
||||
|
@ -773,6 +773,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
|
||||
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
|
||||
|
@ -80,7 +80,8 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}}},
|
||||
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"},
|
||||
{"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}}},
|
||||
{"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
|
||||
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
|
||||
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
|
||||
|
@ -16,6 +16,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
|
||||
@ -154,7 +155,7 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
|
||||
|
||||
void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (settings.json.named_tuples_as_objects
|
||||
if (settings.json.write_named_tuples_as_objects
|
||||
&& have_explicit_names)
|
||||
{
|
||||
writeChar('{', ostr);
|
||||
@ -185,7 +186,7 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu
|
||||
|
||||
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (settings.json.named_tuples_as_objects
|
||||
if (settings.json.read_named_tuples_as_objects
|
||||
&& have_explicit_names)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
@ -194,12 +195,15 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
|
||||
addElementSafe(elems.size(), column, [&]
|
||||
{
|
||||
// Require all elements but in arbitrary order.
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
std::vector<UInt8> seen_elements(elems.size(), 0);
|
||||
size_t i = 0;
|
||||
while (!istr.eof() && *istr.position() != '}')
|
||||
{
|
||||
if (i == elems.size())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {}", elems.size());
|
||||
|
||||
if (i > 0)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(',', istr);
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
@ -211,12 +215,35 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
const size_t element_pos = getPositionByName(name);
|
||||
seen_elements[element_pos] = 1;
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
++i;
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar('}', istr);
|
||||
|
||||
/// Check if we have missing elements.
|
||||
if (i != elems.size())
|
||||
{
|
||||
for (size_t element_pos = 0; element_pos != seen_elements.size(); ++element_pos)
|
||||
{
|
||||
if (seen_elements[element_pos])
|
||||
continue;
|
||||
|
||||
if (!settings.json.defaults_for_missing_elements_in_named_tuple)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
|
||||
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
|
||||
elems[element_pos]->getElementName());
|
||||
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
element_column.insertDefault();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Dictionaries/getDictionaryConfigurationFromAST.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Interpreters/getClusterName.h>
|
||||
@ -175,7 +176,7 @@ namespace
|
||||
/// Finds dependencies of a function.
|
||||
void visitFunction(const ASTFunction & function)
|
||||
{
|
||||
if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet"))
|
||||
if (functionIsJoinGet(function.name) || functionIsDictGet(function.name))
|
||||
{
|
||||
/// dictGet('dict_name', attr_names, id_expr)
|
||||
/// dictHas('dict_name', id_expr)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Databases/DDLLoadingDependencyVisitor.h>
|
||||
#include <Dictionaries/getDictionaryConfigurationFromAST.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/misc.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
@ -52,23 +53,41 @@ bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child)
|
||||
return true;
|
||||
}
|
||||
|
||||
ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function)
|
||||
ssize_t DDLMatcherBase::getPositionOfTableNameArgumentToEvaluate(const ASTFunction & function)
|
||||
{
|
||||
if (function.name == "joinGet" ||
|
||||
function.name == "dictHas" ||
|
||||
function.name == "dictIsIn" ||
|
||||
function.name.starts_with("dictGet"))
|
||||
if (functionIsJoinGet(function.name) || functionIsDictGet(function.name))
|
||||
return 0;
|
||||
|
||||
if (Poco::toLower(function.name) == "in")
|
||||
return -1;
|
||||
}
|
||||
|
||||
ssize_t DDLMatcherBase::getPositionOfTableNameArgumentToVisit(const ASTFunction & function)
|
||||
{
|
||||
ssize_t maybe_res = getPositionOfTableNameArgumentToEvaluate(function);
|
||||
if (0 <= maybe_res)
|
||||
return maybe_res;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(function.name))
|
||||
{
|
||||
if (function.children.empty())
|
||||
return -1;
|
||||
|
||||
const auto * args = function.children[0]->as<ASTExpressionList>();
|
||||
if (!args || args->children.size() != 2)
|
||||
return -1;
|
||||
|
||||
if (args->children[1]->as<ASTFunction>())
|
||||
return -1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void DDLLoadingDependencyVisitor::visit(const ASTFunction & function, Data & data)
|
||||
{
|
||||
ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
|
||||
ssize_t table_name_arg_idx = getPositionOfTableNameArgumentToVisit(function);
|
||||
if (table_name_arg_idx < 0)
|
||||
return;
|
||||
extractTableNameFromArgument(function, data, table_name_arg_idx);
|
||||
|
@ -23,7 +23,8 @@ class DDLMatcherBase
|
||||
{
|
||||
public:
|
||||
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
|
||||
static ssize_t getPositionOfTableNameArgument(const ASTFunction & function);
|
||||
static ssize_t getPositionOfTableNameArgumentToVisit(const ASTFunction & function);
|
||||
static ssize_t getPositionOfTableNameArgumentToEvaluate(const ASTFunction & function);
|
||||
};
|
||||
|
||||
/// Visits ASTCreateQuery and extracts the names of all tables which should be loaded before a specified table.
|
||||
|
@ -23,7 +23,7 @@ void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & d
|
||||
{
|
||||
/// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))"
|
||||
/// with "dictGet('db_name.dict', 'value', toUInt32(1))"
|
||||
ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
|
||||
ssize_t table_name_arg_idx = getPositionOfTableNameArgumentToEvaluate(function);
|
||||
if (table_name_arg_idx < 0)
|
||||
return;
|
||||
|
||||
|
@ -171,8 +171,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
|
||||
if (!hasPendingDataToRead())
|
||||
return false;
|
||||
|
||||
size_t size, offset;
|
||||
chassert(file_offset_of_buffer_end <= impl->getFileSize());
|
||||
|
||||
size_t size, offset;
|
||||
if (prefetch_future.valid())
|
||||
{
|
||||
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AsynchronousRemoteReadWaitMicroseconds);
|
||||
@ -210,8 +211,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
|
||||
/// In case of multiple files for the same file in clickhouse (i.e. log family)
|
||||
/// file_offset_of_buffer_end will not match getImplementationBufferOffset()
|
||||
/// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()]
|
||||
assert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset());
|
||||
assert(file_offset_of_buffer_end <= impl->getFileSize());
|
||||
chassert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset());
|
||||
chassert(file_offset_of_buffer_end <= impl->getFileSize());
|
||||
|
||||
return bytes_read;
|
||||
}
|
||||
@ -277,6 +278,15 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence)
|
||||
/// First reset the buffer so the next read will fetch new data to the buffer.
|
||||
resetWorkingBuffer();
|
||||
|
||||
if (read_until_position && new_pos > *read_until_position)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset);
|
||||
impl->reset();
|
||||
|
||||
file_offset_of_buffer_end = new_pos = *read_until_position; /// read_until_position is a non-included boundary.
|
||||
return new_pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
|
||||
* Note: we read in range [file_offset_of_buffer_end, read_until_position).
|
||||
|
@ -256,7 +256,7 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const
|
||||
String ReadBufferFromRemoteFSGather::getInfoForLog()
|
||||
{
|
||||
if (!current_buf)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get info: buffer not initialized");
|
||||
return "";
|
||||
|
||||
return current_buf->getInfoForLog();
|
||||
}
|
||||
|
@ -125,14 +125,19 @@ std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path
|
||||
getRandomASCIIString(key_name_total_size - key_name_prefix_size));
|
||||
}
|
||||
|
||||
Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
|
||||
size_t S3ObjectStorage::getObjectSize(const std::string & bucket_from, const std::string & key) const
|
||||
{
|
||||
return S3::headObject(*client.get(), bucket_from, key, "", true);
|
||||
return S3::getObjectSize(*client.get(), bucket_from, key, {}, /* for_disk_s3= */ true);
|
||||
}
|
||||
|
||||
bool S3ObjectStorage::exists(const StoredObject & object) const
|
||||
{
|
||||
return S3::objectExists(*client.get(), bucket, object.absolute_path, "", true);
|
||||
return S3::objectExists(*client.get(), bucket, object.absolute_path, {}, /* for_disk_s3= */ true);
|
||||
}
|
||||
|
||||
void S3ObjectStorage::checkObjectExists(const std::string & bucket_from, const std::string & key, std::string_view description) const
|
||||
{
|
||||
return S3::checkObjectExists(*client.get(), bucket_from, key, {}, /* for_disk_s3= */ true, description);
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBufferFromFileBase> S3ObjectStorage::readObjects( /// NOLINT
|
||||
@ -409,13 +414,10 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons
|
||||
{
|
||||
ObjectMetadata result;
|
||||
|
||||
auto object_head = requestObjectHeadData(bucket, path);
|
||||
throwIfError(object_head);
|
||||
|
||||
auto & object_head_result = object_head.GetResult();
|
||||
result.size_bytes = object_head_result.GetContentLength();
|
||||
result.last_modified = object_head_result.GetLastModified().Millis();
|
||||
result.attributes = object_head_result.GetMetadata();
|
||||
auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, /* for_disk_s3= */ true);
|
||||
result.size_bytes = object_info.size;
|
||||
result.last_modified = object_info.last_modification_time;
|
||||
result.attributes = S3::getObjectMetadata(*client.get(), bucket, path, {}, /* for_disk_s3= */ true);
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -442,7 +444,7 @@ void S3ObjectStorage::copyObjectImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head,
|
||||
size_t size,
|
||||
std::optional<ObjectAttributes> metadata) const
|
||||
{
|
||||
auto client_ptr = client.get();
|
||||
@ -464,7 +466,7 @@ void S3ObjectStorage::copyObjectImpl(
|
||||
if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge"
|
||||
|| outcome.GetError().GetExceptionName() == "InvalidRequest"))
|
||||
{ // Can't come here with MinIO, MinIO allows single part upload for large objects.
|
||||
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
|
||||
copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, size, metadata);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -472,12 +474,7 @@ void S3ObjectStorage::copyObjectImpl(
|
||||
|
||||
auto settings_ptr = s3_settings.get();
|
||||
if (settings_ptr->request_settings.check_objects_after_upload)
|
||||
{
|
||||
auto object_head = requestObjectHeadData(dst_bucket, dst_key);
|
||||
if (!object_head.IsSuccess())
|
||||
throw Exception(ErrorCodes::S3_ERROR, "Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", dst_key, dst_bucket);
|
||||
}
|
||||
|
||||
checkObjectExists(dst_bucket, dst_key, "Immediately after upload");
|
||||
}
|
||||
|
||||
void S3ObjectStorage::copyObjectMultipartImpl(
|
||||
@ -485,15 +482,11 @@ void S3ObjectStorage::copyObjectMultipartImpl(
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head,
|
||||
size_t size,
|
||||
std::optional<ObjectAttributes> metadata) const
|
||||
{
|
||||
if (!head)
|
||||
head = requestObjectHeadData(src_bucket, src_key).GetResult();
|
||||
|
||||
auto settings_ptr = s3_settings.get();
|
||||
auto client_ptr = client.get();
|
||||
size_t size = head->GetContentLength();
|
||||
|
||||
String multipart_upload_id;
|
||||
|
||||
@ -569,29 +562,24 @@ void S3ObjectStorage::copyObjectMultipartImpl(
|
||||
}
|
||||
|
||||
if (settings_ptr->request_settings.check_objects_after_upload)
|
||||
{
|
||||
auto object_head = requestObjectHeadData(dst_bucket, dst_key);
|
||||
if (!object_head.IsSuccess())
|
||||
throw Exception(ErrorCodes::S3_ERROR, "Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", dst_key, dst_bucket);
|
||||
}
|
||||
|
||||
checkObjectExists(dst_bucket, dst_key, "Immediately after upload");
|
||||
}
|
||||
|
||||
void S3ObjectStorage::copyObject( // NOLINT
|
||||
const StoredObject & object_from, const StoredObject & object_to, std::optional<ObjectAttributes> object_to_attributes)
|
||||
{
|
||||
auto head = requestObjectHeadData(bucket, object_from.absolute_path).GetResult();
|
||||
auto size = getObjectSize(bucket, object_from.absolute_path);
|
||||
static constexpr int64_t multipart_upload_threashold = 5UL * 1024 * 1024 * 1024;
|
||||
|
||||
if (head.GetContentLength() >= multipart_upload_threashold)
|
||||
if (size >= multipart_upload_threashold)
|
||||
{
|
||||
copyObjectMultipartImpl(
|
||||
bucket, object_from.absolute_path, bucket, object_to.absolute_path, head, object_to_attributes);
|
||||
bucket, object_from.absolute_path, bucket, object_to.absolute_path, size, object_to_attributes);
|
||||
}
|
||||
else
|
||||
{
|
||||
copyObjectImpl(
|
||||
bucket, object_from.absolute_path, bucket, object_to.absolute_path, head, object_to_attributes);
|
||||
bucket, object_from.absolute_path, bucket, object_to.absolute_path, size, object_to_attributes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -172,7 +172,7 @@ private:
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
|
||||
size_t size,
|
||||
std::optional<ObjectAttributes> metadata = std::nullopt) const;
|
||||
|
||||
void copyObjectMultipartImpl(
|
||||
@ -180,13 +180,14 @@ private:
|
||||
const String & src_key,
|
||||
const String & dst_bucket,
|
||||
const String & dst_key,
|
||||
std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
|
||||
size_t size,
|
||||
std::optional<ObjectAttributes> metadata = std::nullopt) const;
|
||||
|
||||
void removeObjectImpl(const StoredObject & object, bool if_exists);
|
||||
void removeObjectsImpl(const StoredObjects & objects, bool if_exists);
|
||||
|
||||
Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const;
|
||||
size_t getObjectSize(const std::string & bucket_from, const std::string & key) const;
|
||||
void checkObjectExists(const std::string & bucket_from, const std::string & key, std::string_view description) const;
|
||||
|
||||
std::string bucket;
|
||||
|
||||
|
@ -90,7 +90,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
|
||||
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
|
||||
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
|
||||
format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
|
||||
format_settings.json.write_named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
|
||||
format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects;
|
||||
format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple;
|
||||
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
|
||||
format_settings.json.quote_64bit_floats = settings.output_format_json_quote_64bit_floats;
|
||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||
|
@ -153,7 +153,9 @@ struct FormatSettings
|
||||
bool quote_denormals = true;
|
||||
bool quote_decimals = false;
|
||||
bool escape_forward_slashes = true;
|
||||
bool named_tuples_as_objects = false;
|
||||
bool read_named_tuples_as_objects = false;
|
||||
bool write_named_tuples_as_objects = false;
|
||||
bool defaults_for_missing_elements_in_named_tuple = false;
|
||||
bool serialize_as_strings = false;
|
||||
bool read_bools_as_numbers = true;
|
||||
bool read_numbers_as_strings = true;
|
||||
|
@ -118,6 +118,16 @@ struct MatchImpl
|
||||
if (haystack_offsets.empty())
|
||||
return;
|
||||
|
||||
/// Shortcut for the silly but practical case that the pattern matches everything/nothing independently of the haystack:
|
||||
/// - col [not] [i]like '%' / '%%'
|
||||
/// - match(col, '.*')
|
||||
if ((is_like && (needle == "%" or needle == "%%")) || (!is_like && (needle == ".*" || needle == ".*?")))
|
||||
{
|
||||
for (auto & x : res)
|
||||
x = !negate;
|
||||
return;
|
||||
}
|
||||
|
||||
/// Special case that the [I]LIKE expression reduces to finding a substring in a string
|
||||
String strstr_pattern;
|
||||
if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
|
||||
@ -267,6 +277,16 @@ struct MatchImpl
|
||||
if (haystack.empty())
|
||||
return;
|
||||
|
||||
/// Shortcut for the silly but practical case that the pattern matches everything/nothing independently of the haystack:
|
||||
/// - col [not] [i]like '%' / '%%'
|
||||
/// - match(col, '.*')
|
||||
if ((is_like && (needle == "%" or needle == "%%")) || (!is_like && (needle == ".*" || needle == ".*?")))
|
||||
{
|
||||
for (auto & x : res)
|
||||
x = !negate;
|
||||
return;
|
||||
}
|
||||
|
||||
/// Special case that the [I]LIKE expression reduces to finding a substring in a string
|
||||
String strstr_pattern;
|
||||
if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
|
||||
|
@ -250,7 +250,7 @@ size_t ReadBufferFromS3::getFileSize()
|
||||
if (file_size)
|
||||
return *file_size;
|
||||
|
||||
auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, true, read_settings.for_object_storage);
|
||||
auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, /* for_disk_s3= */ read_settings.for_object_storage);
|
||||
|
||||
file_size = object_size;
|
||||
return *file_size;
|
||||
|
@ -27,6 +27,8 @@
|
||||
# include <aws/core/utils/UUID.h>
|
||||
# include <aws/core/http/HttpClientFactory.h>
|
||||
# include <aws/s3/S3Client.h>
|
||||
# include <aws/s3/model/GetObjectAttributesRequest.h>
|
||||
# include <aws/s3/model/GetObjectRequest.h>
# include <aws/s3/model/HeadObjectRequest.h>

# include <IO/S3/PocoHTTPClientFactory.h>
@ -40,7 +42,11 @@

namespace ProfileEvents
{
extern const Event S3GetObjectAttributes;
extern const Event S3GetObjectMetadata;
extern const Event S3HeadObject;
extern const Event DiskS3GetObjectAttributes;
extern const Event DiskS3GetObjectMetadata;
extern const Event DiskS3HeadObject;
}

@ -699,6 +705,92 @@ public:
}
};

/// Extracts the endpoint from a constructed S3 client.
String getEndpoint(const Aws::S3::S3Client & client)
{
const auto * endpoint_provider = dynamic_cast<const Aws::S3::Endpoint::S3DefaultEpProviderBase *>(const_cast<Aws::S3::S3Client &>(client).accessEndpointProvider().get());
if (!endpoint_provider)
return {};
String endpoint;
endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint);
return endpoint;
}

/// Performs a request to get the size and last modification time of an object.
/// The function performs either HeadObject or GetObjectAttributes request depending on the endpoint.
std::pair<std::optional<DB::S3::ObjectInfo>, Aws::S3::S3Error> tryGetObjectInfo(
const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
{
auto endpoint = getEndpoint(client);
bool use_get_object_attributes_request = (endpoint.find(".amazonaws.com") != String::npos);

if (use_get_object_attributes_request)
{
/// It's better not to use `HeadObject` requests for AWS S3 because they don't work well with the global region.
/// Details: `HeadObject` request never returns a response body (even if there is an error) however
/// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv"
/// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways
/// to determine the correct region and try to repeat the request again with the correct region.
/// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies,
/// but for `HeadObject` there is no response body so this way doesn't work. That's why we use `GetObjectAttributes` request instead.
/// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.

ProfileEvents::increment(ProfileEvents::S3GetObjectAttributes);
if (for_disk_s3)
ProfileEvents::increment(ProfileEvents::DiskS3GetObjectAttributes);

Aws::S3::Model::GetObjectAttributesRequest req;
req.SetBucket(bucket);
req.SetKey(key);

if (!version_id.empty())
req.SetVersionId(version_id);

req.SetObjectAttributes({Aws::S3::Model::ObjectAttributes::ObjectSize});

auto outcome = client.GetObjectAttributes(req);
if (outcome.IsSuccess())
{
const auto & result = outcome.GetResult();
DB::S3::ObjectInfo object_info;
object_info.size = static_cast<size_t>(result.GetObjectSize());
object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
return {object_info, {}};
}

return {std::nullopt, outcome.GetError()};
}
else
{
/// By default we use `HeadObject` requests.
/// We cannot just use `GetObjectAttributes` requests always because some S3 providers (e.g. Minio)
/// don't support `GetObjectAttributes` requests.

ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (for_disk_s3)
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);

Aws::S3::Model::HeadObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);

if (!version_id.empty())
req.SetVersionId(version_id);

auto outcome = client.HeadObject(req);
if (outcome.IsSuccess())
{
const auto & result = outcome.GetResult();
DB::S3::ObjectInfo object_info;
object_info.size = static_cast<size_t>(result.GetContentLength());
object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
return {object_info, {}};
}

return {std::nullopt, outcome.GetError()};
}
}

}

@ -894,54 +986,33 @@ namespace S3
return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY;
}

Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
{
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (for_disk_s3)
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);

Aws::S3::Model::HeadObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);

if (!version_id.empty())
req.SetVersionId(version_id);

return client.HeadObject(req);
}

S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
{
auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);

if (outcome.IsSuccess())
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
if (object_info)
{
auto read_result = outcome.GetResultWithOwnership();
return {.size = static_cast<size_t>(read_result.GetContentLength()), .last_modification_time = read_result.GetLastModified().Millis() / 1000};
return *object_info;
}
else if (throw_on_error)
{
const auto & error = outcome.GetError();
throw DB::Exception(ErrorCodes::S3_ERROR,
"Failed to HEAD object: {}. HTTP response code: {}",
"Failed to get object attributes: {}. HTTP response code: {}",
error.GetMessage(), static_cast<size_t>(error.GetResponseCode()));
}
return {};
}

size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
{
return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size;
return getObjectInfo(client, bucket, key, version_id, for_disk_s3, throw_on_error).size;
}

bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
{
auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);

if (outcome.IsSuccess())
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
if (object_info)
return true;

const auto & error = outcome.GetError();
if (isNotFoundError(error.GetErrorType()))
return false;

@ -949,6 +1020,48 @@ namespace S3
"Failed to check existence of key {} in bucket {}: {}",
key, bucket, error.GetMessage());
}

void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, std::string_view description)
{
auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
if (object_info)
return;
throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}",
(description.empty() ? "" : (String(description) + ": ")), key, bucket, error.GetMessage());
}

std::map<String, String> getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
{
ProfileEvents::increment(ProfileEvents::S3GetObjectMetadata);
if (for_disk_s3)
ProfileEvents::increment(ProfileEvents::DiskS3GetObjectMetadata);

/// We must not use the `HeadObject` request, see the comment about `HeadObjectRequest` in S3Common.h.

Aws::S3::Model::GetObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);

/// Only the first byte will be read.
/// We don't need that first byte but the range should be set otherwise the entire object will be read.
req.SetRange("bytes=0-0");

if (!version_id.empty())
req.SetVersionId(version_id);

auto outcome = client.GetObject(req);

if (outcome.IsSuccess())
return outcome.GetResult().GetMetadata();

if (!throw_on_error)
return {};

const auto & error = outcome.GetError();
throw S3Exception(error.GetErrorType(),
"Failed to get metadata of key {} in bucket {}: {}",
key, bucket, error.GetMessage());
}
}

}

@ -11,15 +11,15 @@
#if USE_AWS_S3

#include <base/types.h>
#include <aws/core/Aws.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/S3Errors.h>
#include <Poco/URI.h>

#include <Common/Exception.h>
#include <Common/Throttler_fwd.h>

#include <Poco/URI.h>
#include <aws/core/Aws.h>
#include <aws/s3/S3Errors.h>

namespace Aws::S3 { class S3Client; }

namespace DB
{
@ -121,22 +121,29 @@ struct URI
static void validateBucket(const String & bucket, const Poco::URI & uri);
};

/// WARNING: Don't use `HeadObjectRequest`! Use the functions below instead.
/// For explanation see the comment about `HeadObject` request in the function tryGetObjectInfo().

struct ObjectInfo
{
size_t size = 0;
time_t last_modification_time = 0;
};

bool isNotFoundError(Aws::S3::S3Errors error);
ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);

Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);

S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);

size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);

bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);

/// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message.
void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, std::string_view description = {});

bool isNotFoundError(Aws::S3::S3Errors error);

/// Returns the object's metadata.
std::map<String, String> getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);

}
#endif

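The hunks above replace the raw `headObject()` helper with `getObjectInfo`/`objectExists`/`checkObjectExists` and move `for_disk_s3` ahead of `throw_on_error` in the argument list. A minimal caller-side sketch of the new API follows; the bucket and key names are hypothetical and the snippet only builds inside the ClickHouse tree (it assumes the `IO/S3Common.h` header shown above and an already configured client).

```cpp
#include <IO/S3Common.h>

/// Illustrative only: demonstrates the new argument order (version_id, for_disk_s3, throw_on_error).
void verifyUploadedObject(const Aws::S3::S3Client & client)
{
    const std::string bucket = "example-bucket";  /// hypothetical bucket
    const std::string key = "data/part_0.bin";    /// hypothetical key

    if (!DB::S3::objectExists(client, bucket, key))
        return;

    /// Size and mtime come from GetObjectAttributes on AWS endpoints, HeadObject elsewhere.
    auto info = DB::S3::getObjectInfo(client, bucket, key, /* version_id = */ "", /* for_disk_s3 = */ false, /* throw_on_error = */ true);
    if (info.size == 0)
        return;

    /// Throws an S3Exception prefixed with the description if the object has disappeared meanwhile.
    DB::S3::checkObjectExists(client, bucket, key, "", false, "Upload verification");
}
```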
@ -182,12 +182,8 @@ void WriteBufferFromS3::finalizeImpl()
if (check_objects_after_upload)
{
LOG_TRACE(log, "Checking object {} exists after upload", key);

auto response = S3::headObject(*client_ptr, bucket, key, "", write_settings.for_object_storage);
if (!response.IsSuccess())
throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType());
else
LOG_TRACE(log, "Object {} exists after upload", key);
S3::checkObjectExists(*client_ptr, bucket, key, {}, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload");
LOG_TRACE(log, "Object {} exists after upload", key);
}
}

@ -28,13 +28,29 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableIdentifier &
database = current_database;
}

DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
{
alias = identifier.tryGetAlias();

if (identifier.name_parts.size() == 2)
std::tie(database, table) = std::tie(identifier.name_parts[0], identifier.name_parts[1]);
else if (identifier.name_parts.size() == 1)
table = identifier.name_parts[0];
else
throw Exception("Logical error: invalid identifier", ErrorCodes::LOGICAL_ERROR);

if (database.empty())
database = current_database;
}

DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const String & current_database)
{
const auto * identifier = node->as<ASTTableIdentifier>();
if (!identifier)
throw Exception("Logical error: table identifier expected", ErrorCodes::LOGICAL_ERROR);

*this = DatabaseAndTableWithAlias(*identifier, current_database);
if (const auto * table_identifier = node->as<ASTTableIdentifier>())
*this = DatabaseAndTableWithAlias(*table_identifier, current_database);
else if (const auto * identifier = node->as<ASTIdentifier>())
*this = DatabaseAndTableWithAlias(*identifier, current_database);
else
throw Exception("Logical error: identifier or table identifier expected", ErrorCodes::LOGICAL_ERROR);
}

DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database)

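For clarity, the database/table resolution performed by the new `ASTIdentifier` constructor above can be read as the following standalone sketch (the function name is invented for illustration and is not part of the patch):

```cpp
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

/// Hypothetical helper mirroring the name_parts handling above:
/// "db.table" -> (db, table), "table" -> (current_database, table), anything else is an error.
std::pair<std::string, std::string> resolveDatabaseAndTable(
    const std::vector<std::string> & name_parts, const std::string & current_database)
{
    std::string database;
    std::string table;

    if (name_parts.size() == 2)
        std::tie(database, table) = std::tie(name_parts[0], name_parts[1]);
    else if (name_parts.size() == 1)
        table = name_parts[0];
    else
        throw std::logic_error("invalid identifier");

    if (database.empty())
        database = current_database;

    return {database, table};
}
```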
@ -14,6 +14,7 @@ namespace DB
{

class ASTSelectQuery;
class ASTIdentifier;
class ASTTableIdentifier;
struct ASTTableExpression;

@ -28,6 +29,7 @@ struct DatabaseAndTableWithAlias

DatabaseAndTableWithAlias() = default;
explicit DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
explicit DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
explicit DatabaseAndTableWithAlias(const ASTTableIdentifier & identifier, const String & current_database = "");
explicit DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");

@ -25,6 +25,7 @@ static const std::unordered_map<String, String> quantile_fuse_name_mapping = {
{NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name},
{NameQuantileExactLow::name, NameQuantilesExactLow::name},
{NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name},
{NameQuantileInterpolatedWeighted::name, NameQuantilesInterpolatedWeighted::name},
{NameQuantileTDigest::name, NameQuantilesTDigest::name},
{NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name},
{NameQuantileTiming::name, NameQuantilesTiming::name},
@ -61,9 +62,11 @@ void GatherFunctionQuantileData::FuseQuantileAggregatesData::addFuncNode(ASTPtr

const auto & arguments = func->arguments->children;

bool need_two_args = func->name == NameQuantileDeterministic::name || func->name == NameQuantileExactWeighted::name
|| func->name == NameQuantileTimingWeighted::name || func->name == NameQuantileTDigestWeighted::name
|| func->name == NameQuantileBFloat16Weighted::name;
|| func->name == NameQuantileInterpolatedWeighted::name || func->name == NameQuantileTimingWeighted::name
|| func->name == NameQuantileTDigestWeighted::name || func->name == NameQuantileBFloat16Weighted::name;

if (arguments.size() != (need_two_args ? 2 : 1))
return;

@ -288,6 +288,20 @@ struct ExplainSettings : public Settings
}
};

struct QuerySyntaxSettings
{
bool oneline = false;

constexpr static char name[] = "SYNTAX";

std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
{
{"oneline", oneline},
};

std::unordered_map<std::string, std::reference_wrapper<Int64>> integer_settings;
};

template <typename Settings>
ExplainSettings<Settings> checkAndGetSettings(const ASTPtr & ast_settings)
{
@ -362,13 +376,12 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
}
case ASTExplainQuery::AnalyzedSyntax:
{
if (ast.getSettings())
throw Exception("Settings are not supported for EXPLAIN SYNTAX query.", ErrorCodes::UNKNOWN_SETTING);
auto settings = checkAndGetSettings<QuerySyntaxSettings>(ast.getSettings());

ExplainAnalyzedSyntaxVisitor::Data data(getContext());
ExplainAnalyzedSyntaxVisitor(data).visit(query);

ast.getExplainedQuery()->format(IAST::FormatSettings(buf, false));
ast.getExplainedQuery()->format(IAST::FormatSettings(buf, settings.oneline));
break;
}
case ASTExplainQuery::QueryTree:

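The `QuerySyntaxSettings` struct above drives `EXPLAIN SYNTAX` settings through a map of setting names to `std::reference_wrapper<bool>` members. A self-contained sketch of that mechanism follows; the names and the parsed-settings representation are invented for illustration and this is not the actual `checkAndGetSettings` code:

```cpp
#include <functional>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

/// Stand-in for one parsed "name = value" entry of a SETTINGS clause.
struct ParsedBoolSetting { std::string name; bool value; };

struct SyntaxSettings
{
    bool oneline = false;

    /// Same idea as QuerySyntaxSettings::boolean_settings: map a setting name
    /// to a reference of the member it controls.
    std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings{{"oneline", oneline}};
};

int main()
{
    SyntaxSettings settings;
    std::vector<ParsedBoolSetting> parsed{{"oneline", true}};

    for (const auto & s : parsed)
    {
        auto it = settings.boolean_settings.find(s.name);
        if (it == settings.boolean_settings.end())
            throw std::runtime_error("Unknown setting: " + s.name);  /// analogous to UNKNOWN_SETTING
        it->second.get() = s.value;  /// writes through the reference into settings.oneline
    }

    std::cout << "oneline = " << settings.oneline << '\n';  /// prints "oneline = 1"
}
```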
@ -487,7 +487,7 @@ BlockIO InterpreterSystemQuery::execute()
dropDatabaseReplica(query);
break;
case Type::SYNC_REPLICA:
syncReplica(query);
syncReplica();
break;
case Type::SYNC_DATABASE_REPLICA:
syncReplicatedDatabase(query);
@ -507,6 +507,9 @@ BlockIO InterpreterSystemQuery::execute()
case Type::RESTORE_REPLICA:
restoreReplica();
break;
case Type::WAIT_LOADING_PARTS:
waitLoadingParts();
break;
case Type::RESTART_DISK:
restartDisk(query.disk);
case Type::FLUSH_LOGS:
@ -852,7 +855,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query)
throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR);
}

void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
void InterpreterSystemQuery::syncReplica()
{
getContext()->checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
@ -872,6 +875,23 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
throw Exception(ErrorCodes::BAD_ARGUMENTS, table_is_not_replicated.data(), table_id.getNameForLogs());
}

void InterpreterSystemQuery::waitLoadingParts()
{
getContext()->checkAccess(AccessType::SYSTEM_WAIT_LOADING_PARTS, table_id);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());

if (auto * merge_tree = dynamic_cast<MergeTreeData *>(table.get()))
{
LOG_TRACE(log, "Waiting for loading of parts of table {}", table_id.getFullTableName());
merge_tree->waitForOutdatedPartsToBeLoaded();
LOG_TRACE(log, "Finished waiting for loading of parts of table {}", table_id.getFullTableName());
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Command WAIT LOADING PARTS is supported only for MergeTree table, but got: {}", table->getName());
}
}

void InterpreterSystemQuery::syncReplicatedDatabase(ASTSystemQuery & query)
{
@ -1071,6 +1091,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_RESTART_REPLICA);
break;
}
case Type::WAIT_LOADING_PARTS:
{
required_access.emplace_back(AccessType::SYSTEM_WAIT_LOADING_PARTS, query.getDatabase(), query.getTable());
break;
}
case Type::SYNC_DATABASE_REPLICA:
{
required_access.emplace_back(AccessType::SYSTEM_SYNC_DATABASE_REPLICA, query.getDatabase());

|
||||
|
||||
void restartReplica(const StorageID & replica, ContextMutablePtr system_context);
|
||||
void restartReplicas(ContextMutablePtr system_context);
|
||||
void syncReplica(ASTSystemQuery & query);
|
||||
void syncReplica();
|
||||
void waitLoadingParts();
|
||||
|
||||
void syncReplicatedDatabase(ASTSystemQuery & query);
|
||||
|
||||
|
@ -49,7 +49,8 @@ ASTPtr makeSubqueryTemplate()
|
||||
ASTPtr makeSubqueryQualifiedAsterisk()
|
||||
{
|
||||
auto asterisk = std::make_shared<ASTQualifiedAsterisk>();
|
||||
asterisk->children.emplace_back(std::make_shared<ASTTableIdentifier>("--.s"));
|
||||
asterisk->qualifier = std::make_shared<ASTIdentifier>("--.s");
|
||||
asterisk->children.push_back(asterisk->qualifier);
|
||||
return asterisk;
|
||||
}
|
||||
|
||||
@ -153,24 +154,34 @@ private:
|
||||
for (auto & table_name : data.tables_order)
|
||||
data.addTableColumns(table_name, columns);
|
||||
|
||||
for (const auto & transformer : asterisk->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (asterisk->transformers)
|
||||
{
|
||||
for (const auto & transformer : asterisk->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
|
||||
{
|
||||
has_asterisks = true;
|
||||
|
||||
auto & identifier = child->children[0]->as<ASTTableIdentifier &>();
|
||||
if (!qualified_asterisk->qualifier)
|
||||
throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto & identifier = qualified_asterisk->qualifier->as<ASTIdentifier &>();
|
||||
|
||||
data.addTableColumns(identifier.name(), columns);
|
||||
|
||||
// QualifiedAsterisk's transformers start to appear at child 1
|
||||
for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
|
||||
if (qualified_asterisk->transformers)
|
||||
{
|
||||
if (it->get()->as<ASTColumnsApplyTransformer>() || it->get()->as<ASTColumnsExceptTransformer>() || it->get()->as<ASTColumnsReplaceTransformer>())
|
||||
IASTColumnsTransformer::transform(*it, columns);
|
||||
else
|
||||
throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR);
|
||||
for (const auto & transformer : qualified_asterisk->transformers->children)
|
||||
{
|
||||
if (transformer->as<ASTColumnsApplyTransformer>() ||
|
||||
transformer->as<ASTColumnsExceptTransformer>() ||
|
||||
transformer->as<ASTColumnsReplaceTransformer>())
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
else
|
||||
throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (const auto * columns_list_matcher = child->as<ASTColumnsListMatcher>())
|
||||
@ -180,8 +191,11 @@ private:
|
||||
for (const auto & ident : columns_list_matcher->column_list->children)
|
||||
columns.emplace_back(ident->clone());
|
||||
|
||||
for (const auto & transformer : columns_list_matcher->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (columns_list_matcher->transformers)
|
||||
{
|
||||
for (const auto & transformer : columns_list_matcher->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else if (const auto * columns_regexp_matcher = child->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
@ -193,8 +207,11 @@ private:
|
||||
columns,
|
||||
[&](const String & column_name) { return columns_regexp_matcher->isColumnMatching(column_name); });
|
||||
|
||||
for (const auto & transformer : columns_regexp_matcher->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (columns_regexp_matcher->transformers)
|
||||
{
|
||||
for (const auto & transformer : columns_regexp_matcher->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else
|
||||
data.new_select_expression_list->children.push_back(child);
|
||||
@ -425,6 +442,7 @@ private:
|
||||
{
|
||||
if (data.expression_list->children.empty())
|
||||
data.expression_list->children.emplace_back(std::make_shared<ASTAsterisk>());
|
||||
|
||||
select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(data.expression_list));
|
||||
}
|
||||
data.done = true;
|
||||
|
@ -154,7 +154,7 @@ private:
|
||||
|
||||
static void visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
auto & identifier = node.children[0]->as<ASTTableIdentifier &>();
|
||||
auto & identifier = node.qualifier->as<ASTIdentifier &>();
|
||||
bool rewritten = false;
|
||||
for (const auto & table : data)
|
||||
{
|
||||
|
@ -303,7 +303,6 @@ bool MergeTreeTransaction::rollback() noexcept
|
||||
part->version.unlockRemovalTID(tid, TransactionInfoContext{part->storage.getStorageID(), part->name});
|
||||
}
|
||||
|
||||
|
||||
assert([&]()
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
|
@ -49,7 +49,9 @@ NamesAndTypesList TextLogElement::getNamesAndTypes()
|
||||
{"revision", std::make_shared<DataTypeUInt32>()},
|
||||
|
||||
{"source_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"source_line", std::make_shared<DataTypeUInt64>()}
|
||||
{"source_line", std::make_shared<DataTypeUInt64>()},
|
||||
|
||||
{"message_format_string", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
};
|
||||
}
|
||||
|
||||
@ -74,6 +76,8 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const
|
||||
|
||||
columns[i++]->insert(source_file);
|
||||
columns[i++]->insert(source_line);
|
||||
|
||||
columns[i++]->insert(message_format_string);
|
||||
}
|
||||
|
||||
TextLog::TextLog(ContextPtr context_, const String & database_name_,
|
||||
|
@ -28,6 +28,8 @@ struct TextLogElement
|
||||
String source_file;
|
||||
UInt64 source_line{};
|
||||
|
||||
std::string_view message_format_string;
|
||||
|
||||
static std::string name() { return "TextLog"; }
|
||||
static NamesAndTypesList getNamesAndTypes();
|
||||
static NamesAndAliases getNamesAndAliases() { return {}; }
|
||||
|
@ -156,21 +156,19 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D
|
||||
func_arguments->children.clear();
|
||||
}
|
||||
|
||||
void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk &, const ASTPtr & ast, Data & data)
|
||||
void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data)
|
||||
{
|
||||
if (ast->children.empty())
|
||||
throw Exception("Logical error: qualified asterisk must have children", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto & ident = ast->children[0];
|
||||
if (!node.qualifier)
|
||||
throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// @note it could contain table alias as table name.
|
||||
DatabaseAndTableWithAlias db_and_table(ident);
|
||||
DatabaseAndTableWithAlias db_and_table(node.qualifier);
|
||||
|
||||
for (const auto & known_table : data.tables)
|
||||
if (db_and_table.satisfies(known_table.table, true))
|
||||
return;
|
||||
|
||||
throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
throw Exception("Unknown qualified identifier: " + node.qualifier->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
|
||||
}
|
||||
|
||||
void TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data & data)
|
||||
@ -266,16 +264,22 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
|
||||
first_table = false;
|
||||
}
|
||||
|
||||
for (const auto & transformer : asterisk->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (asterisk->transformers)
|
||||
{
|
||||
for (const auto & transformer : asterisk->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else if (auto * asterisk_column_list = child->as<ASTColumnsListMatcher>())
|
||||
{
|
||||
for (const auto & ident : asterisk_column_list->column_list->children)
|
||||
columns.emplace_back(ident->clone());
|
||||
|
||||
for (const auto & transformer : asterisk_column_list->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (asterisk_column_list->transformers)
|
||||
{
|
||||
for (const auto & transformer : asterisk_column_list->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else if (const auto * asterisk_regexp_pattern = child->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
@ -292,12 +296,15 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
|
||||
first_table = false;
|
||||
}
|
||||
|
||||
for (const auto & transformer : asterisk_regexp_pattern->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
if (asterisk_regexp_pattern->transformers)
|
||||
{
|
||||
for (const auto & transformer : asterisk_regexp_pattern->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
|
||||
{
|
||||
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
|
||||
DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->qualifier);
|
||||
|
||||
for (const auto & table : tables_with_columns)
|
||||
{
|
||||
@ -309,10 +316,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
|
||||
}
|
||||
}
|
||||
|
||||
// QualifiedAsterisk's transformers start to appear at child 1
|
||||
for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
|
||||
if (qualified_asterisk->transformers)
|
||||
{
|
||||
IASTColumnsTransformer::transform(*it, columns);
|
||||
for (const auto & transformer : qualified_asterisk->transformers->children)
|
||||
IASTColumnsTransformer::transform(transformer, columns);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -133,6 +133,8 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
|
||||
elem.source_file = msg.getSourceFile();
|
||||
|
||||
elem.source_line = msg.getSourceLine();
|
||||
elem.message_format_string = msg.getFormatString();
|
||||
|
||||
std::shared_ptr<TextLog> text_log_locked{};
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(text_log_mutex);
|
||||
|
@ -8,21 +8,37 @@ namespace DB
|
||||
ASTPtr ASTAsterisk::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTAsterisk>(*this);
|
||||
clone->cloneChildren();
|
||||
|
||||
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const { ostr.write('*'); }
|
||||
void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
if (expression)
|
||||
{
|
||||
expression->appendColumnName(ostr);
|
||||
writeCString(".", ostr);
|
||||
}
|
||||
|
||||
ostr.write('*');
|
||||
}
|
||||
|
||||
void ASTAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
if (expression)
|
||||
{
|
||||
expression->formatImpl(settings, state, frame);
|
||||
settings.ostr << ".";
|
||||
}
|
||||
|
||||
settings.ostr << "*";
|
||||
|
||||
/// Format column transformers
|
||||
for (const auto & child : children)
|
||||
if (transformers)
|
||||
{
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,8 @@ public:
|
||||
ASTPtr clone() const override;
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
|
||||
ASTPtr expression;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
};
|
||||
|
@ -18,12 +18,20 @@ namespace ErrorCodes
|
||||
ASTPtr ASTColumnsRegexpMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
|
||||
clone->cloneChildren();
|
||||
|
||||
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
if (expression)
|
||||
{
|
||||
expression->appendColumnName(ostr);
|
||||
writeCString(".", ostr);
|
||||
}
|
||||
writeCString("COLUMNS(", ostr);
|
||||
writeQuotedString(original_pattern, ostr);
|
||||
writeChar(')', ostr);
|
||||
@ -38,15 +46,21 @@ void ASTColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const
|
||||
|
||||
void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "");
|
||||
|
||||
if (expression)
|
||||
{
|
||||
expression->formatImpl(settings, state, frame);
|
||||
settings.ostr << ".";
|
||||
}
|
||||
|
||||
settings.ostr << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << quoteString(original_pattern);
|
||||
settings.ostr << ")";
|
||||
|
||||
/// Format column transformers
|
||||
for (const auto & child : children)
|
||||
if (transformers)
|
||||
{
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,6 +74,11 @@ void ASTColumnsRegexpMatcher::setPattern(String pattern)
|
||||
DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
|
||||
}
|
||||
|
||||
const String & ASTColumnsRegexpMatcher::getPattern() const
|
||||
{
|
||||
return original_pattern;
|
||||
}
|
||||
|
||||
const std::shared_ptr<re2::RE2> & ASTColumnsRegexpMatcher::getMatcher() const
|
||||
{
|
||||
return column_matcher;
|
||||
@ -73,19 +92,23 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
|
||||
ASTPtr ASTColumnsListMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
|
||||
clone->column_list = column_list->clone();
|
||||
clone->cloneChildren();
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
|
||||
{
|
||||
column_list->updateTreeHash(hash_state);
|
||||
IAST::updateTreeHashImpl(hash_state);
|
||||
if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
clone->column_list = column_list->clone();
|
||||
clone->children.push_back(clone->column_list);
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
if (expression)
|
||||
{
|
||||
expression->appendColumnName(ostr);
|
||||
writeCString(".", ostr);
|
||||
}
|
||||
writeCString("COLUMNS(", ostr);
|
||||
for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it)
|
||||
{
|
||||
@ -99,7 +122,15 @@ void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
|
||||
void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "");
|
||||
|
||||
if (expression)
|
||||
{
|
||||
expression->formatImpl(settings, state, frame);
|
||||
settings.ostr << ".";
|
||||
}
|
||||
|
||||
settings.ostr << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
|
||||
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
|
||||
{
|
||||
@ -111,33 +142,39 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt
|
||||
}
|
||||
settings.ostr << ")";
|
||||
|
||||
/// Format column transformers
|
||||
for (const auto & child : children)
|
||||
if (transformers)
|
||||
{
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedColumnsRegexpMatcher>(*this);
|
||||
clone->cloneChildren();
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
clone->qualifier = qualifier->clone();
|
||||
clone->children.push_back(clone->qualifier);
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->appendColumnName(ostr);
|
||||
writeCString(".COLUMNS(", ostr);
|
||||
writeQuotedString(original_pattern, ostr);
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern)
|
||||
void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern, bool set_matcher)
|
||||
{
|
||||
original_pattern = std::move(pattern);
|
||||
|
||||
if (!set_matcher)
|
||||
return;
|
||||
|
||||
column_matcher = std::make_shared<RE2>(original_pattern, RE2::Quiet);
|
||||
if (!column_matcher->ok())
|
||||
throw DB::Exception(
|
||||
@ -166,35 +203,35 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "");
|
||||
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->formatImpl(settings, state, frame);
|
||||
|
||||
settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
settings.ostr << quoteString(original_pattern);
|
||||
settings.ostr << ")";
|
||||
|
||||
/// Format column transformers
|
||||
size_t children_size = children.size();
|
||||
|
||||
for (size_t i = 1; i < children_size; ++i)
|
||||
if (transformers)
|
||||
{
|
||||
const auto & child = children[i];
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
ASTPtr ASTQualifiedColumnsListMatcher::clone() const
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedColumnsListMatcher>(*this);
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
clone->qualifier = qualifier->clone();
|
||||
clone->column_list = column_list->clone();
|
||||
clone->cloneChildren();
|
||||
|
||||
clone->children.push_back(clone->qualifier);
|
||||
clone->children.push_back(clone->column_list);
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->appendColumnName(ostr);
|
||||
writeCString(".COLUMNS(", ostr);
|
||||
|
||||
@ -208,19 +245,10 @@ void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
|
||||
writeChar(')', ostr);
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
|
||||
{
|
||||
column_list->updateTreeHash(hash_state);
|
||||
IAST::updateTreeHashImpl(hash_state);
|
||||
}
|
||||
|
||||
void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "");
|
||||
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->formatImpl(settings, state, frame);
|
||||
|
||||
settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
|
||||
|
||||
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
|
||||
@ -232,14 +260,9 @@ void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings,
|
||||
}
|
||||
settings.ostr << ")";
|
||||
|
||||
/// Format column transformers
|
||||
size_t children_size = children.size();
|
||||
|
||||
for (size_t i = 1; i < children_size; ++i)
|
||||
if (transformers)
|
||||
{
|
||||
const auto & child = children[i];
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,10 +24,13 @@ public:
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void setPattern(String pattern);
|
||||
const String & getPattern() const;
|
||||
const std::shared_ptr<re2::RE2> & getMatcher() const;
|
||||
bool isColumnMatching(const String & column_name) const;
|
||||
void updateTreeHashImpl(SipHash & hash_state) const override;
|
||||
|
||||
ASTPtr expression;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
@ -43,9 +46,10 @@ public:
|
||||
String getID(char) const override { return "ColumnsListMatcher"; }
|
||||
ASTPtr clone() const override;
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void updateTreeHashImpl(SipHash & hash_state) const override;
|
||||
|
||||
ASTPtr expression;
|
||||
ASTPtr column_list;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
};
|
||||
@ -59,10 +63,12 @@ public:
|
||||
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
const std::shared_ptr<re2::RE2> & getMatcher() const;
|
||||
void setPattern(String pattern);
|
||||
void setPattern(String pattern, bool set_matcher = true);
|
||||
void setMatcher(std::shared_ptr<re2::RE2> matcher);
|
||||
void updateTreeHashImpl(SipHash & hash_state) const override;
|
||||
|
||||
ASTPtr qualifier;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
|
||||
@ -78,9 +84,10 @@ public:
|
||||
String getID(char) const override { return "QualifiedColumnsListMatcher"; }
|
||||
ASTPtr clone() const override;
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
void updateTreeHashImpl(SipHash & hash_state) const override;
|
||||
|
||||
ASTPtr qualifier;
|
||||
ASTPtr column_list;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
|
||||
};
|
||||
|
@ -19,6 +19,15 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_COMPILE_REGEXP;
|
||||
}
|
||||
|
||||
void ASTColumnsTransformerList::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
for (const auto & child : children)
|
||||
{
|
||||
settings.ostr << ' ';
|
||||
child->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
|
||||
{
|
||||
if (const auto * apply = transformer->as<ASTColumnsApplyTransformer>())
|
||||
|
@ -9,6 +9,23 @@ namespace re2
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// A list of column transformers
|
||||
class ASTColumnsTransformerList : public IAST
|
||||
{
|
||||
public:
|
||||
String getID(char) const override { return "ColumnsTransformerList"; }
|
||||
ASTPtr clone() const override
|
||||
{
|
||||
auto clone = std::make_shared<ASTColumnsTransformerList>(*this);
|
||||
clone->cloneChildren();
|
||||
return clone;
|
||||
}
|
||||
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
};
|
||||
|
||||
class IASTColumnsTransformer : public IAST
|
||||
{
|
||||
public:
|
||||
|
@ -6,6 +6,10 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/// AST, EXPLAIN or other query with meaning of explanation query instead of execution
|
||||
class ASTExplainQuery : public ASTQueryWithOutput
|
||||
@ -23,6 +27,45 @@ public:
|
||||
CurrentTransaction, /// 'EXPLAIN CURRENT TRANSACTION'
|
||||
};
|
||||
|
||||
static String toString(ExplainKind kind)
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case ParsedAST: return "EXPLAIN AST";
|
||||
case AnalyzedSyntax: return "EXPLAIN SYNTAX";
|
||||
case QueryTree: return "EXPLAIN QUERY TREE";
|
||||
case QueryPlan: return "EXPLAIN";
|
||||
case QueryPipeline: return "EXPLAIN PIPELINE";
|
||||
case QueryEstimates: return "EXPLAIN ESTIMATE";
|
||||
case TableOverride: return "EXPLAIN TABLE OVERRIDE";
|
||||
case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION";
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
static ExplainKind fromString(const String & str)
|
||||
{
|
||||
if (str == "EXPLAIN AST")
|
||||
return ParsedAST;
|
||||
if (str == "EXPLAIN SYNTAX")
|
||||
return AnalyzedSyntax;
|
||||
if (str == "EXPLAIN QUERY TREE")
|
||||
return QueryTree;
|
||||
if (str == "EXPLAIN" || str == "EXPLAIN PLAN")
|
||||
return QueryPlan;
|
||||
if (str == "EXPLAIN PIPELINE")
|
||||
return QueryPipeline;
|
||||
if (str == "EXPLAIN ESTIMATE")
|
||||
return QueryEstimates;
|
||||
if (str == "EXPLAIN TABLE OVERRIDE")
|
||||
return TableOverride;
|
||||
if (str == "EXPLAIN CURRENT TRANSACTION")
|
||||
return CurrentTransaction;
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown explain kind '{}'", str);
|
||||
}
|
||||
|
||||
explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {}
|
||||
|
||||
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
|
||||
@ -103,23 +146,6 @@ private:
|
||||
/// Used by EXPLAIN TABLE OVERRIDE
|
||||
ASTPtr table_function;
|
||||
ASTPtr table_override;
|
||||
|
||||
static String toString(ExplainKind kind)
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case ParsedAST: return "EXPLAIN AST";
|
||||
case AnalyzedSyntax: return "EXPLAIN SYNTAX";
|
||||
case QueryTree: return "EXPLAIN QUERY TREE";
|
||||
case QueryPlan: return "EXPLAIN";
|
||||
case QueryPipeline: return "EXPLAIN PIPELINE";
|
||||
case QueryEstimates: return "EXPLAIN ESTIMATE";
|
||||
case TableOverride: return "EXPLAIN TABLE OVERRIDE";
|
||||
case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION";
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -7,22 +7,18 @@ namespace DB
|
||||
|
||||
void ASTQualifiedAsterisk::appendColumnName(WriteBuffer & ostr) const
|
||||
{
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->appendColumnName(ostr);
|
||||
writeCString(".*", ostr);
|
||||
}
|
||||
|
||||
void ASTQualifiedAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
const auto & qualifier = children.at(0);
|
||||
qualifier->formatImpl(settings, state, frame);
|
||||
settings.ostr << ".*";
|
||||
|
||||
/// Format column transformers
|
||||
for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
|
||||
if (transformers)
|
||||
{
|
||||
settings.ostr << ' ';
|
||||
(*it)->formatImpl(settings, state, frame);
|
||||
transformers->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,11 +17,18 @@ public:
|
||||
ASTPtr clone() const override
|
||||
{
|
||||
auto clone = std::make_shared<ASTQualifiedAsterisk>(*this);
|
||||
clone->cloneChildren();
|
||||
|
||||
if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
|
||||
|
||||
clone->qualifier = qualifier->clone();
|
||||
clone->children.push_back(clone->qualifier);
|
||||
|
||||
return clone;
|
||||
}
|
||||
void appendColumnName(WriteBuffer & ostr) const override;
|
||||
|
||||
ASTPtr qualifier;
|
||||
ASTPtr transformers;
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
};
|
||||
|
@ -166,6 +166,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
|
||||
else if ( type == Type::RESTART_REPLICA
|
||||
|| type == Type::RESTORE_REPLICA
|
||||
|| type == Type::SYNC_REPLICA
|
||||
|| type == Type::WAIT_LOADING_PARTS
|
||||
|| type == Type::FLUSH_DISTRIBUTED
|
||||
|| type == Type::RELOAD_DICTIONARY
|
||||
|| type == Type::RELOAD_MODEL
|
||||
|
@ -35,6 +35,7 @@ public:
|
||||
RESTART_REPLICAS,
|
||||
RESTART_REPLICA,
|
||||
RESTORE_REPLICA,
|
||||
WAIT_LOADING_PARTS,
|
||||
DROP_REPLICA,
|
||||
DROP_DATABASE_REPLICA,
|
||||
SYNC_REPLICA,
|
||||
|
@ -28,6 +28,8 @@
|
||||
#include <Parsers/ASTWindowDefinition.h>
|
||||
#include <Parsers/ASTAssignment.h>
|
||||
#include <Parsers/ASTColumnsMatcher.h>
|
||||
#include <Parsers/ASTExplainQuery.h>
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
@ -116,8 +118,40 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
}
|
||||
else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected))
|
||||
{
|
||||
/// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...)
|
||||
result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node));
|
||||
const auto & explain_query = explain_node->as<const ASTExplainQuery &>();
|
||||
|
||||
if (explain_query.getTableFunction() || explain_query.getTableOverride())
|
||||
throw Exception("EXPLAIN in a subquery cannot have a table function or table override", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
/// Replace subquery `(EXPLAIN <kind> <explain_settings> SELECT ...)`
|
||||
/// with `(SELECT * FROM viewExplain("<kind>", "<explain_settings>", SELECT ...))`
|
||||
|
||||
String kind_str = ASTExplainQuery::toString(explain_query.getKind());
|
||||
|
||||
String settings_str;
|
||||
if (ASTPtr settings_ast = explain_query.getSettings())
|
||||
{
|
||||
if (!settings_ast->as<ASTSetQuery>())
|
||||
throw Exception("EXPLAIN settings must be a SET query", ErrorCodes::BAD_ARGUMENTS);
|
||||
settings_str = queryToString(settings_ast);
|
||||
}
|
||||
|
||||
const ASTPtr & explained_ast = explain_query.getExplainedQuery();
|
||||
if (explained_ast)
|
||||
{
|
||||
auto view_explain = makeASTFunction("viewExplain",
|
||||
std::make_shared<ASTLiteral>(kind_str),
|
||||
std::make_shared<ASTLiteral>(settings_str),
|
||||
explained_ast);
|
||||
result_node = buildSelectFromTableFunction(view_explain);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto view_explain = makeASTFunction("viewExplain",
|
||||
std::make_shared<ASTLiteral>(kind_str),
|
||||
std::make_shared<ASTLiteral>(settings_str));
|
||||
result_node = buildSelectFromTableFunction(view_explain);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1623,13 +1657,21 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
++pos;
|
||||
auto asterisk = std::make_shared<ASTAsterisk>();
|
||||
auto transformers = std::make_shared<ASTColumnsTransformerList>();
|
||||
ParserColumnsTransformers transformers_p(allowed_transformers);
|
||||
ASTPtr transformer;
|
||||
while (transformers_p.parse(pos, transformer, expected))
|
||||
{
|
||||
asterisk->children.push_back(transformer);
|
||||
transformers->children.push_back(transformer);
|
||||
}
|
||||
node = asterisk;
|
||||
|
||||
if (!transformers->children.empty())
|
||||
{
|
||||
asterisk->transformers = std::move(transformers);
|
||||
asterisk->children.push_back(asterisk->transformers);
|
||||
}
|
||||
|
||||
node = std::move(asterisk);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -1638,7 +1680,7 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
|
||||
bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true, true).parse(pos, node, expected))
|
||||
if (!ParserCompoundIdentifier(false, true).parse(pos, node, expected))
|
||||
return false;
|
||||
|
||||
if (pos->type != TokenType::Dot)
|
||||
@ -1650,13 +1692,23 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp
|
||||
++pos;
|
||||
|
||||
auto res = std::make_shared<ASTQualifiedAsterisk>();
|
||||
res->children.push_back(node);
|
||||
auto transformers = std::make_shared<ASTColumnsTransformerList>();
|
||||
ParserColumnsTransformers transformers_p;
|
||||
ASTPtr transformer;
|
||||
while (transformers_p.parse(pos, transformer, expected))
|
||||
{
|
||||
res->children.push_back(transformer);
|
||||
transformers->children.push_back(transformer);
|
||||
}
|
||||
|
||||
res->qualifier = std::move(node);
|
||||
res->children.push_back(res->qualifier);
|
||||
|
||||
if (!transformers->children.empty())
|
||||
{
|
||||
res->transformers = std::move(transformers);
|
||||
res->children.push_back(res->transformers);
|
||||
}
|
||||
|
||||
node = std::move(res);
|
||||
return true;
|
||||
}
|
||||
@ -1680,28 +1732,44 @@ static bool parseColumnsMatcherBody(IParser::Pos & pos, ASTPtr & node, Expected
|
||||
return false;
|
||||
++pos;
|
||||
|
||||
auto transformers = std::make_shared<ASTColumnsTransformerList>();
|
||||
ParserColumnsTransformers transformers_p(allowed_transformers);
|
||||
ASTPtr transformer;
|
||||
while (transformers_p.parse(pos, transformer, expected))
|
||||
{
|
||||
transformers->children.push_back(transformer);
|
||||
}
|
||||
|
||||
ASTPtr res;
|
||||
if (column_list)
|
||||
{
|
||||
auto list_matcher = std::make_shared<ASTColumnsListMatcher>();
|
||||
list_matcher->column_list = column_list;
|
||||
res = list_matcher;
|
||||
|
||||
list_matcher->column_list = std::move(column_list);
|
||||
list_matcher->children.push_back(list_matcher->column_list);
|
||||
|
||||
if (!transformers->children.empty())
|
||||
{
|
||||
list_matcher->transformers = std::move(transformers);
|
||||
list_matcher->children.push_back(list_matcher->transformers);
|
||||
}
|
||||
|
||||
node = std::move(list_matcher);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
|
||||
regexp_matcher->setPattern(regex_node->as<ASTLiteral &>().value.get<String>());
|
||||
res = regexp_matcher;
|
||||
|
||||
if (!transformers->children.empty())
|
||||
{
|
||||
regexp_matcher->transformers = std::move(transformers);
|
||||
regexp_matcher->children.push_back(regexp_matcher->transformers);
|
||||
}
|
||||
|
||||
node = std::move(regexp_matcher);
|
||||
}
|
||||
|
||||
ParserColumnsTransformers transformers_p(allowed_transformers);
|
||||
ASTPtr transformer;
|
||||
while (transformers_p.parse(pos, transformer, expected))
|
||||
{
|
||||
res->children.push_back(transformer);
|
||||
}
|
||||
|
||||
node = std::move(res);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1717,29 +1785,19 @@ bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
|
||||
|
||||
bool ParserQualifiedColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
if (!ParserCompoundIdentifier(true, true).parse(pos, node, expected))
|
||||
if (!ParserCompoundIdentifier(false, true).parse(pos, node, expected))
|
||||
return false;
|
||||
|
||||
auto identifier_node = node;
|
||||
const auto & identifier_node_typed = identifier_node->as<ASTTableIdentifier &>();
|
||||
auto & identifier_node_typed = identifier_node->as<ASTIdentifier &>();
|
||||
auto & name_parts = identifier_node_typed.name_parts;
|
||||
|
||||
/// ParserCompoundIdentifier parse identifier.COLUMNS
|
||||
if (identifier_node_typed.name_parts.size() == 1 || identifier_node_typed.name_parts.back() != "COLUMNS")
|
||||
if (name_parts.size() == 1 || name_parts.back() != "COLUMNS")
|
||||
return false;
|
||||
|
||||
/// TODO: ASTTableIdentifier can contain only 2 parts
|
||||
|
||||
if (identifier_node_typed.name_parts.size() == 2)
|
||||
{
|
||||
auto table_name = identifier_node_typed.name_parts[0];
|
||||
identifier_node = std::make_shared<ASTTableIdentifier>(table_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected identifier to contain no more than 2 parts. Actual {}",
|
||||
identifier_node_typed.full_name);
|
||||
}
|
||||
name_parts.pop_back();
|
||||
identifier_node = std::make_shared<ASTIdentifier>(std::move(name_parts), false, std::move(node->children));
|
||||
|
||||
if (!parseColumnsMatcherBody(pos, node, expected, allowed_transformers))
|
||||
return false;
|
||||
@ -1747,28 +1805,36 @@ bool ParserQualifiedColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected
|
||||
if (auto * columns_list_matcher = node->as<ASTColumnsListMatcher>())
|
||||
{
|
||||
auto result = std::make_shared<ASTQualifiedColumnsListMatcher>();
|
||||
result->qualifier = std::move(identifier_node);
|
||||
result->column_list = std::move(columns_list_matcher->column_list);
|
||||
|
||||
result->children.reserve(columns_list_matcher->children.size() + 1);
|
||||
result->children.push_back(std::move(identifier_node));
|
||||
result->children.push_back(result->qualifier);
|
||||
result->children.push_back(result->column_list);
|
||||
|
||||
for (auto && child : columns_list_matcher->children)
|
||||
result->children.push_back(std::move(child));
|
||||
if (columns_list_matcher->transformers)
|
||||
{
|
||||
result->transformers = std::move(columns_list_matcher->transformers);
|
||||
result->children.push_back(result->transformers);
|
||||
}
|
||||
|
||||
node = result;
|
||||
node = std::move(result);
|
||||
}
|
||||
else if (auto * column_regexp_matcher = node->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
auto result = std::make_shared<ASTQualifiedColumnsRegexpMatcher>();
|
||||
result->setPattern(column_regexp_matcher->getPattern(), false);
|
||||
result->setMatcher(column_regexp_matcher->getMatcher());
|
||||
|
||||
result->children.reserve(column_regexp_matcher->children.size() + 1);
|
||||
result->children.push_back(std::move(identifier_node));
|
||||
result->qualifier = std::move(identifier_node);
|
||||
result->children.push_back(result->qualifier);
|
||||
|
||||
for (auto && child : column_regexp_matcher->children)
|
||||
result->children.push_back(std::move(child));
|
||||
if (column_regexp_matcher->transformers)
|
||||
{
|
||||
result->transformers = std::move(column_regexp_matcher->transformers);
|
||||
result->children.push_back(result->transformers);
|
||||
}
|
||||
|
||||
node = result;
|
||||
node = std::move(result);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Parsers/ParserSetQuery.h>
|
||||
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTColumnsMatcher.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTFunctionWithKeyValueArguments.h>
|
||||
@ -2194,7 +2195,7 @@ struct ParserExpressionImpl
|
||||
using Layers = std::vector<std::unique_ptr<Layer>>;
|
||||
|
||||
Action tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected);
|
||||
static Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected);
|
||||
Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected);
|
||||
};
|
||||
|
||||
|
||||
@ -2523,8 +2524,6 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos
|
||||
|
||||
Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected)
|
||||
{
|
||||
ASTPtr tmp;
|
||||
|
||||
/// ParserExpression can be called in this part of the query:
|
||||
/// ALTER TABLE partition_all2 CLEAR INDEX [ p ] IN PARTITION ALL
|
||||
///
|
||||
@ -2544,17 +2543,17 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
|
||||
|
||||
if (cur_op == operators_table.end())
|
||||
{
|
||||
ASTPtr alias;
|
||||
ParserAlias alias_parser(layers.back()->allow_alias_without_as_keyword);
|
||||
auto old_pos = pos;
|
||||
|
||||
if (layers.back()->allow_alias &&
|
||||
!layers.back()->parsed_alias &&
|
||||
alias_parser.parse(pos, tmp, expected) &&
|
||||
layers.back()->insertAlias(tmp))
|
||||
alias_parser.parse(pos, alias, expected) &&
|
||||
layers.back()->insertAlias(alias))
|
||||
{
|
||||
layers.back()->parsed_alias = true;
|
||||
return Action::OPERATOR;
|
||||
}
|
||||
pos = old_pos;
|
||||
return Action::NONE;
|
||||
}
|
||||
|
||||
@ -2618,33 +2617,57 @@ Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & po
|
||||
layers.back()->pushOperand(function);
|
||||
}
|
||||
|
||||
/// Dot (the TupleElement operator) can be the beginning of a .* or .COLUMNS expression
|
||||
if (op.type == OperatorType::TupleElement)
|
||||
{
|
||||
ASTPtr tmp;
|
||||
if (asterisk_parser.parse(pos, tmp, expected) ||
|
||||
columns_matcher_parser.parse(pos, tmp, expected))
|
||||
{
|
||||
if (auto * asterisk = tmp->as<ASTAsterisk>())
|
||||
{
|
||||
if (!layers.back()->popOperand(asterisk->expression))
|
||||
return Action::NONE;
|
||||
}
|
||||
else if (auto * columns_list_matcher = tmp->as<ASTColumnsListMatcher>())
|
||||
{
|
||||
if (!layers.back()->popOperand(columns_list_matcher->expression))
|
||||
return Action::NONE;
|
||||
}
|
||||
else if (auto * columns_regexp_matcher = tmp->as<ASTColumnsRegexpMatcher>())
|
||||
{
|
||||
if (!layers.back()->popOperand(columns_regexp_matcher->expression))
|
||||
return Action::NONE;
|
||||
}
|
||||
|
||||
layers.back()->pushOperand(std::move(tmp));
|
||||
return Action::OPERATOR;
|
||||
}
|
||||
}
|
||||
|
||||
layers.back()->pushOperator(op);
|
||||
|
||||
if (op.type == OperatorType::ArrayElement)
|
||||
layers.push_back(std::make_unique<ArrayElementLayer>());
|
||||
|
||||
|
||||
Action next = Action::OPERAND;
|
||||
|
||||
/// isNull & isNotNull are postfix unary operators
|
||||
if (op.type == OperatorType::IsNull)
|
||||
next = Action::OPERATOR;
|
||||
|
||||
if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween)
|
||||
layers.back()->between_counter++;
|
||||
return Action::OPERATOR;
|
||||
|
||||
if (op.type == OperatorType::Cast)
|
||||
{
|
||||
next = Action::OPERATOR;
|
||||
|
||||
ASTPtr type_ast;
|
||||
if (!ParserDataType().parse(pos, type_ast, expected))
|
||||
return Action::NONE;
|
||||
|
||||
layers.back()->pushOperand(std::make_shared<ASTLiteral>(queryToString(type_ast)));
|
||||
return Action::OPERATOR;
|
||||
}
|
||||
|
||||
return next;
|
||||
if (op.type == OperatorType::ArrayElement)
|
||||
layers.push_back(std::make_unique<ArrayElementLayer>());
|
||||
|
||||
if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween)
|
||||
layers.back()->between_counter++;
|
||||
|
||||
return Action::OPERAND;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -253,6 +253,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
|
||||
|
||||
case Type::RESTART_REPLICA:
|
||||
case Type::SYNC_REPLICA:
|
||||
case Type::WAIT_LOADING_PARTS:
|
||||
{
|
||||
if (!parseQueryWithOnCluster(res, pos, expected))
|
||||
return false;
|
||||
|
@ -196,7 +196,7 @@ void PipelineExecutor::executeSingleThread(size_t thread_num)
|
||||
|
||||
#ifndef NDEBUG
|
||||
auto & context = tasks.getThreadContext(thread_num);
|
||||
LOG_TRACE(log,
|
||||
LOG_TEST(log,
|
||||
"Thread finished. Total time: {} sec. Execution time: {} sec. Processing time: {} sec. Wait time: {} sec.",
|
||||
context.total_time_ns / 1e9,
|
||||
context.execution_time_ns / 1e9,
|
||||
|
@ -304,7 +304,9 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node
|
||||
};
|
||||
}
|
||||
|
||||
if (null_as_default)
|
||||
/// If the Union is ['Null', Nested-Type] and the Nested-Type cannot be inside
|
||||
/// Nullable, we will get Nested-Type instead of a Nullable type.
|
||||
if (null_as_default || !target.isNullable())
|
||||
{
|
||||
auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
|
||||
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
|
||||
@ -1001,7 +1003,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
|
||||
case avro::Type::AVRO_STRING:
|
||||
return std::make_shared<DataTypeString>();
|
||||
case avro::Type::AVRO_BYTES:
|
||||
return std::make_shared<DataTypeFloat32>();
|
||||
return std::make_shared<DataTypeString>();
|
||||
case avro::Type::AVRO_ENUM:
|
||||
{
|
||||
if (node->names() < 128)
|
||||
|
@ -402,14 +402,10 @@ void TCPHandler::runImpl()
|
||||
{
|
||||
auto callback = [this]()
|
||||
{
|
||||
{
|
||||
std::lock_guard task_callback_lock(task_callback_mutex);
|
||||
std::scoped_lock lock(task_callback_mutex, fatal_error_mutex);
|
||||
|
||||
if (isQueryCancelled())
|
||||
return true;
|
||||
}
|
||||
|
||||
std::lock_guard lock(fatal_error_mutex);
|
||||
if (isQueryCancelled())
|
||||
return true;
|
||||
|
||||
sendProgress();
|
||||
sendSelectProfileEvents();
|
||||
@ -424,6 +420,9 @@ void TCPHandler::runImpl()
|
||||
}
|
||||
|
||||
state.io.onFinish();
|
||||
|
||||
std::lock_guard lock(task_callback_mutex);
|
||||
|
||||
/// Send final progress after calling onFinish(), since it will update the progress.
|
||||
///
|
||||
/// NOTE: we cannot send Progress for regular INSERT (with VALUES)
|
||||
@ -446,8 +445,11 @@ void TCPHandler::runImpl()
|
||||
if (state.is_connection_closed)
|
||||
break;
|
||||
|
||||
sendLogs();
|
||||
sendEndOfStream();
|
||||
{
|
||||
std::lock_guard lock(task_callback_mutex);
|
||||
sendLogs();
|
||||
sendEndOfStream();
|
||||
}
|
||||
|
||||
/// QueryState should be cleared before QueryScope, since otherwise
|
||||
/// the MemoryTracker will be wrong for possible deallocations.
|
||||
@ -760,6 +762,9 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
|
||||
}
|
||||
}
|
||||
|
||||
/// Defer locking to cover a part of the scope below and everything after it
|
||||
std::unique_lock progress_lock(task_callback_mutex, std::defer_lock);
|
||||
|
||||
{
|
||||
PullingAsyncPipelineExecutor executor(pipeline);
|
||||
CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};
|
||||
@ -796,6 +801,11 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
|
||||
}
|
||||
}
|
||||
|
||||
/// This lock wasn't acquired earlier; we call .lock() here
|
||||
/// so that everything below this line is covered, even together
|
||||
/// with sendProgress() called outside of this scope
|
||||
progress_lock.lock();
|
||||
|
||||
/** If data has run out, we will send the profiling data and total values to
|
||||
* the last zero block to be able to use
|
||||
* this information in the suffix output of stream.
|
||||
|
@ -60,6 +60,7 @@ namespace ErrorCodes
|
||||
extern const int TOO_MANY_PARTITIONS;
|
||||
extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -365,18 +366,22 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(
|
||||
const std::string & relative_path_,
|
||||
ConnectionPoolPtr pool_,
|
||||
ActionBlocker & monitor_blocker_,
|
||||
BackgroundSchedulePool & bg_pool)
|
||||
BackgroundSchedulePool & bg_pool,
|
||||
bool initialize_from_disk)
|
||||
: storage(storage_)
|
||||
, pool(std::move(pool_))
|
||||
, disk(disk_)
|
||||
, relative_path(relative_path_)
|
||||
, path(fs::path(disk->getPath()) / relative_path / "")
|
||||
, broken_relative_path(fs::path(relative_path) / "broken")
|
||||
, broken_path(fs::path(path) / "broken" / "")
|
||||
, should_batch_inserts(storage.getDistributedSettingsRef().monitor_batch_inserts)
|
||||
, split_batch_on_failure(storage.getDistributedSettingsRef().monitor_split_batch_on_failure)
|
||||
, dir_fsync(storage.getDistributedSettingsRef().fsync_directories)
|
||||
, min_batched_block_size_rows(storage.getContext()->getSettingsRef().min_insert_block_size_rows)
|
||||
, min_batched_block_size_bytes(storage.getContext()->getSettingsRef().min_insert_block_size_bytes)
|
||||
, current_batch_file_path(path + "current_batch.txt")
|
||||
, pending_files(std::numeric_limits<size_t>::max())
|
||||
, default_sleep_time(storage.getDistributedSettingsRef().monitor_sleep_time_ms.totalMilliseconds())
|
||||
, sleep_time(default_sleep_time)
|
||||
, max_sleep_time(storage.getDistributedSettingsRef().monitor_max_sleep_time_ms.totalMilliseconds())
|
||||
@ -385,6 +390,11 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(
|
||||
, metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0)
|
||||
, metric_broken_files(CurrentMetrics::BrokenDistributedFilesToInsert, 0)
|
||||
{
|
||||
fs::create_directory(broken_path);
|
||||
|
||||
if (initialize_from_disk)
|
||||
initializeFilesFromDisk();
|
||||
|
||||
task_handle = bg_pool.createTask(getLoggerName() + "/Bg", [this]{ run(); });
|
||||
task_handle->activateAndSchedule();
|
||||
}
|
||||
@ -392,35 +402,29 @@ StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(
|
||||
|
||||
StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor()
|
||||
{
|
||||
if (!quit)
|
||||
if (!pending_files.isFinished())
|
||||
{
|
||||
quit = true;
|
||||
pending_files.clearAndFinish();
|
||||
task_handle->deactivate();
|
||||
}
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::flushAllData()
|
||||
{
|
||||
if (quit)
|
||||
if (pending_files.isFinished())
|
||||
return;
|
||||
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
const auto & files = getFiles();
|
||||
if (!files.empty())
|
||||
{
|
||||
processFiles(files);
|
||||
|
||||
/// Update counters.
|
||||
getFiles();
|
||||
}
|
||||
if (!hasPendingFiles())
|
||||
return;
|
||||
processFiles();
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::shutdownAndDropAllData()
|
||||
{
|
||||
if (!quit)
|
||||
if (!pending_files.isFinished())
|
||||
{
|
||||
quit = true;
|
||||
pending_files.clearAndFinish();
|
||||
task_handle->deactivate();
|
||||
}
|
||||
|
||||
@ -434,19 +438,21 @@ void StorageDistributedDirectoryMonitor::run()
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
bool do_sleep = false;
|
||||
while (!quit)
|
||||
while (!pending_files.isFinished())
|
||||
{
|
||||
do_sleep = true;
|
||||
|
||||
const auto & files = getFiles();
|
||||
if (files.empty())
|
||||
if (!hasPendingFiles())
|
||||
break;
|
||||
|
||||
if (!monitor_blocker.isCancelled())
|
||||
{
|
||||
try
|
||||
{
|
||||
do_sleep = !processFiles(files);
|
||||
processFiles();
|
||||
/// No errors while processing existing files.
|
||||
/// Let's see, maybe there are more files to process.
|
||||
do_sleep = false;
|
||||
|
||||
std::lock_guard status_lock(status_mutex);
|
||||
status.last_exception = std::exception_ptr{};
|
||||
@ -470,9 +476,7 @@ void StorageDistributedDirectoryMonitor::run()
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(log, "Skipping send data over distributed table.");
|
||||
}
|
||||
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
if (now - last_decrease_time > decrease_error_count_period)
|
||||
@ -487,10 +491,7 @@ void StorageDistributedDirectoryMonitor::run()
|
||||
break;
|
||||
}
|
||||
|
||||
/// Update counters.
|
||||
getFiles();
|
||||
|
||||
if (!quit && do_sleep)
|
||||
if (!pending_files.isFinished() && do_sleep)
|
||||
task_handle->scheduleAfter(sleep_time.count());
|
||||
}
|
||||
|
||||
@ -568,41 +569,83 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri
|
||||
settings.distributed_replica_error_cap);
|
||||
}
|
||||
|
||||
|
||||
std::map<UInt64, std::string> StorageDistributedDirectoryMonitor::getFiles()
|
||||
bool StorageDistributedDirectoryMonitor::hasPendingFiles() const
|
||||
{
|
||||
std::map<UInt64, std::string> files;
|
||||
return fs::exists(current_batch_file_path) || !current_batch_file.empty() || !pending_files.empty();
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::initializeFilesFromDisk()
|
||||
{
|
||||
/// NOTE: This method does not require holding status_mutex, hence no TSA
|
||||
/// annotations in the header file.
|
||||
|
||||
fs::directory_iterator end;
|
||||
for (fs::directory_iterator it{path}; it != end; ++it)
|
||||
|
||||
/// Initialize pending files
|
||||
{
|
||||
const auto & file_path_str = it->path();
|
||||
if (!it->is_directory() && startsWith(fs::path(file_path_str).extension(), ".bin"))
|
||||
size_t bytes_count = 0;
|
||||
|
||||
for (fs::directory_iterator it{path}; it != end; ++it)
|
||||
{
|
||||
files[parse<UInt64>(fs::path(file_path_str).stem())] = file_path_str;
|
||||
const auto & file_path = it->path();
|
||||
const auto & base_name = file_path.stem().string();
|
||||
if (!it->is_directory() && startsWith(fs::path(file_path).extension(), ".bin") && parse<UInt64>(base_name))
|
||||
{
|
||||
const std::string & file_path_str = file_path.string();
|
||||
if (!pending_files.push(file_path_str))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file");
|
||||
bytes_count += fs::file_size(file_path);
|
||||
}
|
||||
else if (base_name != "tmp" && base_name != "broken")
|
||||
{
|
||||
/// It is OK to log current_batch.txt here too (useful for debugging).
|
||||
LOG_WARNING(log, "Unexpected file {} in {}", file_path.string(), path);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Files set to {}", pending_files.size());
|
||||
LOG_TRACE(log, "Bytes set to {}", bytes_count);
|
||||
|
||||
metric_pending_files.changeTo(pending_files.size());
|
||||
status.files_count = pending_files.size();
|
||||
status.bytes_count = bytes_count;
|
||||
}
|
||||
|
||||
return files;
|
||||
/// Initialize broken files
|
||||
{
|
||||
size_t broken_bytes_count = 0;
|
||||
size_t broken_files = 0;
|
||||
|
||||
for (fs::directory_iterator it{broken_path}; it != end; ++it)
|
||||
{
|
||||
const auto & file_path = it->path();
|
||||
if (!it->is_directory() && startsWith(fs::path(file_path).extension(), ".bin") && parse<UInt64>(file_path.stem()))
|
||||
broken_bytes_count += fs::file_size(file_path);
|
||||
else
|
||||
LOG_WARNING(log, "Unexpected file {} in {}", file_path.string(), broken_path);
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Broken files set to {}", broken_files);
|
||||
LOG_TRACE(log, "Broken bytes set to {}", broken_bytes_count);
|
||||
|
||||
metric_broken_files.changeTo(broken_files);
|
||||
status.broken_files_count = broken_files;
|
||||
status.broken_bytes_count = broken_bytes_count;
|
||||
}
|
||||
}
|
||||
bool StorageDistributedDirectoryMonitor::processFiles(const std::map<UInt64, std::string> & files)
|
||||
void StorageDistributedDirectoryMonitor::processFiles()
|
||||
{
|
||||
if (should_batch_inserts)
|
||||
{
|
||||
processFilesWithBatching(files);
|
||||
}
|
||||
processFilesWithBatching();
|
||||
else
|
||||
{
|
||||
for (const auto & file : files)
|
||||
{
|
||||
if (quit)
|
||||
return true;
|
||||
/// Process unprocessed file.
|
||||
if (!current_batch_file.empty())
|
||||
processFile(current_batch_file);
|
||||
|
||||
processFile(file.second);
|
||||
}
|
||||
while (pending_files.tryPop(current_batch_file))
|
||||
processFile(current_batch_file);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path)
|
||||
@ -649,7 +692,11 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
||||
thread_trace_context->root_span.addAttribute(std::current_exception());
|
||||
|
||||
e.addMessage(fmt::format("While sending {}", file_path));
|
||||
maybeMarkAsBroken(file_path, e);
|
||||
if (isFileBrokenErrorCode(e.code(), e.isRemoteException()))
|
||||
{
|
||||
markAsBroken(file_path);
|
||||
current_batch_file.clear();
|
||||
}
|
||||
throw;
|
||||
}
|
||||
catch (...)
|
||||
@ -662,6 +709,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa
|
||||
|
||||
auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path);
|
||||
markAsSend(file_path);
|
||||
current_batch_file.clear();
|
||||
LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds());
|
||||
}
|
||||
|
||||
@ -701,23 +749,19 @@ struct StorageDistributedDirectoryMonitor::BatchHeader
|
||||
|
||||
struct StorageDistributedDirectoryMonitor::Batch
|
||||
{
|
||||
std::vector<UInt64> file_indices;
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes = 0;
|
||||
bool recovered = false;
|
||||
|
||||
StorageDistributedDirectoryMonitor & parent;
|
||||
const std::map<UInt64, String> & file_index_to_path;
|
||||
std::vector<std::string> files;
|
||||
|
||||
bool split_batch_on_failure = true;
|
||||
bool fsync = false;
|
||||
bool dir_fsync = false;
|
||||
|
||||
Batch(
|
||||
StorageDistributedDirectoryMonitor & parent_,
|
||||
const std::map<UInt64, String> & file_index_to_path_)
|
||||
explicit Batch(StorageDistributedDirectoryMonitor & parent_)
|
||||
: parent(parent_)
|
||||
, file_index_to_path(file_index_to_path_)
|
||||
, split_batch_on_failure(parent.split_batch_on_failure)
|
||||
, fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert)
|
||||
, dir_fsync(parent.dir_fsync)
|
||||
@ -732,7 +776,7 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
|
||||
void send()
|
||||
{
|
||||
if (file_indices.empty())
|
||||
if (files.empty())
|
||||
return;
|
||||
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
|
||||
@ -775,7 +819,7 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (split_batch_on_failure && file_indices.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException()))
|
||||
if (split_batch_on_failure && files.size() > 1 && isSplittableErrorCode(e.code(), e.isRemoteException()))
|
||||
{
|
||||
tryLogCurrentException(parent.log, "Trying to split batch due to");
|
||||
sendSeparateFiles();
|
||||
@ -795,44 +839,28 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<std::string> files;
|
||||
for (const auto && file_info : file_index_to_path | boost::adaptors::indexed())
|
||||
{
|
||||
if (file_info.index() > 8)
|
||||
{
|
||||
files.push_back("...");
|
||||
break;
|
||||
}
|
||||
|
||||
files.push_back(file_info.value().second);
|
||||
}
|
||||
e.addMessage(fmt::format("While sending batch, nums: {}, files: {}", file_index_to_path.size(), fmt::join(files, "\n")));
|
||||
|
||||
e.addMessage(fmt::format("While sending a batch of {} files, files: {}", files.size(), fmt::join(files, "\n")));
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
if (!batch_broken)
|
||||
{
|
||||
LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", file_indices.size(), watch.elapsedMilliseconds());
|
||||
LOG_TRACE(parent.log, "Sent a batch of {} files (took {} ms).", files.size(), watch.elapsedMilliseconds());
|
||||
|
||||
auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, parent.disk, parent.relative_path);
|
||||
for (UInt64 file_index : file_indices)
|
||||
parent.markAsSend(file_index_to_path.at(file_index));
|
||||
for (const auto & file : files)
|
||||
parent.markAsSend(file);
|
||||
}
|
||||
else if (!batch_marked_as_broken)
|
||||
{
|
||||
LOG_ERROR(parent.log, "Marking a batch of {} files as broken.", file_indices.size());
|
||||
LOG_ERROR(parent.log, "Marking a batch of {} files as broken, files: {}", files.size(), fmt::join(files, "\n"));
|
||||
|
||||
for (UInt64 file_idx : file_indices)
|
||||
{
|
||||
auto file_path = file_index_to_path.find(file_idx);
|
||||
if (file_path != file_index_to_path.end())
|
||||
parent.markAsBroken(file_path->second);
|
||||
}
|
||||
for (const auto & file : files)
|
||||
parent.markAsBroken(file);
|
||||
}
|
||||
|
||||
file_indices.clear();
|
||||
files.clear();
|
||||
total_rows = 0;
|
||||
total_bytes = 0;
|
||||
recovered = false;
|
||||
@ -842,8 +870,11 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
|
||||
void writeText(WriteBuffer & out)
|
||||
{
|
||||
for (UInt64 file_idx : file_indices)
|
||||
out << file_idx << '\n';
|
||||
for (const auto & file : files)
|
||||
{
|
||||
UInt64 file_index = parse<UInt64>(fs::path(file).stem());
|
||||
out << file_index << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
void readText(ReadBuffer & in)
|
||||
@ -852,8 +883,9 @@ struct StorageDistributedDirectoryMonitor::Batch
|
||||
{
|
||||
UInt64 idx;
|
||||
in >> idx >> "\n";
|
||||
file_indices.push_back(idx);
|
||||
files.push_back(fmt::format("{}/{}.bin", parent.path, idx));
|
||||
}
|
||||
|
||||
recovered = true;
|
||||
}
|
||||
|
||||
@ -865,14 +897,9 @@ private:
|
||||
|
||||
IConnectionPool::Entry connection;
|
||||
|
||||
for (UInt64 file_idx : file_indices)
|
||||
for (const auto & file : files)
|
||||
{
|
||||
auto file_path = file_index_to_path.find(file_idx);
|
||||
if (file_path == file_index_to_path.end())
|
||||
throw Exception(ErrorCodes::DISTRIBUTED_BROKEN_BATCH_INFO,
|
||||
"Failed to send batch: file with index {} is absent", file_idx);
|
||||
|
||||
ReadBufferFromFile in(file_path->second);
|
||||
ReadBufferFromFile in(file);
|
||||
const auto & distributed_header = readDistributedHeader(in, parent.log);
|
||||
|
||||
OpenTelemetry::TracingContextHolder thread_trace_context(__PRETTY_FUNCTION__,
|
||||
@ -886,7 +913,7 @@ private:
|
||||
compression_expected = connection->getCompression() == Protocol::Compression::Enable;
|
||||
|
||||
LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).",
|
||||
file_indices.size(),
|
||||
files.size(),
|
||||
connection->getDescription(),
|
||||
formatReadableQuantity(total_rows),
|
||||
formatReadableSizeWithBinarySuffix(total_bytes));
|
||||
@ -907,19 +934,11 @@ private:
|
||||
{
|
||||
size_t broken_files = 0;
|
||||
|
||||
for (UInt64 file_idx : file_indices)
|
||||
for (const auto & file : files)
|
||||
{
|
||||
auto file_path = file_index_to_path.find(file_idx);
|
||||
if (file_path == file_index_to_path.end())
|
||||
{
|
||||
LOG_ERROR(parent.log, "Failed to send one file from batch: file with index {} is absent", file_idx);
|
||||
++broken_files;
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
ReadBufferFromFile in(file_path->second);
|
||||
ReadBufferFromFile in(file);
|
||||
const auto & distributed_header = readDistributedHeader(in, parent.log);
|
||||
|
||||
// this function is called in a separated thread, so we set up the trace context from the file
|
||||
@ -941,9 +960,11 @@ private:
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(fmt::format("While sending {}", file_path->second));
|
||||
parent.maybeMarkAsBroken(file_path->second, e);
|
||||
++broken_files;
|
||||
if (isFileBrokenErrorCode(e.code(), e.isRemoteException()))
|
||||
{
|
||||
parent.markAsBroken(file);
|
||||
++broken_files;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1023,13 +1044,18 @@ std::shared_ptr<ISource> StorageDistributedDirectoryMonitor::createSourceFromFil
|
||||
return std::make_shared<DirectoryMonitorSource>(file_name);
|
||||
}
|
||||
|
||||
bool StorageDistributedDirectoryMonitor::addAndSchedule(size_t file_size, size_t ms)
|
||||
bool StorageDistributedDirectoryMonitor::addAndSchedule(const std::string & file_path, size_t file_size, size_t ms)
|
||||
{
|
||||
if (quit)
|
||||
/// NOTE: It is better not to throw in this case, since the file is already
|
||||
/// on disk (see DistributedSink), and it will be processed next time.
|
||||
if (pending_files.isFinished())
|
||||
return false;
|
||||
|
||||
if (!pending_files.push(file_path))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add pending file");
|
||||
|
||||
{
|
||||
std::lock_guard status_lock(status_mutex);
|
||||
std::lock_guard lock(status_mutex);
|
||||
metric_pending_files.add();
|
||||
status.bytes_count += file_size;
|
||||
++status.files_count;
|
||||
@ -1045,33 +1071,25 @@ StorageDistributedDirectoryMonitor::Status StorageDistributedDirectoryMonitor::g
|
||||
return current_status;
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map<UInt64, std::string> & files)
|
||||
void StorageDistributedDirectoryMonitor::processFilesWithBatching()
|
||||
{
|
||||
std::unordered_set<UInt64> file_indices_to_skip;
|
||||
|
||||
/// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch.
|
||||
if (fs::exists(current_batch_file_path))
|
||||
{
|
||||
/// Possibly, we failed to send a batch on the previous iteration. Try to send exactly the same batch.
|
||||
Batch batch(*this, files);
|
||||
Batch batch(*this);
|
||||
ReadBufferFromFile in{current_batch_file_path};
|
||||
batch.readText(in);
|
||||
file_indices_to_skip.insert(batch.file_indices.begin(), batch.file_indices.end());
|
||||
batch.send();
|
||||
|
||||
auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path);
|
||||
fs::remove(current_batch_file_path);
|
||||
}
|
||||
|
||||
std::unordered_map<BatchHeader, Batch, BatchHeader::Hash> header_to_batch;
|
||||
|
||||
for (const auto & file : files)
|
||||
std::string file_path;
|
||||
while (pending_files.tryPop(file_path))
|
||||
{
|
||||
if (quit)
|
||||
return;
|
||||
|
||||
UInt64 file_idx = file.first;
|
||||
const String & file_path = file.second;
|
||||
|
||||
if (file_indices_to_skip.contains(file_idx))
|
||||
continue;
|
||||
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes = 0;
|
||||
Block header;
|
||||
@ -1110,8 +1128,9 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (maybeMarkAsBroken(file_path, e))
|
||||
if (isFileBrokenErrorCode(e.code(), e.isRemoteException()))
|
||||
{
|
||||
markAsBroken(file_path);
|
||||
tryLogCurrentException(log, "File is marked broken due to");
|
||||
continue;
|
||||
}
|
||||
@ -1125,9 +1144,9 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
||||
std::move(distributed_header.client_info),
|
||||
std::move(header)
|
||||
);
|
||||
Batch & batch = header_to_batch.try_emplace(batch_header, *this, files).first->second;
|
||||
Batch & batch = header_to_batch.try_emplace(batch_header, *this).first->second;
|
||||
|
||||
batch.file_indices.push_back(file_idx);
|
||||
batch.files.push_back(file_path);
|
||||
batch.total_rows += total_rows;
|
||||
batch.total_bytes += total_bytes;
|
||||
|
||||
@ -1155,16 +1174,10 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map
|
||||
|
||||
void StorageDistributedDirectoryMonitor::markAsBroken(const std::string & file_path)
|
||||
{
|
||||
const auto last_path_separator_pos = file_path.rfind('/');
|
||||
const auto & base_path = file_path.substr(0, last_path_separator_pos + 1);
|
||||
const auto & file_name = file_path.substr(last_path_separator_pos + 1);
|
||||
const String & broken_path = fs::path(base_path) / "broken/";
|
||||
const String & broken_file_path = fs::path(broken_path) / file_name;
|
||||
|
||||
fs::create_directory(broken_path);
|
||||
const String & broken_file_path = fs::path(broken_path) / fs::path(file_path).filename();
|
||||
|
||||
auto dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, relative_path);
|
||||
auto broken_dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, fs::path(relative_path) / "broken/");
|
||||
auto broken_dir_sync_guard = getDirectorySyncGuard(dir_fsync, disk, broken_relative_path);
|
||||
|
||||
{
|
||||
std::lock_guard status_lock(status_mutex);
|
||||
@ -1198,21 +1211,9 @@ void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_pat
|
||||
fs::remove(file_path);
|
||||
}
|
||||
|
||||
bool StorageDistributedDirectoryMonitor::maybeMarkAsBroken(const std::string & file_path, const Exception & e)
|
||||
{
|
||||
/// Mark file as broken if necessary.
|
||||
if (isFileBrokenErrorCode(e.code(), e.isRemoteException()))
|
||||
{
|
||||
markAsBroken(file_path);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string StorageDistributedDirectoryMonitor::getLoggerName() const
|
||||
{
|
||||
return storage.getStorageID().getFullTableName() + ".DirectoryMonitor";
|
||||
return storage.getStorageID().getFullTableName() + ".DirectoryMonitor." + disk->getName();
|
||||
}
|
||||
|
||||
void StorageDistributedDirectoryMonitor::updatePath(const std::string & new_relative_path)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Common/ConcurrentBoundedQueue.h>
|
||||
#include <Client/ConnectionPool.h>
|
||||
|
||||
#include <atomic>
|
||||
@ -38,7 +39,8 @@ public:
|
||||
const std::string & relative_path_,
|
||||
ConnectionPoolPtr pool_,
|
||||
ActionBlocker & monitor_blocker_,
|
||||
BackgroundSchedulePool & bg_pool);
|
||||
BackgroundSchedulePool & bg_pool,
|
||||
bool initialize_from_disk);
|
||||
|
||||
~StorageDistributedDirectoryMonitor();
|
||||
|
||||
@ -53,7 +55,7 @@ public:
|
||||
static std::shared_ptr<ISource> createSourceFromFile(const String & file_name);
|
||||
|
||||
/// For scheduling via DistributedSink.
|
||||
bool addAndSchedule(size_t file_size, size_t ms);
|
||||
bool addAndSchedule(const std::string & file_path, size_t file_size, size_t ms);
|
||||
|
||||
struct InternalStatus
|
||||
{
|
||||
@ -78,14 +80,15 @@ public:
|
||||
private:
|
||||
void run();
|
||||
|
||||
std::map<UInt64, std::string> getFiles();
|
||||
bool processFiles(const std::map<UInt64, std::string> & files);
|
||||
bool hasPendingFiles() const;
|
||||
|
||||
void initializeFilesFromDisk();
|
||||
void processFiles();
|
||||
void processFile(const std::string & file_path);
|
||||
void processFilesWithBatching(const std::map<UInt64, std::string> & files);
|
||||
void processFilesWithBatching();
|
||||
|
||||
void markAsBroken(const std::string & file_path);
|
||||
void markAsSend(const std::string & file_path);
|
||||
bool maybeMarkAsBroken(const std::string & file_path, const Exception & e);
|
||||
|
||||
std::string getLoggerName() const;
|
||||
|
||||
@ -95,25 +98,33 @@ private:
|
||||
DiskPtr disk;
|
||||
std::string relative_path;
|
||||
std::string path;
|
||||
std::string broken_relative_path;
|
||||
std::string broken_path;
|
||||
|
||||
const bool should_batch_inserts = false;
|
||||
const bool split_batch_on_failure = true;
|
||||
const bool dir_fsync = false;
|
||||
const size_t min_batched_block_size_rows = 0;
|
||||
const size_t min_batched_block_size_bytes = 0;
|
||||
String current_batch_file_path;
|
||||
|
||||
/// This is pending data (due to some error) for should_batch_inserts==true
|
||||
std::string current_batch_file_path;
|
||||
/// This is pending data (due to some error) for should_batch_inserts==false
|
||||
std::string current_batch_file;
|
||||
|
||||
struct BatchHeader;
|
||||
struct Batch;
|
||||
|
||||
std::mutex status_mutex;
|
||||
|
||||
InternalStatus status;
|
||||
|
||||
ConcurrentBoundedQueue<std::string> pending_files;
|
||||
|
||||
const std::chrono::milliseconds default_sleep_time;
|
||||
std::chrono::milliseconds sleep_time;
|
||||
const std::chrono::milliseconds max_sleep_time;
|
||||
std::chrono::time_point<std::chrono::system_clock> last_decrease_time {std::chrono::system_clock::now()};
|
||||
std::atomic<bool> quit {false};
|
||||
std::mutex mutex;
|
||||
Poco::Logger * log;
|
||||
ActionBlocker & monitor_blocker;
|
||||
|
@ -724,6 +724,9 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const
|
||||
return guard;
|
||||
};
|
||||
|
||||
std::vector<std::string> bin_files;
|
||||
bin_files.reserve(dir_names.size());
|
||||
|
||||
auto it = dir_names.begin();
|
||||
/// on first iteration write block to a temporary directory for subsequent
|
||||
/// hardlinking to ensure the inode is not freed until we're done
|
||||
@ -802,8 +805,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const
|
||||
}
|
||||
|
||||
// Create hardlink here to reuse increment number
|
||||
const std::string block_file_path(fs::path(path) / file_name);
|
||||
createHardLink(first_file_tmp_path, block_file_path);
|
||||
bin_files.push_back(fs::path(path) / file_name);
|
||||
createHardLink(first_file_tmp_path, bin_files.back());
|
||||
auto dir_sync_guard = make_directory_sync_guard(*it);
|
||||
}
|
||||
++it;
|
||||
@ -814,8 +817,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const
|
||||
const std::string path(fs::path(disk_path) / (data_path + *it));
|
||||
fs::create_directory(path);
|
||||
|
||||
const std::string block_file_path(fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin"));
|
||||
createHardLink(first_file_tmp_path, block_file_path);
|
||||
bin_files.push_back(fs::path(path) / (toString(storage.file_names_increment.get()) + ".bin"));
|
||||
createHardLink(first_file_tmp_path, bin_files.back());
|
||||
auto dir_sync_guard = make_directory_sync_guard(*it);
|
||||
}
|
||||
|
||||
@ -826,10 +829,13 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const
|
||||
|
||||
/// Notify
|
||||
auto sleep_ms = context->getSettingsRef().distributed_directory_monitor_sleep_time_ms;
|
||||
for (const auto & dir_name : dir_names)
|
||||
for (size_t i = 0; i < dir_names.size(); ++i)
|
||||
{
|
||||
const auto & dir_name = dir_names[i];
|
||||
const auto & bin_file = bin_files[i];
|
||||
|
||||
auto & directory_monitor = storage.requireDirectoryMonitor(disk, dir_name, /* startup= */ false);
|
||||
directory_monitor.addAndSchedule(file_size, sleep_ms.totalMilliseconds());
|
||||
directory_monitor.addAndSchedule(bin_file, file_size, sleep_ms.totalMilliseconds());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -651,7 +651,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
|
||||
}
|
||||
#endif
|
||||
|
||||
LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy."));
|
||||
LOG_WARNING(log, "Will retry fetching part without zero-copy: {}", e.message());
|
||||
|
||||
/// It's important to release session from HTTP pool. Otherwise it's possible to get deadlock
|
||||
/// on http pool.
|
||||
|
@ -109,10 +109,11 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
|
||||
/// 2. We have some larger merged part which covers new_part_name (and therefore it covers source_part_name too)
|
||||
/// 3. We have two intersecting parts, both cover source_part_name. It's a logical error.
|
||||
/// TODO: Why can 1 and 2 happen? Do we need more assertions here or somewhere else?
|
||||
constexpr const char * message = "Part {} is covered by {} but should be merged into {}. This shouldn't happen often.";
|
||||
LOG_WARNING(log, fmt::runtime(message), source_part_name, source_part_or_covering->name, entry.new_part_name);
|
||||
constexpr auto fmt_string = "Part {} is covered by {} but should be merged into {}. This shouldn't happen often.";
|
||||
String message;
|
||||
LOG_WARNING(LogToStr(message, log), fmt_string, source_part_name, source_part_or_covering->name, entry.new_part_name);
|
||||
if (!source_part_or_covering->info.contains(MergeTreePartInfo::fromPartName(entry.new_part_name, storage.format_version)))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, message, source_part_name, source_part_or_covering->name, entry.new_part_name);
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, message);
|
||||
|
||||
return PrepareResult{
|
||||
.prepared_successfully = false,
|
||||
|
File diff suppressed because it is too large
@ -1052,6 +1052,8 @@ public:
|
||||
/// Returns an object that protects temporary directory from cleanup
|
||||
scope_guard getTemporaryPartDirectoryHolder(const String & part_dir_name) const;
|
||||
|
||||
void waitForOutdatedPartsToBeLoaded() const;
|
||||
|
||||
protected:
|
||||
friend class IMergeTreeDataPart;
|
||||
friend class MergeTreeDataMergerMutator;
|
||||
@ -1068,7 +1070,6 @@ protected:
|
||||
/// under lockForShare if rename is possible.
|
||||
String relative_data_path;
|
||||
|
||||
|
||||
/// Current column sizes in compressed and uncompressed form.
|
||||
ColumnSizeByName column_sizes;
|
||||
|
||||
@ -1330,6 +1331,88 @@ protected:
|
||||
void resetObjectColumnsFromActiveParts(const DataPartsLock & lock);
|
||||
void updateObjectColumns(const DataPartPtr & part, const DataPartsLock & lock);
|
||||
|
||||
/** A structure that explicitly represents a "merge tree" of parts
|
||||
* which is implicitly represented by the min-max block numbers and levels of parts.
|
||||
* The children of a node are the parts covered by the parent part.
|
||||
* This tree provides the order of loading of parts.
|
||||
*
|
||||
* We start traversing the tree from the top level and load the parts
|
||||
* corresponding to the nodes. If a part is loaded successfully then
|
||||
* we stop traversal at this node. Otherwise the part is broken and we
|
||||
* traverse its children and try to load covered parts which will
|
||||
* replace the broken covering part. Unloaded nodes represent outdated parts
|
||||
* and they are pushed to a background task and loaded asynchronously.
|
||||
*/
|
||||
class PartLoadingTree
|
||||
{
|
||||
public:
|
||||
struct Node
|
||||
{
|
||||
Node(const MergeTreePartInfo & info_, const String & name_, const DiskPtr & disk_)
|
||||
: info(info_), name(name_), disk(disk_)
|
||||
{
|
||||
}
|
||||
|
||||
const MergeTreePartInfo info;
|
||||
const String name;
|
||||
const DiskPtr disk;
|
||||
|
||||
bool is_loaded = false;
|
||||
std::map<MergeTreePartInfo, std::shared_ptr<Node>> children;
|
||||
};
|
||||
|
||||
struct PartLoadingInfo
|
||||
{
|
||||
PartLoadingInfo(const MergeTreePartInfo & info_, const String & name_, const DiskPtr & disk_)
|
||||
: info(info_), name(name_), disk(disk_)
|
||||
{
|
||||
}
|
||||
|
||||
/// Store name explicitly because it cannot be easily
|
||||
/// retrieved from info in tables with old syntax.
|
||||
MergeTreePartInfo info;
|
||||
String name;
|
||||
DiskPtr disk;
|
||||
};
|
||||
|
||||
using NodePtr = std::shared_ptr<Node>;
|
||||
using PartLoadingInfos = std::vector<PartLoadingInfo>;
|
||||
|
||||
/// Builds a tree from the list of part infos.
|
||||
static PartLoadingTree build(PartLoadingInfos nodes);
|
||||
|
||||
/// Traverses the tree and calls @func on each node.
|
||||
/// If recursive is false, traverses only the top level.
|
||||
template <typename Func>
|
||||
void traverse(bool recursive, Func && func);
|
||||
|
||||
private:
|
||||
/// NOTE: Parts should be added in descending order of their levels
|
||||
/// because rearranging the tree to a new root is not supported.
|
||||
void add(const MergeTreePartInfo & info, const String & name, const DiskPtr & disk);
|
||||
std::unordered_map<String, NodePtr> root_by_partition;
|
||||
};
|
||||
|
||||
using PartLoadingTreeNodes = std::vector<PartLoadingTree::NodePtr>;
|
||||
|
||||
struct LoadPartResult
|
||||
{
|
||||
bool is_broken = false;
|
||||
std::optional<size_t> size_of_part;
|
||||
MutableDataPartPtr part;
|
||||
};
|
||||
|
||||
mutable std::mutex outdated_data_parts_mutex;
|
||||
mutable std::condition_variable outdated_data_parts_cv;
|
||||
|
||||
BackgroundSchedulePool::TaskHolder outdated_data_parts_loading_task;
|
||||
PartLoadingTreeNodes outdated_unloaded_data_parts TSA_GUARDED_BY(outdated_data_parts_mutex);
|
||||
bool outdated_data_parts_loading_canceled TSA_GUARDED_BY(outdated_data_parts_mutex) = false;
|
||||
|
||||
void loadOutdatedDataParts(bool is_async);
|
||||
void startOutdatedDataPartsLoadingTask();
|
||||
void stopOutdatedDataPartsLoadingTask();
|
||||
|
||||
static void incrementInsertedPartsProfileEvent(MergeTreeDataPartType type);
|
||||
static void incrementMergedPartsProfileEvent(MergeTreeDataPartType type);
|
||||
|
||||
@ -1408,18 +1491,20 @@ private:
|
||||
/// Returns default settings for storage with possible changes from global config.
|
||||
virtual std::unique_ptr<MergeTreeSettings> getDefaultSettings() const = 0;
|
||||
|
||||
void loadDataPartsFromDisk(
|
||||
MutableDataPartsVector & broken_parts_to_detach,
|
||||
MutableDataPartsVector & duplicate_parts_to_remove,
|
||||
LoadPartResult loadDataPart(
|
||||
const MergeTreePartInfo & part_info,
|
||||
const String & part_name,
|
||||
const DiskPtr & part_disk_ptr,
|
||||
MergeTreeDataPartState to_state,
|
||||
std::mutex & part_loading_mutex);
|
||||
|
||||
std::vector<LoadPartResult> loadDataPartsFromDisk(
|
||||
ThreadPool & pool,
|
||||
size_t num_parts,
|
||||
std::queue<std::vector<std::pair<String, DiskPtr>>> & parts_queue,
|
||||
bool skip_sanity_checks,
|
||||
std::queue<PartLoadingTreeNodes> & parts_queue,
|
||||
const MergeTreeSettingsPtr & settings);
|
||||
|
||||
void loadDataPartsFromWAL(
|
||||
MutableDataPartsVector & duplicate_parts_to_remove,
|
||||
MutableDataPartsVector & parts_from_wal);
|
||||
void loadDataPartsFromWAL(MutableDataPartsVector & parts_from_wal);
|
||||
|
||||
/// Create zero-copy exclusive lock for part and disk. Useful for coordination of
|
||||
/// distributed operations which can lead to data duplication. Implemented only in ReplicatedMergeTree.
|
||||
@ -1430,7 +1515,7 @@ private:
|
||||
/// Otherwise, in non-parallel case will break and return.
|
||||
void clearPartsFromFilesystemImpl(const DataPartsVector & parts, NameSet * part_names_succeed);
|
||||
|
||||
static MutableDataPartPtr preparePartForRemoval(const DataPartPtr & part);
|
||||
static MutableDataPartPtr asMutableDeletingPart(const DataPartPtr & part);
|
||||
|
||||
mutable TemporaryParts temporary_parts;
|
||||
};
|
||||
|
@ -193,7 +193,8 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
|
||||
if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned.
|
||||
{
|
||||
result.emplace_back(Block(block), Row{});
|
||||
result[0].offsets = chunk_offsets;
|
||||
if (chunk_offsets != nullptr)
|
||||
result[0].offsets = std::move(chunk_offsets->offsets);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -230,7 +231,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
|
||||
/// do not interfere with possible calculated primary key columns of the same name.
|
||||
result.emplace_back(Block(block), get_partition(0));
|
||||
if (!chunk_offsets_with_partition.empty())
|
||||
result[0].offsets = chunk_offsets_with_partition[0];
|
||||
result[0].offsets = std::move(chunk_offsets_with_partition[0]->offsets);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -245,7 +246,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < chunk_offsets_with_partition.size(); ++i)
|
||||
result[i].offsets = chunk_offsets_with_partition[i];
|
||||
result[i].offsets = std::move(chunk_offsets_with_partition[i]->offsets);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -22,15 +22,15 @@ struct BlockWithPartition
|
||||
{
|
||||
Block block;
|
||||
Row partition;
|
||||
ChunkOffsetsPtr offsets;
|
||||
std::vector<size_t> offsets;
|
||||
|
||||
BlockWithPartition(Block && block_, Row && partition_)
|
||||
: block(block_), partition(std::move(partition_))
|
||||
{
|
||||
}
|
||||
|
||||
BlockWithPartition(Block && block_, Row && partition_, ChunkOffsetsPtr chunk_offsets_)
|
||||
: block(block_), partition(std::move(partition_)), offsets(chunk_offsets_)
|
||||
BlockWithPartition(Block && block_, Row && partition_, std::vector<size_t> && offsets_)
|
||||
: block(block_), partition(std::move(partition_)), offsets(std::move(offsets_))
|
||||
{
|
||||
}
|
||||
};
|
||||
|
@ -373,9 +373,9 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
|
||||
throw;
|
||||
|
||||
tryLogCurrentException(log, __PRETTY_FUNCTION__);
|
||||
|
||||
String message = "Part " + part_name + " looks broken. Removing it and will try to fetch.";
|
||||
LOG_ERROR(log, fmt::runtime(message));
|
||||
constexpr auto fmt_string = "Part {} looks broken. Removing it and will try to fetch.";
|
||||
String message = fmt::format(fmt_string, part_name);
|
||||
LOG_ERROR(log, fmt_string, part_name);
|
||||
|
||||
/// Delete part locally.
|
||||
storage.outdateBrokenPartAndCloneToDetached(part, "broken");
|
||||
@ -392,9 +392,9 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
|
||||
/// Probably, someone just wrote down the part, and has not yet added to ZK.
|
||||
/// Therefore, delete only if the part is old (not very reliable).
|
||||
ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed);
|
||||
|
||||
String message = "Unexpected part " + part_name + " in filesystem. Removing.";
|
||||
LOG_ERROR(log, fmt::runtime(message));
|
||||
constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing.";
|
||||
String message = fmt::format(fmt_string, part_name);
|
||||
LOG_ERROR(log, fmt_string, part_name);
|
||||
storage.outdateBrokenPartAndCloneToDetached(part, "unexpected");
|
||||
return {part_name, false, message};
|
||||
}
|
||||
|
@ -1191,12 +1191,10 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
|
||||
if (entry_for_same_part_it != future_parts.end())
|
||||
{
|
||||
const LogEntry & another_entry = *entry_for_same_part_it->second;
|
||||
out_reason = fmt::format(
|
||||
"Not executing log entry {} of type {} for part {} "
|
||||
"because another log entry {} of type {} for the same part ({}) is being processed.",
|
||||
entry.znode_name, entry.type, entry.new_part_name,
|
||||
another_entry.znode_name, another_entry.type, another_entry.new_part_name);
|
||||
LOG_INFO(log, fmt::runtime(out_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
|
||||
"because another log entry {} of type {} for the same part ({}) is being processed.";
|
||||
LOG_INFO(LogToStr(out_reason, log), fmt_string, entry.znode_name, entry.type, entry.new_part_name,
|
||||
another_entry.znode_name, another_entry.type, another_entry.new_part_name);
|
||||
return true;
|
||||
|
||||
/** When the corresponding action is completed, `isNotCoveredByFuturePart` will succeed next time,
|
||||
@ -1238,11 +1236,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
|
||||
{
|
||||
if (entry.znode_name < future_part_elem.second->znode_name)
|
||||
{
|
||||
out_reason = fmt::format(
|
||||
"Not executing log entry {} for part {} "
|
||||
"because it is not disjoint with part {} that is currently executing and another entry {} is newer.",
|
||||
entry.znode_name, new_part_name, future_part_elem.first, future_part_elem.second->znode_name);
|
||||
LOG_TRACE(log, fmt::runtime(out_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} for part {} "
|
||||
"because it is not disjoint with part {} that is currently executing and another entry {} is newer.";
|
||||
LOG_TRACE(LogToStr(out_reason, log), fmt_string, entry.znode_name, new_part_name, future_part_elem.first, future_part_elem.second->znode_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1250,11 +1246,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
|
||||
continue;
|
||||
}
|
||||
|
||||
out_reason = fmt::format(
|
||||
"Not executing log entry {} for part {} "
|
||||
"because it is not disjoint with part {} that is currently executing.",
|
||||
entry.znode_name, new_part_name, future_part_elem.first);
|
||||
LOG_TRACE(log, fmt::runtime(out_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} for part {} "
|
||||
"because it is not disjoint with part {} that is currently executing.";
|
||||
LOG_TEST(LogToStr(out_reason, log), fmt_string, entry.znode_name, new_part_name, future_part_elem.first);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1337,11 +1331,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
{
|
||||
if (future_parts.contains(name))
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} of type {} for part {} "
|
||||
"because part {} is not ready yet (log entry for that part is being processed).",
|
||||
entry.znode_name, entry.typeToString(), entry.new_part_name, name);
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
|
||||
"because part {} is not ready yet (log entry for that part is being processed).";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.typeToString(), entry.new_part_name, name);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1357,10 +1349,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
|
||||
if (merger_mutator.merges_blocker.isCancelled())
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} of type {} for part {} because merges and mutations are cancelled now.",
|
||||
entry.znode_name, entry.typeToString(), entry.new_part_name);
|
||||
LOG_DEBUG(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} because merges and mutations are cancelled now.";
|
||||
LOG_DEBUG(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.typeToString(), entry.new_part_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1375,8 +1365,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
|
||||
if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0]))
|
||||
{
|
||||
out_postpone_reason = "Not executing merge/mutation for the part " + entry.new_part_name
|
||||
+ ", waiting other replica to execute it and will fetch after.";
|
||||
constexpr auto fmt_string = "Not executing merge/mutation for the part {}, waiting other replica to execute it and will fetch after.";
|
||||
out_postpone_reason = fmt::format(fmt_string, entry.new_part_name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1387,9 +1377,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
|
||||
if (replica_to_execute_merge && !merge_strategy_picker.isMergeFinishedByReplica(replica_to_execute_merge.value(), entry))
|
||||
{
|
||||
String reason = "Not executing merge for the part " + entry.new_part_name
|
||||
+ ", waiting for " + replica_to_execute_merge.value() + " to execute merge.";
|
||||
out_postpone_reason = reason;
|
||||
constexpr auto fmt_string = "Not executing merge for the part {}, waiting for {} to execute merge.";
|
||||
out_postpone_reason = fmt::format(fmt_string, entry.new_part_name, replica_to_execute_merge.value());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1411,20 +1400,16 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
{
|
||||
if (merger_mutator.ttl_merges_blocker.isCancelled())
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} for part {} because merges with TTL are cancelled now.",
|
||||
entry.znode_name, entry.new_part_name);
|
||||
LOG_DEBUG(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} for part {} because merges with TTL are cancelled now.";
|
||||
LOG_DEBUG(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.new_part_name);
|
||||
return false;
|
||||
}
|
||||
size_t total_merges_with_ttl = data.getTotalMergesWithTTLInMergeList();
|
||||
if (total_merges_with_ttl >= data_settings->max_number_of_merges_with_ttl_in_pool)
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} for part {} because {} merges with TTL already executing, maximum {}.",
|
||||
entry.znode_name, entry.new_part_name, total_merges_with_ttl,
|
||||
data_settings->max_number_of_merges_with_ttl_in_pool);
|
||||
LOG_DEBUG(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} for part {} because {} merges with TTL already executing, maximum {}.";
|
||||
LOG_DEBUG(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.new_part_name, total_merges_with_ttl,
|
||||
data_settings->max_number_of_merges_with_ttl_in_pool);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1432,12 +1417,10 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
|
||||
if (!ignore_max_size && sum_parts_size_in_bytes > max_source_parts_size)
|
||||
{
|
||||
out_postpone_reason = fmt::format("Not executing log entry {} of type {} for part {}"
|
||||
" because source parts size ({}) is greater than the current maximum ({}).",
|
||||
entry.znode_name, entry.typeToString(), entry.new_part_name,
|
||||
ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size));
|
||||
|
||||
LOG_DEBUG(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {}"
|
||||
" because source parts size ({}) is greater than the current maximum ({}).";
|
||||
LOG_DEBUG(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.typeToString(), entry.new_part_name,
|
||||
ReadableSize(sum_parts_size_in_bytes), ReadableSize(max_source_parts_size));
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -1450,10 +1433,8 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
if (!alter_sequence.canExecuteMetaAlter(entry.alter_version, state_lock))
|
||||
{
|
||||
int head_alter = alter_sequence.getHeadAlterVersion(state_lock);
|
||||
out_postpone_reason = fmt::format(
|
||||
"Cannot execute alter metadata {} with version {} because another alter {} must be executed before",
|
||||
entry.znode_name, entry.alter_version, head_alter);
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Cannot execute alter metadata {} with version {} because another alter {} must be executed before";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.alter_version, head_alter);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1466,17 +1447,13 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
int head_alter = alter_sequence.getHeadAlterVersion(state_lock);
|
||||
if (head_alter == entry.alter_version)
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Cannot execute alter data {} with version {} because metadata still not altered",
|
||||
entry.znode_name, entry.alter_version);
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Cannot execute alter data {} with version {} because metadata still not altered";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.alter_version);
|
||||
}
|
||||
else
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Cannot execute alter data {} with version {} because another alter {} must be executed before",
|
||||
entry.znode_name, entry.alter_version, head_alter);
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Cannot execute alter data {} with version {} because another alter {} must be executed before";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.alter_version, head_alter);
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -1498,14 +1475,12 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
{
|
||||
if (drop_range_info.isDisjoint(info))
|
||||
continue;
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} of type {} for part {} "
|
||||
"because another DROP_RANGE or REPLACE_RANGE entry with not disjoint range {} is currently executing.",
|
||||
entry.znode_name,
|
||||
entry.typeToString(),
|
||||
entry.new_part_name,
|
||||
info.getPartNameForLogs());
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
|
||||
"because another DROP_RANGE or REPLACE_RANGE entry with not disjoint range {} is currently executing.";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name,
|
||||
entry.typeToString(),
|
||||
entry.new_part_name,
|
||||
info.getPartNameForLogs());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -1531,11 +1506,10 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
|
||||
auto new_part_info = MergeTreePartInfo::fromPartName(new_part_name, format_version);
|
||||
if (!new_part_info.isDisjoint(drop_part_info))
|
||||
{
|
||||
out_postpone_reason = fmt::format(
|
||||
"Not executing log entry {} of type {} for part {} "
|
||||
"because it probably depends on {} (REPLACE_RANGE).",
|
||||
entry.znode_name, entry.typeToString(), entry.new_part_name, replace_entry->znode_name);
|
||||
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
|
||||
constexpr auto fmt_string = "Not executing log entry {} of type {} for part {} "
|
||||
"because it probably depends on {} (REPLACE_RANGE).";
|
||||
LOG_TRACE(LogToStr(out_postpone_reason, log), fmt_string, entry.znode_name, entry.typeToString(),
|
||||
entry.new_part_name, replace_entry->znode_name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff.