diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index cbd3bd7bec4..c52a58eac8a 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -683,3 +683,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
+ python3 merge_pr.py
diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index a513eb9216d..d69020d810e 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -169,3 +169,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
+ python3 merge_pr.py --check-approved
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index aecf3799a5d..c677ec4bf5c 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -4388,3 +4388,4 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
+ python3 merge_pr.py --check-approved
diff --git a/contrib/poco b/contrib/poco
index 79923422618..0ab9bba7cca 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 799234226187c0ae0b8c90f23465b25ed7956e56
+Subproject commit 0ab9bba7ccad3c8dacce04a35cb3b78218547ab4
diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 2582b599d58..3458cf905da 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -5,6 +5,7 @@ set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
+dmesg --clear ||:
set -e
set -u
@@ -368,6 +369,7 @@ if [ -f core.zst ]; then
fi
rg --text -F '' server.log > fatal.log ||:
+dmesg -T > dmesg.log ||:
zstd --threads=0 server.log
@@ -396,6 +398,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
fuzzer.log
server.log.zst
main.log
+ dmesg.log
${CORE_LINK}
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index 2165045e565..ee4b5d7c156 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -128,6 +128,7 @@ function run_tests()
if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then
ADDITIONAL_OPTIONS+=('--report-coverage')
+ ADDITIONAL_OPTIONS+=('--report-logs-stats')
fi
set +e
diff --git a/docker/test/stress/stress b/docker/test/stress/stress
index cf92b86c18f..4afd2745526 100755
--- a/docker/test/stress/stress
+++ b/docker/test/stress/stress
@@ -289,6 +289,7 @@ if __name__ == "__main__":
"--database=system",
"--hung-check",
"--stress",
+ "--report-logs-stats",
"00001_select_1",
]
)
diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md
index ac19794c167..939995a61c5 100644
--- a/docs/en/engines/database-engines/postgresql.md
+++ b/docs/en/engines/database-engines/postgresql.md
@@ -136,3 +136,7 @@ DESCRIBE TABLE test_database.test_table;
│ data │ Nullable(String) │
└────────┴───────────────────┘
```
+
+## Related content
+
+- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md
index 7f9659400b8..b73d28c8508 100644
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@@ -175,3 +175,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+
+## Related content
+
+- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index e28c486afca..d384ed639eb 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -1203,12 +1203,14 @@ SELECT * FROM json_each_row_nested
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
+- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
+- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in a JSON object while parsing a named tuple. Default value - `true`.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`.
- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`.
-- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`.
+- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`.
- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`.
- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`.
diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md
index 376b7480358..7a6b2340d29 100644
--- a/docs/en/operations/settings/query-complexity.md
+++ b/docs/en/operations/settings/query-complexity.md
@@ -266,7 +266,7 @@ Default value: 0.
Limits the size in bytes of the hash table used when joining tables.
-This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md).
+This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md).
If the query contains joins, ClickHouse checks this setting for every intermediate result.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4ffe2bbc7c4..9def33debbd 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -402,40 +402,62 @@ Default value: `ALL`.
## join_algorithm {#settings-join_algorithm}
-Specifies [JOIN](../../sql-reference/statements/select/join.md) algorithm.
+Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used.
Several algorithms can be specified, and an available one would be chosen for a particular query based on kind/strictness and table engine.
Possible values:
-- `default` — `hash` or `direct`, if possible (same as `direct,hash`)
+### `default`
-- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
+This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`).
-- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.
+### `grace_hash`
+
+[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option that delivers performant complex joins while limiting memory use.
+
+The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row is recalculated. Any rows which don’t belong to the current bucket are flushed and reassigned.
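+
+For example, a query can opt in to grace hash join explicitly (the table names below are only illustrative, and `grace_hash_join_initial_buckets` is shown with an arbitrary value):
+
+```sql
+SET join_algorithm = 'grace_hash';
+SET grace_hash_join_initial_buckets = 16; -- illustrative starting bucket count
+
+SELECT t1.key, t2.value
+FROM t1
+JOIN t2 ON t1.key = t2.key;
+```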
+
+### `hash`
+
+[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
+
+### `parallel_hash`
+
+A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.
When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
-- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
+### `partial_merge`
+
+A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
-When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
+When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
-- `direct` - can be applied when the right storage supports key-value requests.
+### `direct`
+
+This algorithm can be applied when the storage for the right table supports key-value requests.
The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
-- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated.
+### `auto`
-- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining.
+When set to `auto`, the `hash` join is tried first, and the algorithm is switched on the fly if the memory limit is violated.
-- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
+### `full_sorting_merge`
+
+[Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting of the joined tables before joining.
+
+### `prefer_partial_merge`
+
+ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
## join_any_take_last_row {#settings-join_any_take_last_row}
-Changes behaviour of join operations with `ANY` strictness.
+Changes the behaviour of join operations with `ANY` strictness.
:::warning
This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables.
@@ -498,7 +520,7 @@ Default value: 65536.
Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
-The bigger the value of the setting, the more RAM used and the less disk I/O needed.
+The bigger the value of the setting, the more RAM is used and the less disk I/O is needed.
Possible values:
@@ -514,12 +536,12 @@ Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations.
Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour.
:::
-When the legacy behaviour enabled:
+When the legacy behaviour is enabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
-When the legacy behaviour disabled:
+When the legacy behaviour is disabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.
@@ -572,7 +594,7 @@ Default value: `163840`.
## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem}
-The minimum number of lines to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.
+The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.
Possible values:
@@ -706,7 +728,7 @@ log_queries=1
## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms}
-If enabled (non-zero), queries faster then the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:
+If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:
- `system.query_log`
- `system.query_thread_log`
@@ -741,7 +763,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
Setting up query threads logging.
-Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
+Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
Possible values:
@@ -760,7 +782,7 @@ log_query_threads=1
Setting up query views logging.
-When a query run by ClickHouse with this setup on has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.
+When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.
Example:
@@ -787,7 +809,7 @@ It can be used to improve the readability of server logs. Additionally, it helps
Possible values:
-- Any string no longer than [max_query_size](#settings-max_query_size). If length is exceeded, the server throws an exception.
+- Any string no longer than [max_query_size](#settings-max_query_size). If `max_query_size` is exceeded, the server throws an exception.
Default value: empty string.
@@ -821,11 +843,11 @@ The setting also does not have a purpose when using INSERT SELECT, since data is
Default value: 1,048,576.
-The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM.
+The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM.
## min_insert_block_size_rows {#min-insert-block-size-rows}
-Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
+Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
Possible values:
@@ -891,7 +913,7 @@ Higher values will lead to higher memory usage.
## max_compress_block_size {#max-compress-block-size}
-The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
+The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to a slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
:::warning
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
@@ -935,7 +957,7 @@ Default value: 1000.
## interactive_delay {#interactive-delay}
-The interval in microseconds for checking whether request execution has been cancelled and sending the progress.
+The interval in microseconds for checking whether request execution has been canceled and sending the progress.
Default value: 100,000 (checks for cancelling and sends the progress ten times per second).
@@ -4122,7 +4144,20 @@ Enabled by default.
Serialize named tuple columns as JSON objects.
-Disabled by default.
+Enabled by default.
+
+### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects}
+
+Parse named tuple columns as JSON objects.
+
+Enabled by default.
+
+### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
+
+Insert default values for missing elements in a JSON object while parsing a named tuple.
+This setting works only when the setting `input_format_json_named_tuples_as_objects` is enabled.
+
+Enabled by default.
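+
+For example (a minimal sketch; the table and column names are illustrative):
+
+```sql
+CREATE TABLE test_tuple (t Tuple(a UInt32, b String)) ENGINE = Memory;
+
+SET input_format_json_named_tuples_as_objects = 1;
+SET input_format_json_defaults_for_missing_elements_in_named_tuple = 1;
+
+-- the element "b" is missing, so it is filled with the default value for String
+INSERT INTO test_tuple FORMAT JSONEachRow {"t" : {"a" : 1}}
+
+SELECT * FROM test_tuple;
+```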
### output_format_json_array_of_rows {#output_format_json_array_of_rows}
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index 963ddfe7a02..a4fa5579638 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -120,5 +120,6 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
## Related Content
+- [Extracting, converting, and querying data in local files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local)
- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1)
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md
index 575141766dd..bd8e72e0fec 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/index.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/index.md
@@ -57,6 +57,7 @@ ClickHouse-specific aggregate functions:
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md)
+- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md)
@@ -77,4 +78,6 @@ ClickHouse-specific aggregate functions:
- [contingency](./contingency.md)
- [cramersV](./cramersv.md)
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
-- [theilsU](./theilsu.md)
\ No newline at end of file
+- [theilsU](./theilsu.md)
+- [maxIntersections](./maxintersections.md)
+- [maxIntersectionsPosition](./maxintersectionsposition.md)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md
new file mode 100644
index 00000000000..db99b900a3e
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md
@@ -0,0 +1,64 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/maxintersections
+sidebar_position: 360
+title: maxIntersections
+---
+
+# maxIntersections
+
+Aggregate function that calculates the maximum number of times that a group of intervals intersects each other (if all the intervals intersect at least once).
+
+The syntax is:
+
+```sql
+maxIntersections(start_column, end_column)
+```
+
+**Arguments**
+
+- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped.
+
+- `end_column` - the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped.
+
+**Returned value**
+
+Returns the maximum number of intersected intervals.
+
+**Example**
+
+```sql
+CREATE TABLE my_events (
+ start UInt32,
+ end UInt32
+)
+Engine = MergeTree
+ORDER BY tuple();
+
+INSERT INTO my_events VALUES
+ (1, 3),
+ (1, 6),
+ (2, 5),
+ (3, 7);
+```
+
+The intervals look like the following:
+
+```response
+1 - 3
+1 - - - - 6
+ 2 - - 5
+ 3 - - - 7
+```
+
+Three of these intervals have a common value (the value is `4`, but the specific value is not important; we are measuring the count of the intersections). The intervals `(1,3)` and `(3,7)` share an endpoint but are not considered intersecting by the `maxIntersections` function.
+
+```sql
+SELECT maxIntersections(start, end) FROM my_events;
+```
+
+Response:
+```response
+3
+```
+
+If you have multiple occurrences of the maximum interval, you can use the [`maxIntersectionsPosition` function](./maxintersectionsposition.md) to find the number and location of those occurrences.
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md
new file mode 100644
index 00000000000..7dd63f09316
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md
@@ -0,0 +1,64 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition
+sidebar_position: 361
+title: maxIntersectionsPosition
+---
+
+# maxIntersectionsPosition
+
+Aggregate function that calculates the positions of the occurrences of the [`maxIntersections` function](./maxintersections.md).
+
+The syntax is:
+
+```sql
+maxIntersectionsPosition(start_column, end_column)
+```
+
+**Arguments**
+
+- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped.
+
+- `end_column` - the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped.
+
+**Returned value**
+
+Returns the start positions of the maximum number of intersected intervals.
+
+**Example**
+
+```sql
+CREATE TABLE my_events (
+ start UInt32,
+ end UInt32
+)
+Engine = MergeTree
+ORDER BY tuple();
+
+INSERT INTO my_events VALUES
+ (1, 3),
+ (1, 6),
+ (2, 5),
+ (3, 7);
+```
+
+The intervals look like the following:
+
+```response
+1 - 3
+1 - - - - 6
+ 2 - - 5
+ 3 - - - 7
+```
+
+Notice that three of these intervals have the value 4 in common, and the earliest of those intervals is the 2nd interval, `(1,6)`:
+
+```sql
+SELECT maxIntersectionsPosition(start, end) FROM my_events;
+```
+
+Response:
+```response
+2
+```
+
+In other words, the `(1,6)` row is the start of the 3 intervals that intersect, and 3 is the maximum number of intervals that intersect.
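+
+The two functions can also be combined in a single query over the same `my_events` table:
+
+```sql
+SELECT
+    maxIntersections(start, end) AS max_intersections,
+    maxIntersectionsPosition(start, end) AS position
+FROM my_events;
+```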
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md
new file mode 100644
index 00000000000..07fcd187217
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md
@@ -0,0 +1,68 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted
+sidebar_position: 203
+---
+
+# quantileInterpolatedWeighted
+
+Computes [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using linear interpolation, taking into account the weight of each element.
+
+To get the interpolated value, all the passed values are combined into an array, which is then sorted by their corresponding weights. Quantile interpolation is then performed using the [weighted percentile method](https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method) by building a cumulative distribution based on weights and then a linear interpolation is performed using the weights and the values to compute the quantiles.
+
+When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
+
+**Syntax**
+
+``` sql
+quantileInterpolatedWeighted(level)(expr, weight)
+```
+
+Alias: `medianInterpolatedWeighted`.
+
+**Arguments**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+- `weight` — Column with weights of sequence members. Weight is a number of value occurrences.
+
+**Returned value**
+
+- Quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Input table:
+
+``` text
+┌─n─┬─val─┐
+│ 0 │ 3 │
+│ 1 │ 2 │
+│ 2 │ 1 │
+│ 5 │ 4 │
+└───┴─────┘
+```
+
+Query:
+
+``` sql
+SELECT quantileInterpolatedWeighted(n, val) FROM t
+```
+
+Result:
+
+``` text
+┌─quantileInterpolatedWeighted(n, val)─┐
+│ 1 │
+└──────────────────────────────────────┘
+```
+
+**See Also**
+
+- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
+- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
index 5c9120fb8f4..57151915336 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md
@@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)`
-All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
+All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
## quantilesExactExclusive
diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md
index ab1596b1760..d9099ba5ad3 100644
--- a/docs/en/sql-reference/data-types/json.md
+++ b/docs/en/sql-reference/data-types/json.md
@@ -6,6 +6,10 @@ sidebar_label: JSON
# JSON
+:::warning
+This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead.
+:::
+
Stores JavaScript Object Notation (JSON) documents in a single column.
`JSON` is an alias for `Object('json')`.
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index c044b972754..9d2f89c1837 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -121,7 +121,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa
## range(end), range(\[start, \] end \[, step\])
-Returns an array of `UInt` numbers from `start` to `end - 1` by `step`.
+Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md).
**Syntax**
``` sql
@@ -130,31 +130,30 @@ range([start, ] end [, step])
**Arguments**
-- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. [UInt](../data-types/int-uint.md)
-- `end` — The number before which the array is constructed. Required. [UInt](../data-types/int-uint.md)
-- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. [UInt](../data-types/int-uint.md)
+- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0.
+- `end` — The number before which the array is constructed. Required.
+- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1.
**Returned value**
-- Array of `UInt` numbers from `start` to `end - 1` by `step`.
+- Array of numbers from `start` to `end - 1` by `step`.
**Implementation details**
-- All arguments must be positive values: `start`, `end`, `step` are `UInt` data types, as well as elements of the returned array.
+- All arguments `start`, `end`, `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array are also of one of these types, and the array's element type is a supertype of all the argument types.
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
-
**Examples**
Query:
``` sql
-SELECT range(5), range(1, 5), range(1, 5, 2);
+SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2);
```
Result:
```txt
-┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
-│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
-└─────────────┴─────────────┴────────────────┘
+┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐
+│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │
+└─────────────┴─────────────┴────────────────┴─────────────────┘
```
## array(x1, …), operator \[x1, …\]
diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md
index dd56b47cd3a..380c8364090 100644
--- a/docs/en/sql-reference/table-functions/generate.md
+++ b/docs/en/sql-reference/table-functions/generate.md
@@ -39,3 +39,16 @@ SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(
│ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │
└──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```
+
+```sql
+CREATE TABLE random (a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) engine=Memory;
+INSERT INTO random SELECT * FROM generateRandom() LIMIT 2;
+SELECT * FROM random;
+```
+
+```text
+┌─a────────────────────────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐
+│ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │
+│ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │
+└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
+```
\ No newline at end of file
diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md
index 565304710cc..d150b94b8af 100644
--- a/docs/zh/sql-reference/functions/array-functions.md
+++ b/docs/zh/sql-reference/functions/array-functions.md
@@ -117,7 +117,7 @@ SELECT notEmpty([1,2]);
## range(end), range(\[start, \] end \[, step\]) {#range}
-返回一个以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。
+返回一个以`step`作为增量步长的从`start`到`end - 1`的整型数字数组, 支持类型包括[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。
**语法**
``` sql
@@ -126,31 +126,30 @@ range([start, ] end [, step])
**参数**
-- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0,类型为[UInt](../data-types/int-uint.md)。
-- `end` — 计数到`end`结束,但不包括`end`,必填项,类型为[UInt](../data-types/int-uint.md)。
-- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1,类型为[UInt](../data-types/int-uint.md)。
+- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0。
+- `end` — 计数到`end`结束,但不包括`end`,必填项。
+- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1。
**返回值**
-- 以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。
+- 以`step`作为增量步长的从`start`到`end - 1`的数字数组。
**注意事项**
-- 所有参数必须是正值:`start`、`end`、`step`,类型均为`UInt`,结果数组的元素与此相同。
+- 所有参数`start`、`end`、`step`必须属于以下几种类型之一:[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。结果数组的元素数据类型为所有入参类型的最小超类,也必须属于以上几种类型之一。
- 如果查询结果的数组总长度超过[function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block)指定的元素数,将会抛出异常。
-
**示例**
查询语句:
``` sql
-SELECT range(5), range(1, 5), range(1, 5, 2);
+SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2);
```
结果:
```txt
-┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐
-│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │
-└─────────────┴─────────────┴────────────────┘
+┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐
+│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │
+└─────────────┴─────────────┴────────────────┴─────────────────┘
```
## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1}
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 742838d6433..419b80ccff2 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -140,6 +140,7 @@ namespace CurrentMetrics
namespace ProfileEvents
{
extern const Event MainConfigLoads;
+ extern const Event ServerStartupMilliseconds;
}
namespace fs = std::filesystem;
@@ -652,6 +653,8 @@ static void sanityChecks(Server & server)
int Server::main(const std::vector<std::string> & /*args*/)
try
{
+ Stopwatch startup_watch;
+
Poco::Logger * log = &logger();
UseSSL use_ssl;
@@ -1822,6 +1825,9 @@ try
LOG_INFO(log, "Ready for connections.");
}
+ startup_watch.stop();
+ ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds());
+
try
{
global_context->startClusterDiscovery();
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 366667410d5..f1f99fc9166 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -167,6 +167,7 @@ enum class AccessType
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \
M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \
+ M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \
M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \
M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp
index 02aafb7415b..e21ebda2a31 100644
--- a/src/Access/tests/gtest_access_rights_ops.cpp
+++ b/src/Access/tests/gtest_access_rights_ops.cpp
@@ -53,7 +53,7 @@ TEST(AccessRights, Union)
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
- "SYSTEM RESTORE REPLICA, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
+ "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*");
}
diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h
index c559b3f115f..ac81f7466fa 100644
--- a/src/AggregateFunctions/AggregateFunctionHistogram.h
+++ b/src/AggregateFunctions/AggregateFunctionHistogram.h
@@ -207,7 +207,7 @@ private:
{
// Fuse points if their text representations differ only in last digit
auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits<Mean>::epsilon();
- if (points[left].mean + min_diff >= points[right].mean)
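+ // min_diff is proportional to the sum of the means and can be negative, so take its absolute value before comparing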
+ if (points[left].mean + std::fabs(min_diff) >= points[right].mean)
{
points[left] = points[left] + points[right];
}
diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h
index 6427d03f089..49157acf690 100644
--- a/src/AggregateFunctions/AggregateFunctionQuantile.h
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.h
@@ -232,6 +232,9 @@ struct NameQuantilesExactInclusive { static constexpr auto name = "quantilesExac
struct NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; };
struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; };
+struct NameQuantileInterpolatedWeighted { static constexpr auto name = "quantileInterpolatedWeighted"; };
+struct NameQuantilesInterpolatedWeighted { static constexpr auto name = "quantilesInterpolatedWeighted"; };
+
struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; };
struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; };
struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; };
diff --git a/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp
new file mode 100644
index 00000000000..68b42376df7
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp
@@ -0,0 +1,70 @@
+#include <AggregateFunctions/AggregateFunctionQuantile.h>
+#include <AggregateFunctions/QuantileInterpolatedWeighted.h>
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/Helpers.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <Core/Field.h>
+
+
+namespace DB
+{
+struct Settings;
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+ template <typename Value, bool _> using FuncQuantileInterpolatedWeighted = AggregateFunctionQuantile<Value, QuantileInterpolatedWeighted<Value>, NameQuantileInterpolatedWeighted, true, void, false>;
+ template <typename Value, bool _> using FuncQuantilesInterpolatedWeighted = AggregateFunctionQuantile<Value, QuantileInterpolatedWeighted<Value>, NameQuantilesInterpolatedWeighted, true, void, true>;
+
+ template <template <typename, bool> class Function>
+ AggregateFunctionPtr createAggregateFunctionQuantile(
+ const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
+ {
+ /// Second argument type check doesn't depend on the type of the first one.
+ Function<void, true>::assertSecondArg(argument_types);
+
+ const DataTypePtr & argument_type = argument_types[0];
+ WhichDataType which(argument_type);
+
+#define DISPATCH(TYPE) \
+ if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
+ FOR_BASIC_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+ if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
+ if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
+
+ if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
+ if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
+ if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
+ if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
+ if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
+
+ if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
+ if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
+ if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
+ if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
+
+ throw Exception("Illegal type " + argument_type->getName() + " of argument for aggregate function " + name,
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+ }
+}
+
+void registerAggregateFunctionsQuantileInterpolatedWeighted(AggregateFunctionFactory & factory)
+{
+ /// For aggregate functions returning array we cannot return NULL on empty set.
+ AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
+
+ factory.registerFunction(NameQuantileInterpolatedWeighted::name, createAggregateFunctionQuantile<FuncQuantileInterpolatedWeighted>);
+ factory.registerFunction(NameQuantilesInterpolatedWeighted::name, { createAggregateFunctionQuantile<FuncQuantilesInterpolatedWeighted>, properties });
+
+ /// 'median' is an alias for 'quantile'
+ factory.registerAlias("medianInterpolatedWeighted", NameQuantileInterpolatedWeighted::name);
+}
+
+}
diff --git a/src/AggregateFunctions/QuantileInterpolatedWeighted.h b/src/AggregateFunctions/QuantileInterpolatedWeighted.h
new file mode 100644
index 00000000000..95daeed2e57
--- /dev/null
+++ b/src/AggregateFunctions/QuantileInterpolatedWeighted.h
@@ -0,0 +1,308 @@
+#pragma once
+
+#include <base/sort.h>
+
+#include <Common/HashTable/HashMap.h>
+#include <Common/NaNUtils.h>
+
+
+namespace DB
+{
+struct Settings;
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+/** Approximates Quantile by:
+ * - sorting input values and weights
+ * - building a cumulative distribution based on weights
+ * - performing linear interpolation between the weights and values
+ *
+ */
+template <typename Value>
+struct QuantileInterpolatedWeighted
+{
+ struct Int128Hash
+ {
+ size_t operator()(Int128 x) const
+ {
+ return CityHash_v1_0_2::Hash128to64({x >> 64, x & 0xffffffffffffffffll});
+ }
+ };
+
+ using Weight = UInt64;
+ using UnderlyingType = NativeType<Value>;
+ using Hasher = std::conditional_t<std::is_same_v<Value, Decimal128>, Int128Hash, HashCRC32<UnderlyingType>>;
+
+ /// When creating, the hash table must be small.
+ using Map = HashMapWithStackMemory<UnderlyingType, Weight, Hasher, 4>;
+
+ Map map;
+
+ void add(const Value & x)
+ {
+ /// We must skip NaNs as they are not compatible with comparison sorting.
+ if (!isNaN(x))
+ ++map[x];
+ }
+
+ void add(const Value & x, Weight weight)
+ {
+ if (!isNaN(x))
+ map[x] += weight;
+ }
+
+ void merge(const QuantileInterpolatedWeighted & rhs)
+ {
+ for (const auto & pair : rhs.map)
+ map[pair.getKey()] += pair.getMapped();
+ }
+
+ void serialize(WriteBuffer & buf) const
+ {
+ map.write(buf);
+ }
+
+ void deserialize(ReadBuffer & buf)
+ {
+ typename Map::Reader reader(buf);
+ while (reader.next())
+ {
+ const auto & pair = reader.get();
+ map[pair.first] = pair.second;
+ }
+ }
+
+ Value get(Float64 level) const
+ {
+ return getImpl<Value>(level);
+ }
+
+ void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
+ {
+ getManyImpl<Value>(levels, indices, size, result);
+ }
+
+ /// The same, but in the case of an empty state, NaN is returned.
+ Float64 getFloat(Float64) const
+ {
+ throw Exception("Method getFloat is not implemented for QuantileInterpolatedWeighted", ErrorCodes::NOT_IMPLEMENTED);
+ }
+
+ void getManyFloat(const Float64 *, const size_t *, size_t, Float64 *) const
+ {
+ throw Exception("Method getManyFloat is not implemented for QuantileInterpolatedWeighted", ErrorCodes::NOT_IMPLEMENTED);
+ }
+
+private:
+ using Pair = typename std::pair<UnderlyingType, Float64>;
+
+ /// Get the value of the `level` quantile. The level must be between 0 and 1.
+ template <typename T>
+ T getImpl(Float64 level) const
+ {
+ size_t size = map.size();
+
+ if (0 == size)
+ return std::numeric_limits<Value>::quiet_NaN();
+
+ /// Maintain a vector of pair of values and weights for easier sorting and for building
+ /// a cumulative distribution using the provided weights.
+ std::vector<Pair> value_weight_pairs;
+ value_weight_pairs.reserve(size);
+
+ /// Note: weight provided must be a 64-bit integer
+ /// Float64 is used as accumulator here to get approximate results.
+ /// But weight used in the internal array is stored as Float64 as we
+ /// do some quantile estimation operation which involves division and
+ /// require Float64 level of precision.
+
+ Float64 sum_weight = 0;
+ for (const auto & pair : map)
+ {
+ sum_weight += pair.getMapped();
+ auto value = pair.getKey();
+ auto weight = pair.getMapped();
+ value_weight_pairs.push_back({value, weight});
+ }
+
+ ::sort(value_weight_pairs.begin(), value_weight_pairs.end(), [](const Pair & a, const Pair & b) { return a.first < b.first; });
+
+ Float64 accumulated = 0;
+
+ /// vector for populating and storing the cumulative sum using the provided weights.
+ /// example: [0,1,2,3,4,5] -> [0,1,3,6,10,15]
+ std::vector<Float64> weights_cum_sum;
+ weights_cum_sum.reserve(size);
+
+ for (size_t idx = 0; idx < size; ++idx)
+ {
+ accumulated += value_weight_pairs[idx].second;
+ weights_cum_sum.push_back(accumulated);
+ }
+
+ /// The following estimation of quantile is general and the idea is:
+ /// https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method
+
+ /// calculates a simple cumulative distribution based on weights
+ if (sum_weight != 0)
+ {
+ for (size_t idx = 0; idx < size; ++idx)
+ value_weight_pairs[idx].second = (weights_cum_sum[idx] - 0.5 * value_weight_pairs[idx].second) / sum_weight;
+ }
+
+ /// perform linear interpolation
+ size_t idx = 0;
+ if (size >= 2)
+ {
+ if (level >= value_weight_pairs[size - 2].second)
+ {
+ idx = size - 2;
+ }
+ else
+ {
+ size_t start = 0, end = size - 1;
+ while (start <= end)
+ {
+ size_t mid = start + (end - start) / 2;
+ if (mid > size)
+ break;
+ if (level > value_weight_pairs[mid + 1].second)
+ start = mid + 1;
+ else
+ {
+ idx = mid;
+ end = mid - 1;
+ }
+ }
+ }
+ }
+
+ size_t l = idx;
+ size_t u = idx + 1 < size ? idx + 1 : idx;
+
+ Float64 xl = value_weight_pairs[l].second, xr = value_weight_pairs[u].second;
+ UnderlyingType yl = value_weight_pairs[l].first, yr = value_weight_pairs[u].first;
+
+ if (level < xl)
+ yr = yl;
+ if (level > xr)
+ yl = yr;
+
+ return static_cast<T>(interpolate(level, xl, xr, yl, yr));
+ }
+
+ /// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
+ /// indices - an array of index levels such that the corresponding elements will go in ascending order.
+ template <typename T>
+ void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, Value * result) const
+ {
+ size_t size = map.size();
+
+ if (0 == size)
+ {
+ for (size_t i = 0; i < num_levels; ++i)
+ result[i] = Value();
+ return;
+ }
+
+ std::vector<Pair> value_weight_pairs;
+ value_weight_pairs.reserve(size);
+
+ Float64 sum_weight = 0;
+ for (const auto & pair : map)
+ {
+ sum_weight += pair.getMapped();
+ auto value = pair.getKey();
+ auto weight = pair.getMapped();
+ value_weight_pairs.push_back({value, weight});
+ }
+
+ ::sort(value_weight_pairs.begin(), value_weight_pairs.end(), [](const Pair & a, const Pair & b) { return a.first < b.first; });
+
+ Float64 accumulated = 0;
+
+ /// vector for populating and storing the cumulative sum using the provided weights.
+ /// example: [0,1,2,3,4,5] -> [0,1,3,6,10,15]
+ std::vector<Float64> weights_cum_sum;
+ weights_cum_sum.reserve(size);
+
+ for (size_t idx = 0; idx < size; ++idx)
+ {
+ accumulated += value_weight_pairs[idx].second;
+ weights_cum_sum.emplace_back(accumulated);
+ }
+
+
+ /// The following estimation of quantile is general and the idea is:
+ /// https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method
+
+ /// calculates a simple cumulative distribution based on weights
+ if (sum_weight != 0)
+ {
+ for (size_t idx = 0; idx < size; ++idx)
+ value_weight_pairs[idx].second = (weights_cum_sum[idx] - 0.5 * value_weight_pairs[idx].second) / sum_weight;
+ }
+
+ for (size_t level_index = 0; level_index < num_levels; ++level_index)
+ {
+ /// perform linear interpolation for every level
+ auto level = levels[indices[level_index]];
+
+ size_t idx = 0;
+ if (size >= 2)
+ {
+ if (level >= value_weight_pairs[size - 2].second)
+ {
+ idx = size - 2;
+ }
+ else
+ {
+ size_t start = 0, end = size - 1;
+ while (start <= end)
+ {
+ size_t mid = start + (end - start) / 2;
+ if (mid > size)
+ break;
+ if (level > value_weight_pairs[mid + 1].second)
+ start = mid + 1;
+ else
+ {
+ idx = mid;
+ end = mid - 1;
+ }
+ }
+ }
+ }
+
+ size_t l = idx;
+ size_t u = idx + 1 < size ? idx + 1 : idx;
+
+ Float64 xl = value_weight_pairs[l].second, xr = value_weight_pairs[u].second;
+ UnderlyingType yl = value_weight_pairs[l].first, yr = value_weight_pairs[u].first;
+
+ if (level < xl)
+ yr = yl;
+ if (level > xr)
+ yl = yr;
+
+ result[indices[level_index]] = static_cast<T>(interpolate(level, xl, xr, yl, yr));
+ }
+ }
+
+ /// This ignores overflows or NaN's that might arise during add, sub and mul operations and doesn't aim to provide exact
+ /// results since `the quantileInterpolatedWeighted` function itself relies mainly on approximation.
+ UnderlyingType NO_SANITIZE_UNDEFINED interpolate(Float64 level, Float64 xl, Float64 xr, UnderlyingType yl, UnderlyingType yr) const
+ {
+ UnderlyingType dy = yr - yl;
+ Float64 dx = xr - xl;
+ dx = dx == 0 ? 1 : dx; /// to handle NaN behavior that might arise during integer division below.
+
+ /// yl + (dy / dx) * (level - xl)
+ return static_cast<UnderlyingType>(yl + (dy / dx) * (level - xl));
+ }
+};
+
+}
diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp
index ecf6ab51367..1fe759c122a 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -21,6 +21,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactWeighted(AggregateFunctionFactory &);
+void registerAggregateFunctionsQuantileInterpolatedWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactLow(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactHigh(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExactInclusive(AggregateFunctionFactory &);
@@ -106,6 +107,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileExact(factory);
registerAggregateFunctionsQuantileExactWeighted(factory);
+ registerAggregateFunctionsQuantileInterpolatedWeighted(factory);
registerAggregateFunctionsQuantileExactLow(factory);
registerAggregateFunctionsQuantileExactHigh(factory);
registerAggregateFunctionsQuantileExactInclusive(factory);
diff --git a/src/Analyzer/MatcherNode.cpp b/src/Analyzer/MatcherNode.cpp
index 9d822771087..fc74b4ff67e 100644
--- a/src/Analyzer/MatcherNode.cpp
+++ b/src/Analyzer/MatcherNode.cpp
@@ -11,6 +11,7 @@
#include
#include
#include
+#include <Parsers/ASTColumnsTransformers.h>
namespace DB
{
@@ -206,19 +207,43 @@ QueryTreeNodePtr MatcherNode::cloneImpl() const
ASTPtr MatcherNode::toASTImpl() const
{
ASTPtr result;
+ ASTPtr transformers;
+
+ if (!children.empty())
+ {
+ transformers = std::make_shared<ASTColumnsTransformerList>();
+
+ for (const auto & child : children)
+ transformers->children.push_back(child->toAST());
+ }
if (matcher_type == MatcherNodeType::ASTERISK)
{
if (qualified_identifier.empty())
{
- result = std::make_shared<ASTAsterisk>();
+ auto asterisk = std::make_shared<ASTAsterisk>();
+
+ if (transformers)
+ {
+ asterisk->transformers = std::move(transformers);
+ asterisk->children.push_back(asterisk->transformers);
+ }
+
+ result = asterisk;
}
else
{
auto qualified_asterisk = std::make_shared<ASTQualifiedAsterisk>();
auto identifier_parts = qualified_identifier.getParts();
- qualified_asterisk->children.push_back(std::make_shared<ASTIdentifier>(std::move(identifier_parts)));
+ qualified_asterisk->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
+ qualified_asterisk->children.push_back(qualified_asterisk->qualifier);
+
+ if (transformers)
+ {
+ qualified_asterisk->transformers = std::move(transformers);
+ qualified_asterisk->children.push_back(qualified_asterisk->transformers);
+ }
result = qualified_asterisk;
}
@@ -229,6 +254,13 @@ ASTPtr MatcherNode::toASTImpl() const
{
auto regexp_matcher = std::make_shared<ASTColumnsRegexpMatcher>();
regexp_matcher->setPattern(columns_matcher->pattern());
+
+ if (transformers)
+ {
+ regexp_matcher->transformers = std::move(transformers);
+ regexp_matcher->children.push_back(regexp_matcher->transformers);
+ }
+
result = regexp_matcher;
}
else
@@ -237,7 +269,14 @@ ASTPtr MatcherNode::toASTImpl() const
regexp_matcher->setPattern(columns_matcher->pattern());
auto identifier_parts = qualified_identifier.getParts();
- regexp_matcher->children.push_back(std::make_shared(std::move(identifier_parts)));
+ regexp_matcher->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
+ regexp_matcher->children.push_back(regexp_matcher->qualifier);
+
+ if (transformers)
+ {
+ regexp_matcher->transformers = std::move(transformers);
+ regexp_matcher->children.push_back(regexp_matcher->transformers);
+ }
result = regexp_matcher;
}
@@ -257,23 +296,36 @@ ASTPtr MatcherNode::toASTImpl() const
{
auto columns_list_matcher = std::make_shared();
columns_list_matcher->column_list = std::move(column_list);
+ columns_list_matcher->children.push_back(columns_list_matcher->column_list);
+
+ if (transformers)
+ {
+ columns_list_matcher->transformers = std::move(transformers);
+ columns_list_matcher->children.push_back(columns_list_matcher->transformers);
+ }
+
result = columns_list_matcher;
}
else
{
auto columns_list_matcher = std::make_shared();
- columns_list_matcher->column_list = std::move(column_list);
auto identifier_parts = qualified_identifier.getParts();
- columns_list_matcher->children.push_back(std::make_shared(std::move(identifier_parts)));
+ columns_list_matcher->qualifier = std::make_shared<ASTIdentifier>(std::move(identifier_parts));
+ columns_list_matcher->column_list = std::move(column_list);
+ columns_list_matcher->children.push_back(columns_list_matcher->qualifier);
+ columns_list_matcher->children.push_back(columns_list_matcher->column_list);
+
+ if (transformers)
+ {
+ columns_list_matcher->transformers = std::move(transformers);
+ columns_list_matcher->children.push_back(columns_list_matcher->transformers);
+ }
result = columns_list_matcher;
}
}
- for (const auto & child : children)
- result->children.push_back(child->toAST());
-
return result;
}
diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
index 01072e0b3fc..149af61e002 100644
--- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
+++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp
@@ -73,7 +73,7 @@ public:
if (!inner_function_node)
return;
- auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
+ const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes();
if (inner_function_arguments_nodes.size() != 2)
return;
@@ -119,13 +119,15 @@ public:
{
lower_function_name = function_name_if_constant_is_negative;
}
- resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name);
- auto inner_function = aggregate_function_arguments_nodes[0];
- auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]);
- aggregate_function_arguments_nodes = {inner_function_right_argument};
- inner_function_arguments_nodes[1] = node;
- node = std::move(inner_function);
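+ /// Rewrite aggregate(op(constant, x)) into op(constant, aggregate(x)); the aggregate function name may be
+ /// changed when the constant is negative (e.g. max -> min). The inner function is cloned so the original
+ /// query tree nodes are left unmodified.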
+ auto inner_function_clone = inner_function_node->clone();
+ auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
+ auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
+ auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1];
+ aggregate_function_arguments_nodes = {inner_function_clone_right_argument};
+ resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name);
+ inner_function_clone_arguments_nodes[1] = node;
+ node = std::move(inner_function_clone);
}
else if (right_argument_constant_node)
{
@@ -136,18 +138,20 @@ public:
{
lower_function_name = function_name_if_constant_is_negative;
}
- resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative);
- auto inner_function = aggregate_function_arguments_nodes[0];
- auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]);
- aggregate_function_arguments_nodes = {inner_function_left_argument};
- inner_function_arguments_nodes[0] = node;
- node = std::move(inner_function);
+ auto inner_function_clone = inner_function_node->clone();
+ auto & inner_function_clone_arguments = inner_function_clone->as<FunctionNode &>().getArguments();
+ auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes();
+ auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0];
+ aggregate_function_arguments_nodes = {inner_function_clone_left_argument};
+ resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, lower_function_name);
+ inner_function_clone_arguments_nodes[0] = node;
+ node = std::move(inner_function_clone);
}
}
private:
- static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name)
+ static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
{
auto function_aggregate_function = function_node.getAggregateFunction();
diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp
new file mode 100644
index 00000000000..8c9db191bbd
--- /dev/null
+++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp
@@ -0,0 +1,124 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace
+{
+
+class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor<OptimizeRedundantFunctionsInOrderByVisitor>
+{
+public:
+ static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/)
+ {
+ if (node->as<FunctionNode>())
+ return false;
+ return true;
+ }
+
+ void visitImpl(QueryTreeNodePtr & node)
+ {
+ auto * query = node->as<QueryNode>();
+ if (!query)
+ return;
+
+ if (!query->hasOrderBy())
+ return;
+
+ auto & order_by = query->getOrderBy();
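+ /// Don't optimize if any ORDER BY element uses WITH FILL: removing keys could change the generated rows.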
+ for (auto & elem : order_by.getNodes())
+ {
+ auto * order_by_elem = elem->as<SortNode>();
+ if (order_by_elem->withFill())
+ return;
+ }
+
+ QueryTreeNodes new_order_by_nodes;
+ new_order_by_nodes.reserve(order_by.getNodes().size());
+
+ for (auto & elem : order_by.getNodes())
+ {
+ auto & order_by_expr = elem->as<SortNode>()->getExpression();
+ switch (order_by_expr->getNodeType())
+ {
+ case QueryTreeNodeType::FUNCTION:
+ {
+ if (isRedundantExpression(order_by_expr))
+ continue;
+ break;
+ }
+ case QueryTreeNodeType::COLUMN:
+ {
+ existing_keys.insert(order_by_expr);
+ break;
+ }
+ default:
+ break;
+ }
+
+ new_order_by_nodes.push_back(elem);
+ }
+ existing_keys.clear();
+
+ if (new_order_by_nodes.size() < order_by.getNodes().size())
+ order_by.getNodes() = std::move(new_order_by_nodes);
+ }
+
+private:
+ QueryTreeNodePtrWithHashSet existing_keys;
+
+ bool isRedundantExpression(QueryTreeNodePtr function)
+ {
+ QueryTreeNodes nodes_to_process{ function };
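+ /// Depth-first walk over the expression: it is redundant only if every leaf is a column that already
+ /// appears among the preceding ORDER BY keys and every function on the path is deterministic within the query.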
+ while (!nodes_to_process.empty())
+ {
+ auto node = nodes_to_process.back();
+ nodes_to_process.pop_back();
+
+ // TODO: handle constants here
+ switch (node->getNodeType())
+ {
+ case QueryTreeNodeType::FUNCTION:
+ {
+ auto * function_node = node->as<FunctionNode>();
+ const auto & function_arguments = function_node->getArguments().getNodes();
+ if (function_arguments.empty())
+ return false;
+ const auto & function_base = function_node->getFunction();
+ if (!function_base || !function_base->isDeterministicInScopeOfQuery())
+ return false;
+
+ // Process arguments in order
+ for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it)
+ nodes_to_process.push_back(*it);
+ break;
+ }
+ case QueryTreeNodeType::COLUMN:
+ {
+ if (!existing_keys.contains(node))
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+}
+
+void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/)
+{
+ OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node);
+}
+
+}
diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h
new file mode 100644
index 00000000000..609a6360d27
--- /dev/null
+++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include
+
+namespace DB
+{
+
+/** If ORDER BY has an argument x followed by f(x), transform it to ORDER BY x.
+ * Optimizes ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y
+ * provided that f(), g(), h(), t() are deterministic (in the scope of the query).
+ * Doesn't optimize ORDER BY f(x), g(x), x even if f(x) is a bijection of x or g(x).
+ */
+class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass
+{
+public:
+ String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; }
+
+ String getDescription() override { return "If ORDER BY has an argument x followed by f(x), transforms it to ORDER BY x."; }
+
+ void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
+};
+
+}
diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp
index 879eb4d4a8d..1faf79e87f9 100644
--- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp
+++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp
@@ -77,11 +77,11 @@ public:
if (!nested_function || nested_function->getFunctionName() != "if")
return;
- auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
+ const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes();
if (nested_if_function_arguments_nodes.size() != 3)
return;
- auto & cond_argument = nested_if_function_arguments_nodes[0];
+ const auto & cond_argument = nested_if_function_arguments_nodes[0];
const auto * if_true_condition_constant_node = nested_if_function_arguments_nodes[1]->as<ConstantNode>();
const auto * if_false_condition_constant_node = nested_if_function_arguments_nodes[2]->as<ConstantNode>();
@@ -101,7 +101,7 @@ public:
/// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`.
if (if_true_condition_value == 1 && if_false_condition_value == 0)
{
- function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]);
+ function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0];
function_node_arguments_nodes.resize(1);
resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType());
@@ -120,7 +120,7 @@ public:
auto not_function = std::make_shared<FunctionNode>("not");
auto & not_function_arguments = not_function->getArguments().getNodes();
- not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0]));
+ not_function_arguments.push_back(nested_if_function_arguments_nodes[0]);
not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns()));
diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp
index 2b2326badfa..adaa878ae2f 100644
--- a/src/Analyzer/QueryTreeBuilder.cpp
+++ b/src/Analyzer/QueryTreeBuilder.cpp
@@ -111,7 +111,7 @@ private:
QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query, const ContextPtr & context) const;
- ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const;
+ ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, const ContextPtr & context) const;
ASTPtr query;
QueryTreeNodePtr query_tree_node;
@@ -439,13 +439,13 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
}
else if (const auto * asterisk = expression->as())
{
- auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
+ auto column_transformers = buildColumnTransformers(asterisk->transformers, context);
result = std::make_shared(std::move(column_transformers));
}
else if (const auto * qualified_asterisk = expression->as())
{
- auto & qualified_identifier = qualified_asterisk->children.at(0)->as();
- auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
+ auto & qualified_identifier = qualified_asterisk->qualifier->as<ASTIdentifier &>();
+ auto column_transformers = buildColumnTransformers(qualified_asterisk->transformers, context);
result = std::make_shared(Identifier(qualified_identifier.name_parts), std::move(column_transformers));
}
else if (const auto * ast_literal = expression->as())
@@ -543,7 +543,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
}
else if (const auto * columns_regexp_matcher = expression->as())
{
- auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
+ auto column_transformers = buildColumnTransformers(columns_regexp_matcher->transformers, context);
result = std::make_shared(columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * columns_list_matcher = expression->as())
@@ -557,18 +557,18 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
- auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
+ auto column_transformers = buildColumnTransformers(columns_list_matcher->transformers, context);
result = std::make_shared(std::move(column_list_identifiers), std::move(column_transformers));
}
else if (const auto * qualified_columns_regexp_matcher = expression->as())
{
- auto & qualified_identifier = qualified_columns_regexp_matcher->children.at(0)->as();
- auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
+ auto & qualified_identifier = qualified_columns_regexp_matcher->qualifier->as<ASTIdentifier &>();
+ auto column_transformers = buildColumnTransformers(qualified_columns_regexp_matcher->transformers, context);
result = std::make_shared(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * qualified_columns_list_matcher = expression->as())
{
- auto & qualified_identifier = qualified_columns_list_matcher->children.at(0)->as();
+ auto & qualified_identifier = qualified_columns_list_matcher->qualifier->as<ASTIdentifier &>();
Identifiers column_list_identifiers;
column_list_identifiers.reserve(qualified_columns_list_matcher->column_list->children.size());
@@ -579,7 +579,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
- auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
+ auto column_transformers = buildColumnTransformers(qualified_columns_list_matcher->transformers, context);
result = std::make_shared(Identifier(qualified_identifier.name_parts), std::move(column_list_identifiers), std::move(column_transformers));
}
else
@@ -833,15 +833,15 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
}
-ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const
+ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, const ContextPtr & context) const
{
ColumnTransformersNodes column_transformers;
- size_t children_size = matcher_expression->children.size();
- for (; start_child_index < children_size; ++start_child_index)
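+ /// The matcher AST now stores its transformers in a dedicated child; a null pointer simply means
+ /// the matcher has no APPLY/EXCEPT/REPLACE clauses.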
+ if (!matcher_expression)
+ return column_transformers;
+
+ for (const auto & child : matcher_expression->children)
{
- const auto & child = matcher_expression->children[start_child_index];
-
if (auto * apply_transformer = child->as())
{
if (apply_transformer->lambda)
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index 4148d42ee23..8efe0dd4602 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
@@ -91,7 +92,6 @@ public:
* TODO: Support setting optimize_move_functions_out_of_any.
* TODO: Support setting optimize_aggregators_of_group_by_keys.
* TODO: Support setting optimize_duplicate_order_by_and_distinct.
- * TODO: Support setting optimize_redundant_functions_in_order_by.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Support settings.optimize_or_like_chain.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
@@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
if (settings.optimize_if_chain_to_multiif)
manager.addPass(std::make_unique());
+ if (settings.optimize_redundant_functions_in_order_by)
+ manager.addPass(std::make_unique());
+
manager.addPass(std::make_unique());
manager.addPass(std::make_unique());
diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
index 4529f34e2a7..aa667fde06f 100644
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@@ -156,10 +156,9 @@ void BackupWriterS3::copyObjectImpl(
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- const Aws::S3::Model::HeadObjectResult & head,
+ size_t size,
const std::optional & metadata) const
{
- size_t size = head.GetContentLength();
LOG_TRACE(log, "Copying {} bytes using single-operation copy", size);
Aws::S3::Model::CopyObjectRequest request;
@@ -177,7 +176,7 @@ void BackupWriterS3::copyObjectImpl(
if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge"
|| outcome.GetError().GetExceptionName() == "InvalidRequest"))
{ // Can't come here with MinIO, MinIO allows single part upload for large objects.
- copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
+ copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, size, metadata);
return;
}
@@ -191,10 +190,9 @@ void BackupWriterS3::copyObjectMultipartImpl(
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- const Aws::S3::Model::HeadObjectResult & head,
+ size_t size,
const std::optional & metadata) const
{
- size_t size = head.GetContentLength();
LOG_TRACE(log, "Copying {} bytes using multipart upload copy", size);
String multipart_upload_id;
@@ -309,16 +307,16 @@ void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_
std::string source_bucket = object_storage->getObjectsNamespace();
auto file_path = fs::path(s3_uri.key) / file_name_to;
- auto head = S3::headObject(*client, source_bucket, objects[0].absolute_path).GetResult();
- if (static_cast(head.GetContentLength()) < request_settings.getUploadSettings().max_single_operation_copy_size)
+ auto size = S3::getObjectSize(*client, source_bucket, objects[0].absolute_path);
+ if (size < request_settings.getUploadSettings().max_single_operation_copy_size)
{
copyObjectImpl(
- source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
+ source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, size);
}
else
{
copyObjectMultipartImpl(
- source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
+ source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, size);
}
}
}
diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h
index 634b35c1e74..70487717c48 100644
--- a/src/Backups/BackupIO_S3.h
+++ b/src/Backups/BackupIO_S3.h
@@ -67,7 +67,7 @@ private:
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- const Aws::S3::Model::HeadObjectResult & head,
+ size_t size,
const std::optional & metadata = std::nullopt) const;
void copyObjectMultipartImpl(
@@ -75,7 +75,7 @@ private:
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- const Aws::S3::Model::HeadObjectResult & head,
+ size_t size,
const std::optional & metadata = std::nullopt) const;
void removeFilesBatch(const Strings & file_names);
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index abc28299f96..3732af0d4f3 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -309,6 +309,8 @@ The server successfully detected this situation and will download merged part fr
M(S3CopyObject, "Number of S3 API CopyObject calls.") \
M(S3ListObjects, "Number of S3 API ListObjects calls.") \
M(S3HeadObject, "Number of S3 API HeadObject calls.") \
+ M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.") \
+ M(S3GetObjectMetadata, "Number of S3 API GetObject calls for getting metadata.") \
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.") \
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.") \
M(S3UploadPart, "Number of S3 API UploadPart calls.") \
@@ -321,6 +323,8 @@ The server successfully detected this situation and will download merged part fr
M(DiskS3CopyObject, "Number of DiskS3 API CopyObject calls.") \
M(DiskS3ListObjects, "Number of DiskS3 API ListObjects calls.") \
M(DiskS3HeadObject, "Number of DiskS3 API HeadObject calls.") \
+ M(DiskS3GetObjectAttributes, "Number of DiskS3 API GetObjectAttributes calls.") \
+ M(DiskS3GetObjectMetadata, "Number of DiskS3 API GetObject calls for getting metadata.") \
M(DiskS3CreateMultipartUpload, "Number of DiskS3 API CreateMultipartUpload calls.") \
M(DiskS3UploadPartCopy, "Number of DiskS3 API UploadPartCopy calls.") \
M(DiskS3UploadPart, "Number of DiskS3 API UploadPart calls.") \
@@ -449,7 +453,8 @@ The server successfully detected this situation and will download merged part fr
M(OverflowBreak, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'break' and the result is incomplete.") \
M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \
M(OverflowAny, "Number of times approximate GROUP BY was in effect: when aggregation was performed only on top of first 'max_rows_to_group_by' unique keys and other keys were ignored due to 'group_by_overflow_mode' = 'any'.") \
-
+ \
+ M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
namespace ProfileEvents
{
diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h
index 1e84efd8085..e83245c0fa1 100644
--- a/src/Common/logger_useful.h
+++ b/src/Common/logger_useful.h
@@ -7,6 +7,29 @@
#include
#include
+/// This wrapper is useful to save a formatted message into a String before sending it to a logger
+class LogToStrImpl
+{
+ String & out_str;
+ Poco::Logger * logger;
+ bool propagate_to_actual_log = true;
+public:
+ LogToStrImpl(String & out_str_, Poco::Logger * logger_) : out_str(out_str_) , logger(logger_) {}
+ LogToStrImpl & operator -> () { return *this; }
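+ /// Always report the priority as enabled so that the message is formatted and captured into out_str;
+ /// whether it is also forwarded to the real channel is tracked separately via propagate_to_actual_log.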
+ bool is(Poco::Message::Priority priority) { propagate_to_actual_log &= logger->is(priority); return true; }
+ LogToStrImpl * getChannel() {return this; }
+ const String & name() const { return logger->name(); }
+ void log(const Poco::Message & message)
+ {
+ out_str = message.getText();
+ if (!propagate_to_actual_log)
+ return;
+ if (auto * channel = logger->getChannel())
+ channel->log(message);
+ }
+};
+
+#define LogToStr(x, y) std::make_unique<LogToStrImpl>(x, y)
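+/// Usage sketch (the logger name and message are illustrative):
+///     String captured;
+///     LOG_WARNING(LogToStr(captured, &Poco::Logger::get("SomeLogger")), "Failed with error {}", 42);
+/// After the call `captured` holds the formatted message even if the logger would have filtered it out.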
namespace
{
@@ -17,8 +40,37 @@ namespace
[[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; };
[[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); };
+ [[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; };
+
+ template <typename> struct is_fmt_runtime : std::false_type {};
+ template <typename T> struct is_fmt_runtime<fmt::basic_runtime<T>> : std::true_type {};
+
+ /// Usually we use LOG_*(...) macros with either a string literal or fmt::runtime(whatever) as the format string.
+ /// This function is useful to get a string_view to a static format string passed to a LOG_* macro.
+ template <typename T> constexpr std::string_view tryGetStaticFormatString(T && x)
+ {
+ if constexpr (is_fmt_runtime<std::decay_t<T>>::value)
+ {
+ /// It definitely was fmt::runtime(something).
+ /// We are not sure about the lifetime of the string, so return an empty view.
+ /// It can also be an arbitrary string, not a format pattern,
+ /// so returning an empty pattern will not pollute the set of patterns.
+ return std::string_view();
+ }
+ else
+ {
+ /// Most likely it was a string literal.
+ /// Unfortunately, there's no good way to check if something is a string literal.
+ /// But fmtlib requires a format string to be compile-time constant unless fmt::runtime is used.
+ static_assert(std::is_nothrow_convertible<T, const char * const>::value);
+ static_assert(!std::is_pointer<T>::value);
+ return std::string_view(x);
+ }
+ }
}
+#define LOG_IMPL_FIRST_ARG(X, ...) X
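+/// Expands to the first argument of a variadic macro call, i.e. the format string of LOG_*(...),
+/// so that tryGetStaticFormatString() can attach the static pattern to the Poco::Message below.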
+
/// Logs a message to a specified logger with that level.
/// If more than one argument is provided,
/// the first argument is interpreted as template with {}-substitutions
@@ -30,7 +82,7 @@ namespace
auto _logger = ::getLogger(logger); \
const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \
(DB::CurrentThread::getGroup()->client_logs_level >= (priority)); \
- if (_logger->is((PRIORITY)) || _is_clients_log) \
+ if (_is_clients_log || _logger->is((PRIORITY))) \
{ \
std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \
if (auto _channel = _logger->getChannel()) \
@@ -40,7 +92,7 @@ namespace
file_function += "; "; \
file_function += __PRETTY_FUNCTION__; \
Poco::Message poco_message(_logger->name(), formatted_message, \
- (PRIORITY), file_function.c_str(), __LINE__); \
+ (PRIORITY), file_function.c_str(), __LINE__, tryGetStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__))); \
_channel->log(poco_message); \
} \
} \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index b8d46244b6c..f58bd7ebafb 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -773,6 +773,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
+ M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
+ M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h
index d67f1b94d5d..534fcd42037 100644
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@@ -80,7 +80,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map settings_changes_history =
{
- {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}}},
+ {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"},
+ {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}}},
{"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp
index 0ed2b034985..50d956584b9 100644
--- a/src/DataTypes/Serializations/SerializationTuple.cpp
+++ b/src/DataTypes/Serializations/SerializationTuple.cpp
@@ -16,6 +16,7 @@ namespace ErrorCodes
{
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+ extern const int INCORRECT_DATA;
}
@@ -154,7 +155,7 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
- if (settings.json.named_tuples_as_objects
+ if (settings.json.write_named_tuples_as_objects
&& have_explicit_names)
{
writeChar('{', ostr);
@@ -185,7 +186,7 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
- if (settings.json.named_tuples_as_objects
+ if (settings.json.read_named_tuples_as_objects
&& have_explicit_names)
{
skipWhitespaceIfAny(istr);
@@ -194,12 +195,15 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
addElementSafe(elems.size(), column, [&]
{
- // Require all elements but in arbitrary order.
- for (size_t i = 0; i < elems.size(); ++i)
+ std::vector<UInt8> seen_elements(elems.size(), 0);
+ size_t i = 0;
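+ /// Read the object's key/value pairs in arbitrary order until '}'; elements missing from the object
+ /// are detected afterwards via seen_elements and either reported as an error or filled with defaults.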
+ while (!istr.eof() && *istr.position() != '}')
{
+ if (i == elems.size())
+ throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {}", elems.size());
+
if (i > 0)
{
- skipWhitespaceIfAny(istr);
assertChar(',', istr);
skipWhitespaceIfAny(istr);
}
@@ -211,12 +215,35 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
skipWhitespaceIfAny(istr);
const size_t element_pos = getPositionByName(name);
+ seen_elements[element_pos] = 1;
auto & element_column = extractElementColumn(column, element_pos);
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
+
+ skipWhitespaceIfAny(istr);
+ ++i;
}
- skipWhitespaceIfAny(istr);
assertChar('}', istr);
+
+ /// Check if we have missing elements.
+ if (i != elems.size())
+ {
+ for (size_t element_pos = 0; element_pos != seen_elements.size(); ++element_pos)
+ {
+ if (seen_elements[element_pos])
+ continue;
+
+ if (!settings.json.defaults_for_missing_elements_in_named_tuple)
+ throw Exception(
+ ErrorCodes::INCORRECT_DATA,
+ "JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
+ "enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
+ elems[element_pos]->getElementName());
+
+ auto & element_column = extractElementColumn(column, element_pos);
+ element_column.insertDefault();
+ }
+ }
});
}
else
diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp
index f0137e5bd60..ffe84f6fb77 100644
--- a/src/Databases/DDLDependencyVisitor.cpp
+++ b/src/Databases/DDLDependencyVisitor.cpp
@@ -2,6 +2,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -175,7 +176,7 @@ namespace
/// Finds dependencies of a function.
void visitFunction(const ASTFunction & function)
{
- if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet"))
+ if (functionIsJoinGet(function.name) || functionIsDictGet(function.name))
{
/// dictGet('dict_name', attr_names, id_expr)
/// dictHas('dict_name', id_expr)
diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp
index 8536d1c890d..3a61f821629 100644
--- a/src/Databases/DDLLoadingDependencyVisitor.cpp
+++ b/src/Databases/DDLLoadingDependencyVisitor.cpp
@@ -1,6 +1,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -52,23 +53,41 @@ bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child)
return true;
}
-ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function)
+ssize_t DDLMatcherBase::getPositionOfTableNameArgumentToEvaluate(const ASTFunction & function)
{
- if (function.name == "joinGet" ||
- function.name == "dictHas" ||
- function.name == "dictIsIn" ||
- function.name.starts_with("dictGet"))
+ if (functionIsJoinGet(function.name) || functionIsDictGet(function.name))
return 0;
- if (Poco::toLower(function.name) == "in")
+ return -1;
+}
+
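+/// Unlike getPositionOfTableNameArgumentToEvaluate(), this also handles the right-hand side of [GLOBAL] IN
+/// operators: it is visited to record a dependency on the referenced table, but it is never evaluated as a constant.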
+ssize_t DDLMatcherBase::getPositionOfTableNameArgumentToVisit(const ASTFunction & function)
+{
+ ssize_t maybe_res = getPositionOfTableNameArgumentToEvaluate(function);
+ if (0 <= maybe_res)
+ return maybe_res;
+
+ if (functionIsInOrGlobalInOperator(function.name))
+ {
+ if (function.children.empty())
+ return -1;
+
+ const auto * args = function.children[0]->as();
+ if (!args || args->children.size() != 2)
+ return -1;
+
+ if (args->children[1]->as())
+ return -1;
+
return 1;
+ }
return -1;
}
void DDLLoadingDependencyVisitor::visit(const ASTFunction & function, Data & data)
{
- ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
+ ssize_t table_name_arg_idx = getPositionOfTableNameArgumentToVisit(function);
if (table_name_arg_idx < 0)
return;
extractTableNameFromArgument(function, data, table_name_arg_idx);
diff --git a/src/Databases/DDLLoadingDependencyVisitor.h b/src/Databases/DDLLoadingDependencyVisitor.h
index f987e885266..f173517f852 100644
--- a/src/Databases/DDLLoadingDependencyVisitor.h
+++ b/src/Databases/DDLLoadingDependencyVisitor.h
@@ -23,7 +23,8 @@ class DDLMatcherBase
{
public:
static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
- static ssize_t getPositionOfTableNameArgument(const ASTFunction & function);
+ static ssize_t getPositionOfTableNameArgumentToVisit(const ASTFunction & function);
+ static ssize_t getPositionOfTableNameArgumentToEvaluate(const ASTFunction & function);
};
/// Visits ASTCreateQuery and extracts the names of all tables which should be loaded before a specified table.
diff --git a/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp b/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp
index d9e494e7c9a..c4d1e8bda8c 100644
--- a/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp
+++ b/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp
@@ -23,7 +23,7 @@ void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & d
{
/// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))"
/// with "dictGet('db_name.dict', 'value', toUInt32(1))"
- ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function);
+ ssize_t table_name_arg_idx = getPositionOfTableNameArgumentToEvaluate(function);
if (table_name_arg_idx < 0)
return;
diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
index ad1d690f4a9..711f1553ce6 100644
--- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
+++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
@@ -171,8 +171,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
if (!hasPendingDataToRead())
return false;
- size_t size, offset;
+ chassert(file_offset_of_buffer_end <= impl->getFileSize());
+ size_t size, offset;
if (prefetch_future.valid())
{
ProfileEventTimeIncrement watch(ProfileEvents::AsynchronousRemoteReadWaitMicroseconds);
@@ -210,8 +211,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
/// In case of multiple files for the same file in clickhouse (i.e. log family)
/// file_offset_of_buffer_end will not match getImplementationBufferOffset()
/// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()]
- assert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset());
- assert(file_offset_of_buffer_end <= impl->getFileSize());
+ chassert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset());
+ chassert(file_offset_of_buffer_end <= impl->getFileSize());
return bytes_read;
}
@@ -277,6 +278,15 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence)
/// First reset the buffer so the next read will fetch new data to the buffer.
resetWorkingBuffer();
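+ /// If the new position is beyond the allowed read boundary there is nothing left to read or prefetch:
+ /// drop the implementation buffer and clamp the offset to the boundary (read_until_position is exclusive).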
+ if (read_until_position && new_pos > *read_until_position)
+ {
+ ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset);
+ impl->reset();
+
+ file_offset_of_buffer_end = new_pos = *read_until_position; /// read_until_position is a non-included boundary.
+ return new_pos;
+ }
+
/**
* Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
* Note: we read in range [file_offset_of_buffer_end, read_until_position).
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
index 01d4154199a..712a9a7c3b1 100644
--- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
@@ -256,7 +256,7 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const
String ReadBufferFromRemoteFSGather::getInfoForLog()
{
if (!current_buf)
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get info: buffer not initialized");
+ return "";
return current_buf->getInfoForLog();
}
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index 3c620ca819e..a56a78d6722 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -125,14 +125,19 @@ std::string S3ObjectStorage::generateBlobNameForPath(const std::string & /* path
getRandomASCIIString(key_name_total_size - key_name_prefix_size));
}
-Aws::S3::Model::HeadObjectOutcome S3ObjectStorage::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
+size_t S3ObjectStorage::getObjectSize(const std::string & bucket_from, const std::string & key) const
{
- return S3::headObject(*client.get(), bucket_from, key, "", true);
+ return S3::getObjectSize(*client.get(), bucket_from, key, {}, /* for_disk_s3= */ true);
}
bool S3ObjectStorage::exists(const StoredObject & object) const
{
- return S3::objectExists(*client.get(), bucket, object.absolute_path, "", true);
+ return S3::objectExists(*client.get(), bucket, object.absolute_path, {}, /* for_disk_s3= */ true);
+}
+
+void S3ObjectStorage::checkObjectExists(const std::string & bucket_from, const std::string & key, std::string_view description) const
+{
+ return S3::checkObjectExists(*client.get(), bucket_from, key, {}, /* for_disk_s3= */ true, description);
}
std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT
@@ -409,13 +414,10 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons
{
ObjectMetadata result;
- auto object_head = requestObjectHeadData(bucket, path);
- throwIfError(object_head);
-
- auto & object_head_result = object_head.GetResult();
- result.size_bytes = object_head_result.GetContentLength();
- result.last_modified = object_head_result.GetLastModified().Millis();
- result.attributes = object_head_result.GetMetadata();
+ auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, /* for_disk_s3= */ true);
+ result.size_bytes = object_info.size;
+ result.last_modified = object_info.last_modification_time;
+ result.attributes = S3::getObjectMetadata(*client.get(), bucket, path, {}, /* for_disk_s3= */ true);
return result;
}
@@ -442,7 +444,7 @@ void S3ObjectStorage::copyObjectImpl(
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- std::optional head,
+ size_t size,
std::optional metadata) const
{
auto client_ptr = client.get();
@@ -464,7 +466,7 @@ void S3ObjectStorage::copyObjectImpl(
if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge"
|| outcome.GetError().GetExceptionName() == "InvalidRequest"))
{ // Can't come here with MinIO, MinIO allows single part upload for large objects.
- copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
+ copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, size, metadata);
return;
}
@@ -472,12 +474,7 @@ void S3ObjectStorage::copyObjectImpl(
auto settings_ptr = s3_settings.get();
if (settings_ptr->request_settings.check_objects_after_upload)
- {
- auto object_head = requestObjectHeadData(dst_bucket, dst_key);
- if (!object_head.IsSuccess())
- throw Exception(ErrorCodes::S3_ERROR, "Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", dst_key, dst_bucket);
- }
-
+ checkObjectExists(dst_bucket, dst_key, "Immediately after upload");
}
void S3ObjectStorage::copyObjectMultipartImpl(
@@ -485,15 +482,11 @@ void S3ObjectStorage::copyObjectMultipartImpl(
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- std::optional head,
+ size_t size,
std::optional metadata) const
{
- if (!head)
- head = requestObjectHeadData(src_bucket, src_key).GetResult();
-
auto settings_ptr = s3_settings.get();
auto client_ptr = client.get();
- size_t size = head->GetContentLength();
String multipart_upload_id;
@@ -569,29 +562,24 @@ void S3ObjectStorage::copyObjectMultipartImpl(
}
if (settings_ptr->request_settings.check_objects_after_upload)
- {
- auto object_head = requestObjectHeadData(dst_bucket, dst_key);
- if (!object_head.IsSuccess())
- throw Exception(ErrorCodes::S3_ERROR, "Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", dst_key, dst_bucket);
- }
-
+ checkObjectExists(dst_bucket, dst_key, "Immediately after upload");
}
void S3ObjectStorage::copyObject( // NOLINT
const StoredObject & object_from, const StoredObject & object_to, std::optional object_to_attributes)
{
- auto head = requestObjectHeadData(bucket, object_from.absolute_path).GetResult();
+ auto size = getObjectSize(bucket, object_from.absolute_path);
static constexpr int64_t multipart_upload_threashold = 5UL * 1024 * 1024 * 1024;
- if (head.GetContentLength() >= multipart_upload_threashold)
+ if (size >= multipart_upload_threashold)
{
copyObjectMultipartImpl(
- bucket, object_from.absolute_path, bucket, object_to.absolute_path, head, object_to_attributes);
+ bucket, object_from.absolute_path, bucket, object_to.absolute_path, size, object_to_attributes);
}
else
{
copyObjectImpl(
- bucket, object_from.absolute_path, bucket, object_to.absolute_path, head, object_to_attributes);
+ bucket, object_from.absolute_path, bucket, object_to.absolute_path, size, object_to_attributes);
}
}
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
index 0a07639e253..a6318bf59b8 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
@@ -172,7 +172,7 @@ private:
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- std::optional head = std::nullopt,
+ size_t size,
std::optional metadata = std::nullopt) const;
void copyObjectMultipartImpl(
@@ -180,13 +180,14 @@ private:
const String & src_key,
const String & dst_bucket,
const String & dst_key,
- std::optional head = std::nullopt,
+ size_t size,
std::optional metadata = std::nullopt) const;
void removeObjectImpl(const StoredObject & object, bool if_exists);
void removeObjectsImpl(const StoredObjects & objects, bool if_exists);
- Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const;
+ size_t getObjectSize(const std::string & bucket_from, const std::string & key) const;
+ void checkObjectExists(const std::string & bucket_from, const std::string & key, std::string_view description) const;
std::string bucket;
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index dc2f4ffcf55..ed2464f98e8 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -90,7 +90,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
- format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
+ format_settings.json.write_named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
+ format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects;
+ format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_64bit_floats = settings.output_format_json_quote_64bit_floats;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index dcdd44edfeb..9d8680a009d 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -153,7 +153,9 @@ struct FormatSettings
bool quote_denormals = true;
bool quote_decimals = false;
bool escape_forward_slashes = true;
- bool named_tuples_as_objects = false;
+ bool read_named_tuples_as_objects = false;
+ bool write_named_tuples_as_objects = false;
+ bool defaults_for_missing_elements_in_named_tuple = false;
bool serialize_as_strings = false;
bool read_bools_as_numbers = true;
bool read_numbers_as_strings = true;
diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h
index edb0df3ae34..3622db234b5 100644
--- a/src/Functions/MatchImpl.h
+++ b/src/Functions/MatchImpl.h
@@ -118,6 +118,16 @@ struct MatchImpl
if (haystack_offsets.empty())
return;
+ /// Shortcut for the silly but practical case that the pattern matches everything/nothing independently of the haystack:
+ /// - col [not] [i]like '%' / '%%'
+ /// - match(col, '.*')
+ if ((is_like && (needle == "%" or needle == "%%")) || (!is_like && (needle == ".*" || needle == ".*?")))
+ {
+ for (auto & x : res)
+ x = !negate;
+ return;
+ }
+
/// Special case that the [I]LIKE expression reduces to finding a substring in a string
String strstr_pattern;
if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
@@ -267,6 +277,16 @@ struct MatchImpl
if (haystack.empty())
return;
+ /// Shortcut for the silly but practical case that the pattern matches everything/nothing independently of the haystack:
+ /// - col [not] [i]like '%' / '%%'
+ /// - match(col, '.*')
+ if ((is_like && (needle == "%" or needle == "%%")) || (!is_like && (needle == ".*" || needle == ".*?")))
+ {
+ for (auto & x : res)
+ x = !negate;
+ return;
+ }
+
/// Special case that the [I]LIKE expression reduces to finding a substring in a string
String strstr_pattern;
if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index 2d274435a74..69d2a244097 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -250,7 +250,7 @@ size_t ReadBufferFromS3::getFileSize()
if (file_size)
return *file_size;
- auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, true, read_settings.for_object_storage);
+ auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, /* for_disk_s3= */ read_settings.for_object_storage);
file_size = object_size;
return *file_size;
diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp
index 0c3a63b46ea..a18fcf70566 100644
--- a/src/IO/S3Common.cpp
+++ b/src/IO/S3Common.cpp
@@ -27,6 +27,8 @@
# include
# include
# include
+# include
+# include
# include
# include
@@ -40,7 +42,11 @@
namespace ProfileEvents
{
+ extern const Event S3GetObjectAttributes;
+ extern const Event S3GetObjectMetadata;
extern const Event S3HeadObject;
+ extern const Event DiskS3GetObjectAttributes;
+ extern const Event DiskS3GetObjectMetadata;
extern const Event DiskS3HeadObject;
}
@@ -699,6 +705,92 @@ public:
}
};
+/// Extracts the endpoint from a constructed S3 client.
+String getEndpoint(const Aws::S3::S3Client & client)
+{
+ const auto * endpoint_provider = dynamic_cast(const_cast(client).accessEndpointProvider().get());
+ if (!endpoint_provider)
+ return {};
+ String endpoint;
+ endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint);
+ return endpoint;
+}
+
+/// Performs a request to get the size and last modification time of an object.
+/// The function performs either a HeadObject or a GetObjectAttributes request depending on the endpoint.
+std::pair<std::optional<DB::S3::ObjectInfo>, Aws::S3::S3Error> tryGetObjectInfo(
+ const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
+{
+ auto endpoint = getEndpoint(client);
+ bool use_get_object_attributes_request = (endpoint.find(".amazonaws.com") != String::npos);
+
+ if (use_get_object_attributes_request)
+ {
+ /// It's better not to use `HeadObject` requests for AWS S3 because they don't work well with the global region.
+ /// Details: a `HeadObject` request never returns a response body (even on error). However, if a request is sent
+ /// without specifying a region in the endpoint (e.g. "https://test.s3.amazonaws.com/mydata.csv"
+ /// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv"), the error response body is one of the main
+ /// ways to determine the correct region and retry the request with it.
+ /// The AWS SDK does this for other request types (`GetObject`, `ListObjects`, etc.) because they have response bodies,
+ /// but `HeadObject` has none, so that mechanism doesn't work. That's why we use a `GetObjectAttributes` request instead.
+ /// See https://github.com/aws/aws-sdk-cpp/issues/1558 and the function S3ErrorMarshaller::ExtractRegion() for more information.
+
+ ProfileEvents::increment(ProfileEvents::S3GetObjectAttributes);
+ if (for_disk_s3)
+ ProfileEvents::increment(ProfileEvents::DiskS3GetObjectAttributes);
+
+ Aws::S3::Model::GetObjectAttributesRequest req;
+ req.SetBucket(bucket);
+ req.SetKey(key);
+
+ if (!version_id.empty())
+ req.SetVersionId(version_id);
+
+ req.SetObjectAttributes({Aws::S3::Model::ObjectAttributes::ObjectSize});
+
+ auto outcome = client.GetObjectAttributes(req);
+ if (outcome.IsSuccess())
+ {
+ const auto & result = outcome.GetResult();
+ DB::S3::ObjectInfo object_info;
+ object_info.size = static_cast(result.GetObjectSize());
+ object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
+ return {object_info, {}};
+ }
+
+ return {std::nullopt, outcome.GetError()};
+ }
+ else
+ {
+ /// By default we use `HeadObject` requests.
+ /// We cannot always use `GetObjectAttributes` requests because some S3 providers (e.g. MinIO)
+ /// don't support them.
+
+ ProfileEvents::increment(ProfileEvents::S3HeadObject);
+ if (for_disk_s3)
+ ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
+
+ Aws::S3::Model::HeadObjectRequest req;
+ req.SetBucket(bucket);
+ req.SetKey(key);
+
+ if (!version_id.empty())
+ req.SetVersionId(version_id);
+
+ auto outcome = client.HeadObject(req);
+ if (outcome.IsSuccess())
+ {
+ const auto & result = outcome.GetResult();
+ DB::S3::ObjectInfo object_info;
+ object_info.size = static_cast(result.GetContentLength());
+ object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
+ return {object_info, {}};
+ }
+
+ return {std::nullopt, outcome.GetError()};
+ }
+}
+
}
@@ -894,54 +986,33 @@ namespace S3
return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY;
}
- Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
+ ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
{
- ProfileEvents::increment(ProfileEvents::S3HeadObject);
- if (for_disk_s3)
- ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
-
- Aws::S3::Model::HeadObjectRequest req;
- req.SetBucket(bucket);
- req.SetKey(key);
-
- if (!version_id.empty())
- req.SetVersionId(version_id);
-
- return client.HeadObject(req);
- }
-
- S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
- {
- auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);
-
- if (outcome.IsSuccess())
+ auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
+ if (object_info)
{
- auto read_result = outcome.GetResultWithOwnership();
- return {.size = static_cast(read_result.GetContentLength()), .last_modification_time = read_result.GetLastModified().Millis() / 1000};
+ return *object_info;
}
else if (throw_on_error)
{
- const auto & error = outcome.GetError();
throw DB::Exception(ErrorCodes::S3_ERROR,
- "Failed to HEAD object: {}. HTTP response code: {}",
+ "Failed to get object attributes: {}. HTTP response code: {}",
error.GetMessage(), static_cast(error.GetResponseCode()));
}
return {};
}
- size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
+ size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
{
- return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size;
+ return getObjectInfo(client, bucket, key, version_id, for_disk_s3, throw_on_error).size;
}
bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3)
{
- auto outcome = headObject(client, bucket, key, version_id, for_disk_s3);
-
- if (outcome.IsSuccess())
+ auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
+ if (object_info)
return true;
- const auto & error = outcome.GetError();
if (isNotFoundError(error.GetErrorType()))
return false;
@@ -949,6 +1020,48 @@ namespace S3
"Failed to check existence of key {} in bucket {}: {}",
key, bucket, error.GetMessage());
}
+
+ void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, std::string_view description)
+ {
+ auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3);
+ if (object_info)
+ return;
+ throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}",
+ (description.empty() ? "" : (String(description) + ": ")), key, bucket, error.GetMessage());
+ }
+
+ std::map<String, String> getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error)
+ {
+ ProfileEvents::increment(ProfileEvents::S3GetObjectMetadata);
+ if (for_disk_s3)
+ ProfileEvents::increment(ProfileEvents::DiskS3GetObjectMetadata);
+
+ /// We must not use the `HeadObject` request, see the comment about `HeadObjectRequest` in S3Common.h.
+
+ Aws::S3::Model::GetObjectRequest req;
+ req.SetBucket(bucket);
+ req.SetKey(key);
+
+ /// Only the first byte will be read.
+ /// We don't actually need it, but the range must be set, otherwise the entire object would be downloaded.
+ req.SetRange("bytes=0-0");
+
+ if (!version_id.empty())
+ req.SetVersionId(version_id);
+
+ auto outcome = client.GetObject(req);
+
+ if (outcome.IsSuccess())
+ return outcome.GetResult().GetMetadata();
+
+ if (!throw_on_error)
+ return {};
+
+ const auto & error = outcome.GetError();
+ throw S3Exception(error.GetErrorType(),
+ "Failed to get metadata of key {} in bucket {}: {}",
+ key, bucket, error.GetMessage());
+ }
}
}
diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h
index f0844c05abc..69ae1cbb4f4 100644
--- a/src/IO/S3Common.h
+++ b/src/IO/S3Common.h
@@ -11,15 +11,15 @@
#if USE_AWS_S3
#include
-#include
-#include
-#include
-#include
-#include
-
#include
#include
+#include
+#include
+#include
+
+
+namespace Aws::S3 { class S3Client; }
namespace DB
{
@@ -121,22 +121,29 @@ struct URI
static void validateBucket(const String & bucket, const Poco::URI & uri);
};
+/// WARNING: Don't use `HeadObjectRequest`! Use the functions below instead.
+/// For explanation see the comment about `HeadObject` request in the function tryGetObjectInfo().
+
struct ObjectInfo
{
size_t size = 0;
time_t last_modification_time = 0;
};
-bool isNotFoundError(Aws::S3::S3Errors error);
+ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);
-Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);
-
-S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
-
-size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
+size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);
bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false);
+/// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message.
+void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, std::string_view description = {});
+
+bool isNotFoundError(Aws::S3::S3Errors error);
+
+/// Returns the object's metadata.
+std::map<String, String> getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true);
+
}
#endif
diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp
index ec77fc44de6..7d279c07e03 100644
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@@ -182,12 +182,8 @@ void WriteBufferFromS3::finalizeImpl()
if (check_objects_after_upload)
{
LOG_TRACE(log, "Checking object {} exists after upload", key);
-
- auto response = S3::headObject(*client_ptr, bucket, key, "", write_settings.for_object_storage);
- if (!response.IsSuccess())
- throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType());
- else
- LOG_TRACE(log, "Object {} exists after upload", key);
+ S3::checkObjectExists(*client_ptr, bucket, key, {}, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload");
+ LOG_TRACE(log, "Object {} exists after upload", key);
}
}
diff --git a/src/Interpreters/DatabaseAndTableWithAlias.cpp b/src/Interpreters/DatabaseAndTableWithAlias.cpp
index 70825ea8292..7fb581c1b4d 100644
--- a/src/Interpreters/DatabaseAndTableWithAlias.cpp
+++ b/src/Interpreters/DatabaseAndTableWithAlias.cpp
@@ -28,13 +28,29 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableIdentifier &
database = current_database;
}
+DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database)
+{
+ alias = identifier.tryGetAlias();
+
+ if (identifier.name_parts.size() == 2)
+ std::tie(database, table) = std::tie(identifier.name_parts[0], identifier.name_parts[1]);
+ else if (identifier.name_parts.size() == 1)
+ table = identifier.name_parts[0];
+ else
+ throw Exception("Logical error: invalid identifier", ErrorCodes::LOGICAL_ERROR);
+
+ if (database.empty())
+ database = current_database;
+}
+
DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const String & current_database)
{
- const auto * identifier = node->as<ASTTableIdentifier>();
- if (!identifier)
- throw Exception("Logical error: table identifier expected", ErrorCodes::LOGICAL_ERROR);
-
- *this = DatabaseAndTableWithAlias(*identifier, current_database);
+ if (const auto * table_identifier = node->as<ASTTableIdentifier>())
+ *this = DatabaseAndTableWithAlias(*table_identifier, current_database);
+ else if (const auto * identifier = node->as<ASTIdentifier>())
+ *this = DatabaseAndTableWithAlias(*identifier, current_database);
+ else
+ throw Exception("Logical error: identifier or table identifier expected", ErrorCodes::LOGICAL_ERROR);
}
DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database)
diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h
index 237c56d3ce3..58327ff1d81 100644
--- a/src/Interpreters/DatabaseAndTableWithAlias.h
+++ b/src/Interpreters/DatabaseAndTableWithAlias.h
@@ -14,6 +14,7 @@ namespace DB
{
class ASTSelectQuery;
+class ASTIdentifier;
class ASTTableIdentifier;
struct ASTTableExpression;
@@ -28,6 +29,7 @@ struct DatabaseAndTableWithAlias
DatabaseAndTableWithAlias() = default;
explicit DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
+ explicit DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
explicit DatabaseAndTableWithAlias(const ASTTableIdentifier & identifier, const String & current_database = "");
explicit DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");
diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp
index 2abd7af1455..805fcfec181 100644
--- a/src/Interpreters/GatherFunctionQuantileVisitor.cpp
+++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp
@@ -25,6 +25,7 @@ static const std::unordered_map<String, String> quantile_fuse_name_mapping = {
{NameQuantileExactInclusive::name, NameQuantilesExactInclusive::name},
{NameQuantileExactLow::name, NameQuantilesExactLow::name},
{NameQuantileExactWeighted::name, NameQuantilesExactWeighted::name},
+ {NameQuantileInterpolatedWeighted::name, NameQuantilesInterpolatedWeighted::name},
{NameQuantileTDigest::name, NameQuantilesTDigest::name},
{NameQuantileTDigestWeighted::name, NameQuantilesTDigestWeighted::name},
{NameQuantileTiming::name, NameQuantilesTiming::name},
@@ -61,9 +62,11 @@ void GatherFunctionQuantileData::FuseQuantileAggregatesData::addFuncNode(ASTPtr
const auto & arguments = func->arguments->children;
+
bool need_two_args = func->name == NameQuantileDeterministic::name || func->name == NameQuantileExactWeighted::name
- || func->name == NameQuantileTimingWeighted::name || func->name == NameQuantileTDigestWeighted::name
- || func->name == NameQuantileBFloat16Weighted::name;
+ || func->name == NameQuantileInterpolatedWeighted::name || func->name == NameQuantileTimingWeighted::name
+ || func->name == NameQuantileTDigestWeighted::name || func->name == NameQuantileBFloat16Weighted::name;
+
if (arguments.size() != (need_two_args ? 2 : 1))
return;
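The new mapping entry extends quantile fusion to the interpolated-weighted variant: several quantileInterpolatedWeighted calls over the same arguments can be collapsed into a single quantilesInterpolatedWeighted call. A hedged SQL sketch follows; the table and column names are invented, and it assumes the existing optimize_syntax_fuse_functions setting is what drives this rewrite.

```sql
-- With the fusion optimization enabled, the two aggregates below can be rewritten
-- into one quantilesInterpolatedWeighted(0.5, 0.9)(value, weight) call.
SELECT
    quantileInterpolatedWeighted(0.5)(value, weight),
    quantileInterpolatedWeighted(0.9)(value, weight)
FROM metrics
SETTINGS optimize_syntax_fuse_functions = 1;
```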
diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp
index 2e4fd50cd01..17a6b695088 100644
--- a/src/Interpreters/InterpreterExplainQuery.cpp
+++ b/src/Interpreters/InterpreterExplainQuery.cpp
@@ -288,6 +288,20 @@ struct ExplainSettings : public Settings
}
};
+struct QuerySyntaxSettings
+{
+ bool oneline = false;
+
+ constexpr static char name[] = "SYNTAX";
+
+ std::unordered_map<std::string, std::reference_wrapper<bool>> boolean_settings =
+ {
+ {"oneline", oneline},
+ };
+
+ std::unordered_map<std::string, std::reference_wrapper<Int64>> integer_settings;
+};
+
template <typename Settings>
ExplainSettings<Settings> checkAndGetSettings(const ASTPtr & ast_settings)
{
@@ -362,13 +376,12 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
}
case ASTExplainQuery::AnalyzedSyntax:
{
- if (ast.getSettings())
- throw Exception("Settings are not supported for EXPLAIN SYNTAX query.", ErrorCodes::UNKNOWN_SETTING);
+ auto settings = checkAndGetSettings<QuerySyntaxSettings>(ast.getSettings());
ExplainAnalyzedSyntaxVisitor::Data data(getContext());
ExplainAnalyzedSyntaxVisitor(data).visit(query);
- ast.getExplainedQuery()->format(IAST::FormatSettings(buf, false));
+ ast.getExplainedQuery()->format(IAST::FormatSettings(buf, settings.oneline));
break;
}
case ASTExplainQuery::QueryTree:
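With QuerySyntaxSettings in place, EXPLAIN SYNTAX now accepts settings instead of rejecting them, and the only boolean defined so far is oneline, which switches the rewritten query to single-line formatting. A minimal usage sketch (the query itself is just an example):

```sql
-- Default: the rewritten query is printed in multi-line form.
EXPLAIN SYNTAX SELECT number FROM numbers(10) WHERE number > 5;

-- New: the same output collapsed onto a single line.
EXPLAIN SYNTAX oneline = 1 SELECT number FROM numbers(10) WHERE number > 5;
```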
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 1f1ef68492c..a82a11e7c97 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -487,7 +487,7 @@ BlockIO InterpreterSystemQuery::execute()
dropDatabaseReplica(query);
break;
case Type::SYNC_REPLICA:
- syncReplica(query);
+ syncReplica();
break;
case Type::SYNC_DATABASE_REPLICA:
syncReplicatedDatabase(query);
@@ -507,6 +507,9 @@ BlockIO InterpreterSystemQuery::execute()
case Type::RESTORE_REPLICA:
restoreReplica();
break;
+ case Type::WAIT_LOADING_PARTS:
+ waitLoadingParts();
+ break;
case Type::RESTART_DISK:
restartDisk(query.disk);
case Type::FLUSH_LOGS:
@@ -852,7 +855,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query)
throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR);
}
-void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
+void InterpreterSystemQuery::syncReplica()
{
getContext()->checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
@@ -872,6 +875,23 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery &)
throw Exception(ErrorCodes::BAD_ARGUMENTS, table_is_not_replicated.data(), table_id.getNameForLogs());
}
+void InterpreterSystemQuery::waitLoadingParts()
+{
+ getContext()->checkAccess(AccessType::SYSTEM_WAIT_LOADING_PARTS, table_id);
+ StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
+
+ if (auto * merge_tree = dynamic_cast<MergeTreeData *>(table.get()))
+ {
+ LOG_TRACE(log, "Waiting for loading of parts of table {}", table_id.getFullTableName());
+ merge_tree->waitForOutdatedPartsToBeLoaded();
+ LOG_TRACE(log, "Finished waiting for loading of parts of table {}", table_id.getFullTableName());
+ }
+ else
+ {
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "Command WAIT LOADING PARTS is supported only for MergeTree table, but got: {}", table->getName());
+ }
+}
void InterpreterSystemQuery::syncReplicatedDatabase(ASTSystemQuery & query)
{
@@ -1071,6 +1091,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_RESTART_REPLICA);
break;
}
+ case Type::WAIT_LOADING_PARTS:
+ {
+ required_access.emplace_back(AccessType::SYSTEM_WAIT_LOADING_PARTS, query.getDatabase(), query.getTable());
+ break;
+ }
case Type::SYNC_DATABASE_REPLICA:
{
required_access.emplace_back(AccessType::SYSTEM_SYNC_DATABASE_REPLICA, query.getDatabase());
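Together with the new WAIT_LOADING_PARTS query type further down, this interpreter branch backs a SYSTEM WAIT LOADING PARTS statement that blocks until the outdated parts of a MergeTree table have been loaded. A hedged usage sketch; the database and table names are placeholders:

```sql
-- Blocks until background loading of outdated data parts finishes for this MergeTree table;
-- requires the new SYSTEM WAIT LOADING PARTS grant on it.
SYSTEM WAIT LOADING PARTS my_db.my_table;
```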
diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h
index 0058d0c9def..5673890daf3 100644
--- a/src/Interpreters/InterpreterSystemQuery.h
+++ b/src/Interpreters/InterpreterSystemQuery.h
@@ -56,7 +56,8 @@ private:
void restartReplica(const StorageID & replica, ContextMutablePtr system_context);
void restartReplicas(ContextMutablePtr system_context);
- void syncReplica(ASTSystemQuery & query);
+ void syncReplica();
+ void waitLoadingParts();
void syncReplicatedDatabase(ASTSystemQuery & query);
diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
index a57b8d2354b..5500c274c23 100644
--- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
+++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
@@ -49,7 +49,8 @@ ASTPtr makeSubqueryTemplate()
ASTPtr makeSubqueryQualifiedAsterisk()
{
auto asterisk = std::make_shared<ASTQualifiedAsterisk>();
- asterisk->children.emplace_back(std::make_shared<ASTTableIdentifier>("--.s"));
+ asterisk->qualifier = std::make_shared<ASTIdentifier>("--.s");
+ asterisk->children.push_back(asterisk->qualifier);
return asterisk;
}
@@ -153,24 +154,34 @@ private:
for (auto & table_name : data.tables_order)
data.addTableColumns(table_name, columns);
- for (const auto & transformer : asterisk->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (asterisk->transformers)
+ {
+ for (const auto & transformer : asterisk->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
has_asterisks = true;
- auto & identifier = child->children[0]->as<ASTTableIdentifier &>();
+ if (!qualified_asterisk->qualifier)
+ throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR);
+
+ auto & identifier = qualified_asterisk->qualifier->as<ASTIdentifier &>();
data.addTableColumns(identifier.name(), columns);
- // QualifiedAsterisk's transformers start to appear at child 1
- for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
+ if (qualified_asterisk->transformers)
{
- if (it->get()->as<ASTColumnsApplyTransformer>() || it->get()->as<ASTColumnsExceptTransformer>() || it->get()->as<ASTColumnsReplaceTransformer>())
- IASTColumnsTransformer::transform(*it, columns);
- else
- throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR);
+ for (const auto & transformer : qualified_asterisk->transformers->children)
+ {
+ if (transformer->as<ASTColumnsApplyTransformer>() ||
+ transformer->as<ASTColumnsExceptTransformer>() ||
+ transformer->as<ASTColumnsReplaceTransformer>())
+ IASTColumnsTransformer::transform(transformer, columns);
+ else
+ throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR);
+ }
}
}
else if (const auto * columns_list_matcher = child->as<ASTColumnsListMatcher>())
@@ -180,8 +191,11 @@ private:
for (const auto & ident : columns_list_matcher->column_list->children)
columns.emplace_back(ident->clone());
- for (const auto & transformer : columns_list_matcher->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (columns_list_matcher->transformers)
+ {
+ for (const auto & transformer : columns_list_matcher->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else if (const auto * columns_regexp_matcher = child->as<ASTColumnsRegexpMatcher>())
{
@@ -193,8 +207,11 @@ private:
columns,
[&](const String & column_name) { return columns_regexp_matcher->isColumnMatching(column_name); });
- for (const auto & transformer : columns_regexp_matcher->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (columns_regexp_matcher->transformers)
+ {
+ for (const auto & transformer : columns_regexp_matcher->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else
data.new_select_expression_list->children.push_back(child);
@@ -425,6 +442,7 @@ private:
{
if (data.expression_list->children.empty())
data.expression_list->children.emplace_back(std::make_shared<ASTAsterisk>());
+
select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(data.expression_list));
}
data.done = true;
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp
index b88bb5d1caf..1d8676cfc57 100644
--- a/src/Interpreters/JoinedTables.cpp
+++ b/src/Interpreters/JoinedTables.cpp
@@ -154,7 +154,7 @@ private:
static void visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data)
{
- auto & identifier = node.children[0]->as<ASTTableIdentifier &>();
+ auto & identifier = node.qualifier->as<ASTIdentifier &>();
bool rewritten = false;
for (const auto & table : data)
{
diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp
index f438194b87b..f16ece46530 100644
--- a/src/Interpreters/MergeTreeTransaction.cpp
+++ b/src/Interpreters/MergeTreeTransaction.cpp
@@ -303,7 +303,6 @@ bool MergeTreeTransaction::rollback() noexcept
part->version.unlockRemovalTID(tid, TransactionInfoContext{part->storage.getStorageID(), part->name});
}
-
assert([&]()
{
std::lock_guard lock{mutex};
diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp
index 6d490e3e95f..45d5a7b2344 100644
--- a/src/Interpreters/TextLog.cpp
+++ b/src/Interpreters/TextLog.cpp
@@ -49,7 +49,9 @@ NamesAndTypesList TextLogElement::getNamesAndTypes()
{"revision", std::make_shared()},
{"source_file", std::make_shared(std::make_shared())},
- {"source_line", std::make_shared()}
+ {"source_line", std::make_shared()},
+
+ {"message_format_string", std::make_shared(std::make_shared())},
};
}
@@ -74,6 +76,8 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(source_file);
columns[i++]->insert(source_line);
+
+ columns[i++]->insert(message_format_string);
}
TextLog::TextLog(ContextPtr context_, const String & database_name_,
diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h
index 243e001fc52..6efc1c906d4 100644
--- a/src/Interpreters/TextLog.h
+++ b/src/Interpreters/TextLog.h
@@ -28,6 +28,8 @@ struct TextLogElement
String source_file;
UInt64 source_line{};
+ std::string_view message_format_string;
+
static std::string name() { return "TextLog"; }
static NamesAndTypesList getNamesAndTypes();
static NamesAndAliases getNamesAndAliases() { return {}; }
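The message_format_string field carried through TextLogElement ends up as a new column of system.text_log, which makes it possible to group log messages by their original format string. A sketch of how it might be queried, assuming the text_log table is enabled in the server configuration:

```sql
-- Most frequent log message templates since the column was added.
SELECT message_format_string, count() AS c
FROM system.text_log
GROUP BY message_format_string
ORDER BY c DESC
LIMIT 10;
```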
diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
index 9c3a681fd32..36691885459 100644
--- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
+++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
@@ -156,21 +156,19 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D
func_arguments->children.clear();
}
-void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk &, const ASTPtr & ast, Data & data)
+void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data)
{
- if (ast->children.empty())
- throw Exception("Logical error: qualified asterisk must have children", ErrorCodes::LOGICAL_ERROR);
-
- auto & ident = ast->children[0];
+ if (!node.qualifier)
+ throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR);
/// @note it could contain table alias as table name.
- DatabaseAndTableWithAlias db_and_table(ident);
+ DatabaseAndTableWithAlias db_and_table(node.qualifier);
for (const auto & known_table : data.tables)
if (db_and_table.satisfies(known_table.table, true))
return;
- throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
+ throw Exception("Unknown qualified identifier: " + node.qualifier->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER);
}
void TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data & data)
@@ -266,16 +264,22 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
first_table = false;
}
- for (const auto & transformer : asterisk->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (asterisk->transformers)
+ {
+ for (const auto & transformer : asterisk->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else if (auto * asterisk_column_list = child->as<ASTColumnsListMatcher>())
{
for (const auto & ident : asterisk_column_list->column_list->children)
columns.emplace_back(ident->clone());
- for (const auto & transformer : asterisk_column_list->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (asterisk_column_list->transformers)
+ {
+ for (const auto & transformer : asterisk_column_list->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else if (const auto * asterisk_regexp_pattern = child->as<ASTColumnsRegexpMatcher>())
{
@@ -292,12 +296,15 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
first_table = false;
}
- for (const auto & transformer : asterisk_regexp_pattern->children)
- IASTColumnsTransformer::transform(transformer, columns);
+ if (asterisk_regexp_pattern->transformers)
+ {
+ for (const auto & transformer : asterisk_regexp_pattern->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
+ }
}
else if (const auto * qualified_asterisk = child->as<ASTQualifiedAsterisk>())
{
- DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]);
+ DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->qualifier);
for (const auto & table : tables_with_columns)
{
@@ -309,10 +316,10 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt
}
}
- // QualifiedAsterisk's transformers start to appear at child 1
- for (const auto * it = qualified_asterisk->children.begin() + 1; it != qualified_asterisk->children.end(); ++it)
+ if (qualified_asterisk->transformers)
{
- IASTColumnsTransformer::transform(*it, columns);
+ for (const auto & transformer : qualified_asterisk->transformers->children)
+ IASTColumnsTransformer::transform(transformer, columns);
}
}
else
diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp
index 35a6d4ad86a..7974d5212e1 100644
--- a/src/Loggers/OwnSplitChannel.cpp
+++ b/src/Loggers/OwnSplitChannel.cpp
@@ -133,6 +133,8 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg)
elem.source_file = msg.getSourceFile();
elem.source_line = msg.getSourceLine();
+ elem.message_format_string = msg.getFormatString();
+
std::shared_ptr<TextLog> text_log_locked{};
{
std::lock_guard lock(text_log_mutex);
diff --git a/src/Parsers/ASTAsterisk.cpp b/src/Parsers/ASTAsterisk.cpp
index e2f45d04fa4..1ffbb85da7c 100644
--- a/src/Parsers/ASTAsterisk.cpp
+++ b/src/Parsers/ASTAsterisk.cpp
@@ -8,21 +8,37 @@ namespace DB
ASTPtr ASTAsterisk::clone() const
{
auto clone = std::make_shared<ASTAsterisk>(*this);
- clone->cloneChildren();
+
+ if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
return clone;
}
-void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const { ostr.write('*'); }
+void ASTAsterisk::appendColumnName(WriteBuffer & ostr) const
+{
+ if (expression)
+ {
+ expression->appendColumnName(ostr);
+ writeCString(".", ostr);
+ }
+
+ ostr.write('*');
+}
void ASTAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
+ if (expression)
+ {
+ expression->formatImpl(settings, state, frame);
+ settings.ostr << ".";
+ }
+
settings.ostr << "*";
- /// Format column transformers
- for (const auto & child : children)
+ if (transformers)
{
- settings.ostr << ' ';
- child->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
diff --git a/src/Parsers/ASTAsterisk.h b/src/Parsers/ASTAsterisk.h
index 027758ba48c..840b7996536 100644
--- a/src/Parsers/ASTAsterisk.h
+++ b/src/Parsers/ASTAsterisk.h
@@ -16,6 +16,8 @@ public:
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
+ ASTPtr expression;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};
diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp
index 124206043cf..d301394cc54 100644
--- a/src/Parsers/ASTColumnsMatcher.cpp
+++ b/src/Parsers/ASTColumnsMatcher.cpp
@@ -18,12 +18,20 @@ namespace ErrorCodes
ASTPtr ASTColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsRegexpMatcher>(*this);
- clone->cloneChildren();
+
+ if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
return clone;
}
void ASTColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
{
+ if (expression)
+ {
+ expression->appendColumnName(ostr);
+ writeCString(".", ostr);
+ }
writeCString("COLUMNS(", ostr);
writeQuotedString(original_pattern, ostr);
writeChar(')', ostr);
@@ -38,15 +46,21 @@ void ASTColumnsRegexpMatcher::updateTreeHashImpl(SipHash & hash_state) const
void ASTColumnsRegexpMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
+ settings.ostr << (settings.hilite ? hilite_keyword : "");
+
+ if (expression)
+ {
+ expression->formatImpl(settings, state, frame);
+ settings.ostr << ".";
+ }
+
+ settings.ostr << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
settings.ostr << quoteString(original_pattern);
settings.ostr << ")";
- /// Format column transformers
- for (const auto & child : children)
+ if (transformers)
{
- settings.ostr << ' ';
- child->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
@@ -60,6 +74,11 @@ void ASTColumnsRegexpMatcher::setPattern(String pattern)
DB::ErrorCodes::CANNOT_COMPILE_REGEXP);
}
+const String & ASTColumnsRegexpMatcher::getPattern() const
+{
+ return original_pattern;
+}
+
const std::shared_ptr<re2::RE2> & ASTColumnsRegexpMatcher::getMatcher() const
{
return column_matcher;
@@ -73,19 +92,23 @@ bool ASTColumnsRegexpMatcher::isColumnMatching(const String & column_name) const
ASTPtr ASTColumnsListMatcher::clone() const
{
auto clone = std::make_shared<ASTColumnsListMatcher>(*this);
- clone->column_list = column_list->clone();
- clone->cloneChildren();
- return clone;
-}
-void ASTColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
-{
- column_list->updateTreeHash(hash_state);
- IAST::updateTreeHashImpl(hash_state);
+ if (expression) { clone->expression = expression->clone(); clone->children.push_back(clone->expression); }
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
+ clone->column_list = column_list->clone();
+ clone->children.push_back(clone->column_list);
+
+ return clone;
}
void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
{
+ if (expression)
+ {
+ expression->appendColumnName(ostr);
+ writeCString(".", ostr);
+ }
writeCString("COLUMNS(", ostr);
for (auto * it = column_list->children.begin(); it != column_list->children.end(); ++it)
{
@@ -99,7 +122,15 @@ void ASTColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
- settings.ostr << (settings.hilite ? hilite_keyword : "") << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
+ settings.ostr << (settings.hilite ? hilite_keyword : "");
+
+ if (expression)
+ {
+ expression->formatImpl(settings, state, frame);
+ settings.ostr << ".";
+ }
+
+ settings.ostr << "COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
{
@@ -111,33 +142,39 @@ void ASTColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatSt
}
settings.ostr << ")";
- /// Format column transformers
- for (const auto & child : children)
+ if (transformers)
{
- settings.ostr << ' ';
- child->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
ASTPtr ASTQualifiedColumnsRegexpMatcher::clone() const
{
auto clone = std::make_shared<ASTQualifiedColumnsRegexpMatcher>(*this);
- clone->cloneChildren();
+
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
+ clone->qualifier = qualifier->clone();
+ clone->children.push_back(clone->qualifier);
+
return clone;
}
void ASTQualifiedColumnsRegexpMatcher::appendColumnName(WriteBuffer & ostr) const
{
- const auto & qualifier = children.at(0);
qualifier->appendColumnName(ostr);
writeCString(".COLUMNS(", ostr);
writeQuotedString(original_pattern, ostr);
writeChar(')', ostr);
}
-void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern)
+void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern, bool set_matcher)
{
original_pattern = std::move(pattern);
+
+ if (!set_matcher)
+ return;
+
column_matcher = std::make_shared<re2::RE2>(original_pattern, RE2::Quiet);
if (!column_matcher->ok())
throw DB::Exception(
@@ -166,35 +203,35 @@ void ASTQualifiedColumnsRegexpMatcher::formatImpl(const FormatSettings & setting
{
settings.ostr << (settings.hilite ? hilite_keyword : "");
- const auto & qualifier = children.at(0);
qualifier->formatImpl(settings, state, frame);
settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
settings.ostr << quoteString(original_pattern);
settings.ostr << ")";
- /// Format column transformers
- size_t children_size = children.size();
-
- for (size_t i = 1; i < children_size; ++i)
+ if (transformers)
{
- const auto & child = children[i];
- settings.ostr << ' ';
- child->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
ASTPtr ASTQualifiedColumnsListMatcher::clone() const
{
auto clone = std::make_shared<ASTQualifiedColumnsListMatcher>(*this);
+
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
+ clone->qualifier = qualifier->clone();
clone->column_list = column_list->clone();
- clone->cloneChildren();
+
+ clone->children.push_back(clone->qualifier);
+ clone->children.push_back(clone->column_list);
+
return clone;
}
void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
{
- const auto & qualifier = children.at(0);
qualifier->appendColumnName(ostr);
writeCString(".COLUMNS(", ostr);
@@ -208,19 +245,10 @@ void ASTQualifiedColumnsListMatcher::appendColumnName(WriteBuffer & ostr) const
writeChar(')', ostr);
}
-void ASTQualifiedColumnsListMatcher::updateTreeHashImpl(SipHash & hash_state) const
-{
- column_list->updateTreeHash(hash_state);
- IAST::updateTreeHashImpl(hash_state);
-}
-
void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << (settings.hilite ? hilite_keyword : "");
-
- const auto & qualifier = children.at(0);
qualifier->formatImpl(settings, state, frame);
-
settings.ostr << ".COLUMNS" << (settings.hilite ? hilite_none : "") << "(";
for (ASTs::const_iterator it = column_list->children.begin(); it != column_list->children.end(); ++it)
@@ -232,14 +260,9 @@ void ASTQualifiedColumnsListMatcher::formatImpl(const FormatSettings & settings,
}
settings.ostr << ")";
- /// Format column transformers
- size_t children_size = children.size();
-
- for (size_t i = 1; i < children_size; ++i)
+ if (transformers)
{
- const auto & child = children[i];
- settings.ostr << ' ';
- child->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
diff --git a/src/Parsers/ASTColumnsMatcher.h b/src/Parsers/ASTColumnsMatcher.h
index 7ce246608b9..f31a8bd9a22 100644
--- a/src/Parsers/ASTColumnsMatcher.h
+++ b/src/Parsers/ASTColumnsMatcher.h
@@ -24,10 +24,13 @@ public:
void appendColumnName(WriteBuffer & ostr) const override;
void setPattern(String pattern);
+ const String & getPattern() const;
const std::shared_ptr<re2::RE2> & getMatcher() const;
bool isColumnMatching(const String & column_name) const;
void updateTreeHashImpl(SipHash & hash_state) const override;
+ ASTPtr expression;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
@@ -43,9 +46,10 @@ public:
String getID(char) const override { return "ColumnsListMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
- void updateTreeHashImpl(SipHash & hash_state) const override;
+ ASTPtr expression;
ASTPtr column_list;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};
@@ -59,10 +63,12 @@ public:
void appendColumnName(WriteBuffer & ostr) const override;
const std::shared_ptr<re2::RE2> & getMatcher() const;
- void setPattern(String pattern);
+ void setPattern(String pattern, bool set_matcher = true);
void setMatcher(std::shared_ptr<re2::RE2> matcher);
void updateTreeHashImpl(SipHash & hash_state) const override;
+ ASTPtr qualifier;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
@@ -78,9 +84,10 @@ public:
String getID(char) const override { return "QualifiedColumnsListMatcher"; }
ASTPtr clone() const override;
void appendColumnName(WriteBuffer & ostr) const override;
- void updateTreeHashImpl(SipHash & hash_state) const override;
+ ASTPtr qualifier;
ASTPtr column_list;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};
diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp
index 16752fa115e..f3bbeb6167b 100644
--- a/src/Parsers/ASTColumnsTransformers.cpp
+++ b/src/Parsers/ASTColumnsTransformers.cpp
@@ -19,6 +19,15 @@ namespace ErrorCodes
extern const int CANNOT_COMPILE_REGEXP;
}
+void ASTColumnsTransformerList::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
+{
+ for (const auto & child : children)
+ {
+ settings.ostr << ' ';
+ child->formatImpl(settings, state, frame);
+ }
+}
+
void IASTColumnsTransformer::transform(const ASTPtr & transformer, ASTs & nodes)
{
if (const auto * apply = transformer->as<ASTColumnsApplyTransformer>())
diff --git a/src/Parsers/ASTColumnsTransformers.h b/src/Parsers/ASTColumnsTransformers.h
index 5179726e8cb..f67993724c1 100644
--- a/src/Parsers/ASTColumnsTransformers.h
+++ b/src/Parsers/ASTColumnsTransformers.h
@@ -9,6 +9,23 @@ namespace re2
namespace DB
{
+
+/// A list of column transformers
+class ASTColumnsTransformerList : public IAST
+{
+public:
+ String getID(char) const override { return "ColumnsTransformerList"; }
+ ASTPtr clone() const override
+ {
+ auto clone = std::make_shared<ASTColumnsTransformerList>(*this);
+ clone->cloneChildren();
+ return clone;
+ }
+
+protected:
+ void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+};
+
class IASTColumnsTransformer : public IAST
{
public:
diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h
index 156ffdeacb9..cb8b3199c81 100644
--- a/src/Parsers/ASTExplainQuery.h
+++ b/src/Parsers/ASTExplainQuery.h
@@ -6,6 +6,10 @@
namespace DB
{
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+}
/// AST, EXPLAIN or other query with meaning of explanation query instead of execution
class ASTExplainQuery : public ASTQueryWithOutput
@@ -23,6 +27,45 @@ public:
CurrentTransaction, /// 'EXPLAIN CURRENT TRANSACTION'
};
+ static String toString(ExplainKind kind)
+ {
+ switch (kind)
+ {
+ case ParsedAST: return "EXPLAIN AST";
+ case AnalyzedSyntax: return "EXPLAIN SYNTAX";
+ case QueryTree: return "EXPLAIN QUERY TREE";
+ case QueryPlan: return "EXPLAIN";
+ case QueryPipeline: return "EXPLAIN PIPELINE";
+ case QueryEstimates: return "EXPLAIN ESTIMATE";
+ case TableOverride: return "EXPLAIN TABLE OVERRIDE";
+ case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION";
+ }
+
+ UNREACHABLE();
+ }
+
+ static ExplainKind fromString(const String & str)
+ {
+ if (str == "EXPLAIN AST")
+ return ParsedAST;
+ if (str == "EXPLAIN SYNTAX")
+ return AnalyzedSyntax;
+ if (str == "EXPLAIN QUERY TREE")
+ return QueryTree;
+ if (str == "EXPLAIN" || str == "EXPLAIN PLAN")
+ return QueryPlan;
+ if (str == "EXPLAIN PIPELINE")
+ return QueryPipeline;
+ if (str == "EXPLAIN ESTIMATE")
+ return QueryEstimates;
+ if (str == "EXPLAIN TABLE OVERRIDE")
+ return TableOverride;
+ if (str == "EXPLAIN CURRENT TRANSACTION")
+ return CurrentTransaction;
+
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown explain kind '{}'", str);
+ }
+
explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {}
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
@@ -103,23 +146,6 @@ private:
/// Used by EXPLAIN TABLE OVERRIDE
ASTPtr table_function;
ASTPtr table_override;
-
- static String toString(ExplainKind kind)
- {
- switch (kind)
- {
- case ParsedAST: return "EXPLAIN AST";
- case AnalyzedSyntax: return "EXPLAIN SYNTAX";
- case QueryTree: return "EXPLAIN QUERY TREE";
- case QueryPlan: return "EXPLAIN";
- case QueryPipeline: return "EXPLAIN PIPELINE";
- case QueryEstimates: return "EXPLAIN ESTIMATE";
- case TableOverride: return "EXPLAIN TABLE OVERRIDE";
- case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION";
- }
-
- UNREACHABLE();
- }
};
}
diff --git a/src/Parsers/ASTQualifiedAsterisk.cpp b/src/Parsers/ASTQualifiedAsterisk.cpp
index b755e4eb98c..2dcf481adb7 100644
--- a/src/Parsers/ASTQualifiedAsterisk.cpp
+++ b/src/Parsers/ASTQualifiedAsterisk.cpp
@@ -7,22 +7,18 @@ namespace DB
void ASTQualifiedAsterisk::appendColumnName(WriteBuffer & ostr) const
{
- const auto & qualifier = children.at(0);
qualifier->appendColumnName(ostr);
writeCString(".*", ostr);
}
void ASTQualifiedAsterisk::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
- const auto & qualifier = children.at(0);
qualifier->formatImpl(settings, state, frame);
settings.ostr << ".*";
- /// Format column transformers
- for (ASTs::const_iterator it = children.begin() + 1; it != children.end(); ++it)
+ if (transformers)
{
- settings.ostr << ' ';
- (*it)->formatImpl(settings, state, frame);
+ transformers->formatImpl(settings, state, frame);
}
}
diff --git a/src/Parsers/ASTQualifiedAsterisk.h b/src/Parsers/ASTQualifiedAsterisk.h
index 1b644532f53..e67b4cd82dd 100644
--- a/src/Parsers/ASTQualifiedAsterisk.h
+++ b/src/Parsers/ASTQualifiedAsterisk.h
@@ -17,11 +17,18 @@ public:
ASTPtr clone() const override
{
auto clone = std::make_shared<ASTQualifiedAsterisk>(*this);
- clone->cloneChildren();
+
+ if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); }
+
+ clone->qualifier = qualifier->clone();
+ clone->children.push_back(clone->qualifier);
+
return clone;
}
void appendColumnName(WriteBuffer & ostr) const override;
+ ASTPtr qualifier;
+ ASTPtr transformers;
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};
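After this change ASTQualifiedAsterisk keeps its qualifier and optional ASTColumnsTransformerList as dedicated members instead of positional children; the SQL it represents is unchanged. For reference, a qualified asterisk with trailing transformers looks like the following sketch (table and column names are invented):

```sql
-- `t.*` is the qualified asterisk; EXCEPT and APPLY land in its transformer list.
SELECT t.* EXCEPT (password) APPLY (toString)
FROM users AS t;
```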
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 5ed77f48ceb..bfc7c5e6a45 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -166,6 +166,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
else if ( type == Type::RESTART_REPLICA
|| type == Type::RESTORE_REPLICA
|| type == Type::SYNC_REPLICA
+ || type == Type::WAIT_LOADING_PARTS
|| type == Type::FLUSH_DISTRIBUTED
|| type == Type::RELOAD_DICTIONARY
|| type == Type::RELOAD_MODEL
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 76788fd31fe..ae08fe464ad 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -35,6 +35,7 @@ public:
RESTART_REPLICAS,
RESTART_REPLICA,
RESTORE_REPLICA,
+ WAIT_LOADING_PARTS,
DROP_REPLICA,
DROP_DATABASE_REPLICA,
SYNC_REPLICA,
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 5951128c285..231897605e0 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -28,6 +28,8 @@
#include
#include
#include
+#include
+#include
#include
#include
#include
@@ -116,8 +118,40 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
}
else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected))
{
- /// Replace SELECT * FROM (EXPLAIN SELECT ...) with SELECT * FROM viewExplain(EXPLAIN SELECT ...)
- result_node = buildSelectFromTableFunction(makeASTFunction("viewExplain", explain_node));
+ const auto & explain_query = explain_node->as<ASTExplainQuery &>();
+
+ if (explain_query.getTableFunction() || explain_query.getTableOverride())
+ throw Exception("EXPLAIN in a subquery cannot have a table function or table override", ErrorCodes::BAD_ARGUMENTS);
+
+ /// Replace subquery `(EXPLAIN SELECT ...)`
+ /// with `(SELECT * FROM viewExplain("", "